{ "best_metric": 0.7836995124816895, "best_model_checkpoint": "/mnt/ainative-store-p/common/pcache-checkpoint/user/491880/MIMO/finetune/CC-llava-onevision-qwen2-0.5b-mid-stage-a4-mlp2x_gelu-NAMEVisualSkipMultiProjectorVisionSumAdaptiveWeightTVSimwPMAMultiQueryLoraProjector128Alpha128FixOriProjector_InitProj_MIDSTAGE-LayerRANGE-1-24-4-VisionRANGE-1-28-4-bs256-32ppus/checkpoint-11000", "epoch": 2.2481344960891847, "eval_steps": 500, "global_step": 12500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00017980760586172796, "grad_norm": 8.079721450805664, "learning_rate": 1.1976047904191618e-08, "loss": 1.4064, "step": 1 }, { "epoch": 0.0003596152117234559, "grad_norm": 8.640740394592285, "learning_rate": 2.3952095808383236e-08, "loss": 1.4086, "step": 2 }, { "epoch": 0.0005394228175851838, "grad_norm": 8.870448112487793, "learning_rate": 3.592814371257485e-08, "loss": 1.4499, "step": 3 }, { "epoch": 0.0007192304234469118, "grad_norm": 8.3189115524292, "learning_rate": 4.790419161676647e-08, "loss": 1.3126, "step": 4 }, { "epoch": 0.0008990380293086397, "grad_norm": 8.367447853088379, "learning_rate": 5.98802395209581e-08, "loss": 1.3369, "step": 5 }, { "epoch": 0.0010788456351703676, "grad_norm": 7.827223777770996, "learning_rate": 7.18562874251497e-08, "loss": 1.3106, "step": 6 }, { "epoch": 0.0012586532410320957, "grad_norm": 3.477555274963379, "learning_rate": 8.383233532934132e-08, "loss": 1.3423, "step": 7 }, { "epoch": 0.0014384608468938236, "grad_norm": 2.8426051139831543, "learning_rate": 9.580838323353295e-08, "loss": 1.3486, "step": 8 }, { "epoch": 0.0016182684527555515, "grad_norm": 7.744060039520264, "learning_rate": 1.0778443113772456e-07, "loss": 1.3379, "step": 9 }, { "epoch": 0.0017980760586172794, "grad_norm": 7.837027072906494, "learning_rate": 1.197604790419162e-07, "loss": 1.3681, "step": 10 }, { "epoch": 0.0019778836644790076, "grad_norm": 8.506093978881836, "learning_rate": 1.3173652694610778e-07, "loss": 1.3193, "step": 11 }, { "epoch": 0.0021576912703407353, "grad_norm": 8.044903755187988, "learning_rate": 1.437125748502994e-07, "loss": 1.3695, "step": 12 }, { "epoch": 0.0023374988762024634, "grad_norm": 7.831776142120361, "learning_rate": 1.5568862275449104e-07, "loss": 1.4045, "step": 13 }, { "epoch": 0.0025173064820641915, "grad_norm": 7.7676496505737305, "learning_rate": 1.6766467065868263e-07, "loss": 1.4186, "step": 14 }, { "epoch": 0.002697114087925919, "grad_norm": 8.055481910705566, "learning_rate": 1.7964071856287425e-07, "loss": 1.3502, "step": 15 }, { "epoch": 0.0028769216937876473, "grad_norm": 8.120262145996094, "learning_rate": 1.916167664670659e-07, "loss": 1.3714, "step": 16 }, { "epoch": 0.003056729299649375, "grad_norm": 8.214150428771973, "learning_rate": 2.035928143712575e-07, "loss": 1.3226, "step": 17 }, { "epoch": 0.003236536905511103, "grad_norm": 8.348348617553711, "learning_rate": 2.1556886227544912e-07, "loss": 1.4477, "step": 18 }, { "epoch": 0.003416344511372831, "grad_norm": 8.062725067138672, "learning_rate": 2.2754491017964074e-07, "loss": 1.5086, "step": 19 }, { "epoch": 0.003596152117234559, "grad_norm": 8.236797332763672, "learning_rate": 2.395209580838324e-07, "loss": 1.3505, "step": 20 }, { "epoch": 0.003775959723096287, "grad_norm": 8.207820892333984, "learning_rate": 2.5149700598802395e-07, "loss": 1.2966, "step": 21 }, { "epoch": 0.003955767328958015, "grad_norm": 7.780220031738281, "learning_rate": 2.6347305389221556e-07, "loss": 1.3712, "step": 22 }, { "epoch": 0.004135574934819743, "grad_norm": 8.189355850219727, "learning_rate": 2.754491017964072e-07, "loss": 1.3691, "step": 23 }, { "epoch": 0.0043153825406814705, "grad_norm": 8.190055847167969, "learning_rate": 2.874251497005988e-07, "loss": 1.4263, "step": 24 }, { "epoch": 0.004495190146543199, "grad_norm": 8.480175971984863, "learning_rate": 2.9940119760479047e-07, "loss": 1.3691, "step": 25 }, { "epoch": 0.004674997752404927, "grad_norm": 8.097103118896484, "learning_rate": 3.113772455089821e-07, "loss": 1.4567, "step": 26 }, { "epoch": 0.004854805358266654, "grad_norm": 8.447556495666504, "learning_rate": 3.233532934131737e-07, "loss": 1.3802, "step": 27 }, { "epoch": 0.005034612964128383, "grad_norm": 2.85005259513855, "learning_rate": 3.3532934131736526e-07, "loss": 1.3737, "step": 28 }, { "epoch": 0.005214420569990111, "grad_norm": 6.80979061126709, "learning_rate": 3.4730538922155693e-07, "loss": 1.3788, "step": 29 }, { "epoch": 0.005394228175851838, "grad_norm": 7.640084743499756, "learning_rate": 3.592814371257485e-07, "loss": 1.4497, "step": 30 }, { "epoch": 0.005574035781713567, "grad_norm": 8.044221878051758, "learning_rate": 3.7125748502994017e-07, "loss": 1.2974, "step": 31 }, { "epoch": 0.005753843387575295, "grad_norm": 6.871516704559326, "learning_rate": 3.832335329341318e-07, "loss": 1.2879, "step": 32 }, { "epoch": 0.005933650993437022, "grad_norm": 7.320153713226318, "learning_rate": 3.952095808383234e-07, "loss": 1.3414, "step": 33 }, { "epoch": 0.00611345859929875, "grad_norm": 7.663967609405518, "learning_rate": 4.07185628742515e-07, "loss": 1.3726, "step": 34 }, { "epoch": 0.0062932662051604785, "grad_norm": 7.331847667694092, "learning_rate": 4.191616766467066e-07, "loss": 1.4063, "step": 35 }, { "epoch": 0.006473073811022206, "grad_norm": 7.447369575500488, "learning_rate": 4.3113772455089825e-07, "loss": 1.3593, "step": 36 }, { "epoch": 0.006652881416883934, "grad_norm": 7.204465389251709, "learning_rate": 4.431137724550898e-07, "loss": 1.4065, "step": 37 }, { "epoch": 0.006832689022745662, "grad_norm": 7.08964729309082, "learning_rate": 4.550898203592815e-07, "loss": 1.3131, "step": 38 }, { "epoch": 0.00701249662860739, "grad_norm": 6.595163345336914, "learning_rate": 4.670658682634731e-07, "loss": 1.2469, "step": 39 }, { "epoch": 0.007192304234469118, "grad_norm": 6.955565929412842, "learning_rate": 4.790419161676648e-07, "loss": 1.3331, "step": 40 }, { "epoch": 0.007372111840330846, "grad_norm": 6.590028762817383, "learning_rate": 4.910179640718563e-07, "loss": 1.2672, "step": 41 }, { "epoch": 0.007551919446192574, "grad_norm": 2.6982312202453613, "learning_rate": 5.029940119760479e-07, "loss": 1.3437, "step": 42 }, { "epoch": 0.007731727052054302, "grad_norm": 5.950307846069336, "learning_rate": 5.149700598802396e-07, "loss": 1.2965, "step": 43 }, { "epoch": 0.00791153465791603, "grad_norm": 6.299062252044678, "learning_rate": 5.269461077844311e-07, "loss": 1.3134, "step": 44 }, { "epoch": 0.008091342263777758, "grad_norm": 6.653504371643066, "learning_rate": 5.389221556886228e-07, "loss": 1.336, "step": 45 }, { "epoch": 0.008271149869639486, "grad_norm": 6.65632963180542, "learning_rate": 5.508982035928144e-07, "loss": 1.3349, "step": 46 }, { "epoch": 0.008450957475501213, "grad_norm": 5.969344615936279, "learning_rate": 5.62874251497006e-07, "loss": 1.3038, "step": 47 }, { "epoch": 0.008630765081362941, "grad_norm": 2.915808916091919, "learning_rate": 5.748502994011976e-07, "loss": 1.3195, "step": 48 }, { "epoch": 0.00881057268722467, "grad_norm": 5.762768268585205, "learning_rate": 5.868263473053893e-07, "loss": 1.275, "step": 49 }, { "epoch": 0.008990380293086398, "grad_norm": 5.502196311950684, "learning_rate": 5.988023952095809e-07, "loss": 1.2106, "step": 50 }, { "epoch": 0.009170187898948126, "grad_norm": 5.712020397186279, "learning_rate": 6.107784431137725e-07, "loss": 1.3207, "step": 51 }, { "epoch": 0.009349995504809853, "grad_norm": 5.297607421875, "learning_rate": 6.227544910179642e-07, "loss": 1.2724, "step": 52 }, { "epoch": 0.009529803110671581, "grad_norm": 5.752290725708008, "learning_rate": 6.347305389221557e-07, "loss": 1.2871, "step": 53 }, { "epoch": 0.009709610716533309, "grad_norm": 5.116922378540039, "learning_rate": 6.467065868263474e-07, "loss": 1.3114, "step": 54 }, { "epoch": 0.009889418322395037, "grad_norm": 4.597649574279785, "learning_rate": 6.586826347305391e-07, "loss": 1.3198, "step": 55 }, { "epoch": 0.010069225928256766, "grad_norm": 4.3456597328186035, "learning_rate": 6.706586826347305e-07, "loss": 1.3107, "step": 56 }, { "epoch": 0.010249033534118494, "grad_norm": 2.710043430328369, "learning_rate": 6.826347305389222e-07, "loss": 1.382, "step": 57 }, { "epoch": 0.010428841139980221, "grad_norm": 3.8968899250030518, "learning_rate": 6.946107784431139e-07, "loss": 1.1896, "step": 58 }, { "epoch": 0.010608648745841949, "grad_norm": 4.123720645904541, "learning_rate": 7.065868263473054e-07, "loss": 1.2588, "step": 59 }, { "epoch": 0.010788456351703677, "grad_norm": 3.99202823638916, "learning_rate": 7.18562874251497e-07, "loss": 1.2943, "step": 60 }, { "epoch": 0.010968263957565404, "grad_norm": 3.4421911239624023, "learning_rate": 7.305389221556887e-07, "loss": 1.2075, "step": 61 }, { "epoch": 0.011148071563427134, "grad_norm": 3.7782695293426514, "learning_rate": 7.425149700598803e-07, "loss": 1.183, "step": 62 }, { "epoch": 0.011327879169288861, "grad_norm": 3.718351364135742, "learning_rate": 7.544910179640719e-07, "loss": 1.2919, "step": 63 }, { "epoch": 0.01150768677515059, "grad_norm": 3.8805811405181885, "learning_rate": 7.664670658682636e-07, "loss": 1.163, "step": 64 }, { "epoch": 0.011687494381012317, "grad_norm": 3.6718029975891113, "learning_rate": 7.784431137724552e-07, "loss": 1.2341, "step": 65 }, { "epoch": 0.011867301986874045, "grad_norm": 3.5310075283050537, "learning_rate": 7.904191616766468e-07, "loss": 1.2354, "step": 66 }, { "epoch": 0.012047109592735772, "grad_norm": 3.392177104949951, "learning_rate": 8.023952095808384e-07, "loss": 1.2248, "step": 67 }, { "epoch": 0.0122269171985975, "grad_norm": 3.2093327045440674, "learning_rate": 8.1437125748503e-07, "loss": 1.3494, "step": 68 }, { "epoch": 0.01240672480445923, "grad_norm": 3.1907646656036377, "learning_rate": 8.263473053892217e-07, "loss": 1.1985, "step": 69 }, { "epoch": 0.012586532410320957, "grad_norm": 3.275195598602295, "learning_rate": 8.383233532934132e-07, "loss": 1.1322, "step": 70 }, { "epoch": 0.012766340016182685, "grad_norm": 2.995248317718506, "learning_rate": 8.502994011976048e-07, "loss": 1.2031, "step": 71 }, { "epoch": 0.012946147622044412, "grad_norm": 3.1148617267608643, "learning_rate": 8.622754491017965e-07, "loss": 1.2383, "step": 72 }, { "epoch": 0.01312595522790614, "grad_norm": 3.2554824352264404, "learning_rate": 8.742514970059882e-07, "loss": 1.2168, "step": 73 }, { "epoch": 0.013305762833767868, "grad_norm": 2.401789426803589, "learning_rate": 8.862275449101796e-07, "loss": 1.3337, "step": 74 }, { "epoch": 0.013485570439629597, "grad_norm": 3.0211269855499268, "learning_rate": 8.982035928143713e-07, "loss": 1.2744, "step": 75 }, { "epoch": 0.013665378045491325, "grad_norm": 2.815054178237915, "learning_rate": 9.10179640718563e-07, "loss": 1.2495, "step": 76 }, { "epoch": 0.013845185651353053, "grad_norm": 2.738988161087036, "learning_rate": 9.221556886227545e-07, "loss": 1.1738, "step": 77 }, { "epoch": 0.01402499325721478, "grad_norm": 2.5209007263183594, "learning_rate": 9.341317365269462e-07, "loss": 1.1655, "step": 78 }, { "epoch": 0.014204800863076508, "grad_norm": 2.977531909942627, "learning_rate": 9.461077844311379e-07, "loss": 1.1808, "step": 79 }, { "epoch": 0.014384608468938236, "grad_norm": 2.605039358139038, "learning_rate": 9.580838323353295e-07, "loss": 1.1505, "step": 80 }, { "epoch": 0.014564416074799963, "grad_norm": 2.369039297103882, "learning_rate": 9.70059880239521e-07, "loss": 1.1349, "step": 81 }, { "epoch": 0.014744223680661693, "grad_norm": 2.447789430618286, "learning_rate": 9.820359281437127e-07, "loss": 1.125, "step": 82 }, { "epoch": 0.01492403128652342, "grad_norm": 2.339059829711914, "learning_rate": 9.940119760479043e-07, "loss": 1.1764, "step": 83 }, { "epoch": 0.015103838892385148, "grad_norm": 2.325096845626831, "learning_rate": 1.0059880239520958e-06, "loss": 1.1892, "step": 84 }, { "epoch": 0.015283646498246876, "grad_norm": 2.173919439315796, "learning_rate": 1.0179640718562875e-06, "loss": 1.1201, "step": 85 }, { "epoch": 0.015463454104108603, "grad_norm": 2.2627322673797607, "learning_rate": 1.0299401197604791e-06, "loss": 1.1042, "step": 86 }, { "epoch": 0.01564326170997033, "grad_norm": 2.6182236671447754, "learning_rate": 1.0419161676646708e-06, "loss": 1.323, "step": 87 }, { "epoch": 0.01582306931583206, "grad_norm": 2.6619036197662354, "learning_rate": 1.0538922155688623e-06, "loss": 1.3331, "step": 88 }, { "epoch": 0.016002876921693786, "grad_norm": 2.2705130577087402, "learning_rate": 1.065868263473054e-06, "loss": 1.1767, "step": 89 }, { "epoch": 0.016182684527555516, "grad_norm": 2.319932222366333, "learning_rate": 1.0778443113772456e-06, "loss": 1.1993, "step": 90 }, { "epoch": 0.016362492133417242, "grad_norm": 2.1276230812072754, "learning_rate": 1.089820359281437e-06, "loss": 1.1618, "step": 91 }, { "epoch": 0.01654229973927897, "grad_norm": 2.1082019805908203, "learning_rate": 1.1017964071856287e-06, "loss": 1.1397, "step": 92 }, { "epoch": 0.0167221073451407, "grad_norm": 2.0926668643951416, "learning_rate": 1.1137724550898204e-06, "loss": 1.0929, "step": 93 }, { "epoch": 0.016901914951002427, "grad_norm": 2.1022205352783203, "learning_rate": 1.125748502994012e-06, "loss": 1.1531, "step": 94 }, { "epoch": 0.017081722556864156, "grad_norm": 2.335123062133789, "learning_rate": 1.1377245508982037e-06, "loss": 1.1264, "step": 95 }, { "epoch": 0.017261530162725882, "grad_norm": 2.187950372695923, "learning_rate": 1.1497005988023952e-06, "loss": 1.0897, "step": 96 }, { "epoch": 0.01744133776858761, "grad_norm": 2.0439155101776123, "learning_rate": 1.1616766467065869e-06, "loss": 1.1719, "step": 97 }, { "epoch": 0.01762114537444934, "grad_norm": 2.1897497177124023, "learning_rate": 1.1736526946107785e-06, "loss": 1.0785, "step": 98 }, { "epoch": 0.017800952980311067, "grad_norm": 3.611137628555298, "learning_rate": 1.1856287425149702e-06, "loss": 1.1487, "step": 99 }, { "epoch": 0.017980760586172796, "grad_norm": 2.6158297061920166, "learning_rate": 1.1976047904191619e-06, "loss": 1.3295, "step": 100 }, { "epoch": 0.018160568192034522, "grad_norm": 2.5348360538482666, "learning_rate": 1.2095808383233535e-06, "loss": 1.305, "step": 101 }, { "epoch": 0.01834037579789625, "grad_norm": 2.0414185523986816, "learning_rate": 1.221556886227545e-06, "loss": 1.08, "step": 102 }, { "epoch": 0.018520183403757978, "grad_norm": 2.4176416397094727, "learning_rate": 1.2335329341317367e-06, "loss": 1.3203, "step": 103 }, { "epoch": 0.018699991009619707, "grad_norm": 2.0187857151031494, "learning_rate": 1.2455089820359283e-06, "loss": 1.0419, "step": 104 }, { "epoch": 0.018879798615481436, "grad_norm": 2.334994077682495, "learning_rate": 1.2574850299401198e-06, "loss": 1.1367, "step": 105 }, { "epoch": 0.019059606221343162, "grad_norm": 1.860371470451355, "learning_rate": 1.2694610778443115e-06, "loss": 1.0314, "step": 106 }, { "epoch": 0.019239413827204892, "grad_norm": 2.038701057434082, "learning_rate": 1.2814371257485031e-06, "loss": 1.1188, "step": 107 }, { "epoch": 0.019419221433066618, "grad_norm": 1.8611613512039185, "learning_rate": 1.2934131736526948e-06, "loss": 1.0635, "step": 108 }, { "epoch": 0.019599029038928347, "grad_norm": 1.8903237581253052, "learning_rate": 1.3053892215568865e-06, "loss": 1.0322, "step": 109 }, { "epoch": 0.019778836644790073, "grad_norm": 1.9517110586166382, "learning_rate": 1.3173652694610781e-06, "loss": 1.1371, "step": 110 }, { "epoch": 0.019958644250651802, "grad_norm": 2.3672075271606445, "learning_rate": 1.3293413173652694e-06, "loss": 1.0948, "step": 111 }, { "epoch": 0.020138451856513532, "grad_norm": 2.0277442932128906, "learning_rate": 1.341317365269461e-06, "loss": 1.1416, "step": 112 }, { "epoch": 0.020318259462375258, "grad_norm": 2.1179141998291016, "learning_rate": 1.3532934131736527e-06, "loss": 1.1239, "step": 113 }, { "epoch": 0.020498067068236987, "grad_norm": 1.891800045967102, "learning_rate": 1.3652694610778444e-06, "loss": 1.1528, "step": 114 }, { "epoch": 0.020677874674098713, "grad_norm": 1.997679352760315, "learning_rate": 1.377245508982036e-06, "loss": 1.0307, "step": 115 }, { "epoch": 0.020857682279960443, "grad_norm": 2.127122402191162, "learning_rate": 1.3892215568862277e-06, "loss": 1.3141, "step": 116 }, { "epoch": 0.02103748988582217, "grad_norm": 1.9796534776687622, "learning_rate": 1.4011976047904194e-06, "loss": 1.0424, "step": 117 }, { "epoch": 0.021217297491683898, "grad_norm": 1.866485834121704, "learning_rate": 1.4131736526946109e-06, "loss": 1.0607, "step": 118 }, { "epoch": 0.021397105097545627, "grad_norm": 1.877120018005371, "learning_rate": 1.4251497005988023e-06, "loss": 1.0334, "step": 119 }, { "epoch": 0.021576912703407353, "grad_norm": 1.9353066682815552, "learning_rate": 1.437125748502994e-06, "loss": 1.0694, "step": 120 }, { "epoch": 0.021756720309269083, "grad_norm": 2.0797111988067627, "learning_rate": 1.4491017964071857e-06, "loss": 1.1672, "step": 121 }, { "epoch": 0.02193652791513081, "grad_norm": 2.110569953918457, "learning_rate": 1.4610778443113773e-06, "loss": 1.1573, "step": 122 }, { "epoch": 0.022116335520992538, "grad_norm": 5.0340166091918945, "learning_rate": 1.473053892215569e-06, "loss": 1.1212, "step": 123 }, { "epoch": 0.022296143126854268, "grad_norm": 2.0902700424194336, "learning_rate": 1.4850299401197607e-06, "loss": 1.0387, "step": 124 }, { "epoch": 0.022475950732715994, "grad_norm": 1.9115797281265259, "learning_rate": 1.4970059880239521e-06, "loss": 0.9861, "step": 125 }, { "epoch": 0.022655758338577723, "grad_norm": 1.7227460145950317, "learning_rate": 1.5089820359281438e-06, "loss": 1.0869, "step": 126 }, { "epoch": 0.02283556594443945, "grad_norm": 2.0870113372802734, "learning_rate": 1.5209580838323355e-06, "loss": 1.3041, "step": 127 }, { "epoch": 0.02301537355030118, "grad_norm": 1.8842995166778564, "learning_rate": 1.5329341317365271e-06, "loss": 1.124, "step": 128 }, { "epoch": 0.023195181156162904, "grad_norm": 1.8657200336456299, "learning_rate": 1.5449101796407188e-06, "loss": 1.1309, "step": 129 }, { "epoch": 0.023374988762024634, "grad_norm": 2.131864309310913, "learning_rate": 1.5568862275449105e-06, "loss": 1.0604, "step": 130 }, { "epoch": 0.023554796367886363, "grad_norm": 1.8081941604614258, "learning_rate": 1.568862275449102e-06, "loss": 1.0338, "step": 131 }, { "epoch": 0.02373460397374809, "grad_norm": 1.9179913997650146, "learning_rate": 1.5808383233532936e-06, "loss": 1.048, "step": 132 }, { "epoch": 0.02391441157960982, "grad_norm": 2.0850675106048584, "learning_rate": 1.592814371257485e-06, "loss": 1.0558, "step": 133 }, { "epoch": 0.024094219185471544, "grad_norm": 1.9054895639419556, "learning_rate": 1.6047904191616767e-06, "loss": 1.0419, "step": 134 }, { "epoch": 0.024274026791333274, "grad_norm": 1.9059480428695679, "learning_rate": 1.6167664670658684e-06, "loss": 1.1507, "step": 135 }, { "epoch": 0.024453834397195, "grad_norm": 1.8899602890014648, "learning_rate": 1.62874251497006e-06, "loss": 1.0316, "step": 136 }, { "epoch": 0.02463364200305673, "grad_norm": 1.8169358968734741, "learning_rate": 1.6407185628742517e-06, "loss": 1.0037, "step": 137 }, { "epoch": 0.02481344960891846, "grad_norm": 1.8690828084945679, "learning_rate": 1.6526946107784434e-06, "loss": 1.2315, "step": 138 }, { "epoch": 0.024993257214780185, "grad_norm": 1.9013617038726807, "learning_rate": 1.664670658682635e-06, "loss": 1.0896, "step": 139 }, { "epoch": 0.025173064820641914, "grad_norm": 1.8242830038070679, "learning_rate": 1.6766467065868263e-06, "loss": 1.1008, "step": 140 }, { "epoch": 0.02535287242650364, "grad_norm": 1.9077048301696777, "learning_rate": 1.688622754491018e-06, "loss": 1.0641, "step": 141 }, { "epoch": 0.02553268003236537, "grad_norm": 1.8449251651763916, "learning_rate": 1.7005988023952097e-06, "loss": 1.0705, "step": 142 }, { "epoch": 0.025712487638227095, "grad_norm": 1.7996550798416138, "learning_rate": 1.7125748502994013e-06, "loss": 1.064, "step": 143 }, { "epoch": 0.025892295244088825, "grad_norm": 1.966722846031189, "learning_rate": 1.724550898203593e-06, "loss": 1.126, "step": 144 }, { "epoch": 0.026072102849950554, "grad_norm": 1.873964548110962, "learning_rate": 1.7365269461077847e-06, "loss": 1.074, "step": 145 }, { "epoch": 0.02625191045581228, "grad_norm": 1.7739455699920654, "learning_rate": 1.7485029940119763e-06, "loss": 1.2593, "step": 146 }, { "epoch": 0.02643171806167401, "grad_norm": 1.880977988243103, "learning_rate": 1.7604790419161678e-06, "loss": 0.9918, "step": 147 }, { "epoch": 0.026611525667535735, "grad_norm": 2.147704839706421, "learning_rate": 1.7724550898203592e-06, "loss": 1.1023, "step": 148 }, { "epoch": 0.026791333273397465, "grad_norm": 1.8558069467544556, "learning_rate": 1.784431137724551e-06, "loss": 1.0921, "step": 149 }, { "epoch": 0.026971140879259194, "grad_norm": 1.7998782396316528, "learning_rate": 1.7964071856287426e-06, "loss": 1.0546, "step": 150 }, { "epoch": 0.02715094848512092, "grad_norm": 1.767452359199524, "learning_rate": 1.8083832335329343e-06, "loss": 1.1049, "step": 151 }, { "epoch": 0.02733075609098265, "grad_norm": 1.6455217599868774, "learning_rate": 1.820359281437126e-06, "loss": 1.2481, "step": 152 }, { "epoch": 0.027510563696844376, "grad_norm": 1.6964600086212158, "learning_rate": 1.8323353293413176e-06, "loss": 1.2909, "step": 153 }, { "epoch": 0.027690371302706105, "grad_norm": 1.8692700862884521, "learning_rate": 1.844311377245509e-06, "loss": 1.0425, "step": 154 }, { "epoch": 0.02787017890856783, "grad_norm": 1.9080753326416016, "learning_rate": 1.8562874251497007e-06, "loss": 1.1091, "step": 155 }, { "epoch": 0.02804998651442956, "grad_norm": 1.9769352674484253, "learning_rate": 1.8682634730538924e-06, "loss": 1.0703, "step": 156 }, { "epoch": 0.02822979412029129, "grad_norm": 1.5914653539657593, "learning_rate": 1.880239520958084e-06, "loss": 1.2699, "step": 157 }, { "epoch": 0.028409601726153016, "grad_norm": 1.8861240148544312, "learning_rate": 1.8922155688622757e-06, "loss": 1.0958, "step": 158 }, { "epoch": 0.028589409332014745, "grad_norm": 1.7882460355758667, "learning_rate": 1.9041916167664674e-06, "loss": 1.0593, "step": 159 }, { "epoch": 0.02876921693787647, "grad_norm": 2.235887050628662, "learning_rate": 1.916167664670659e-06, "loss": 1.0051, "step": 160 }, { "epoch": 0.0289490245437382, "grad_norm": 1.7658729553222656, "learning_rate": 1.9281437125748503e-06, "loss": 1.0167, "step": 161 }, { "epoch": 0.029128832149599927, "grad_norm": 1.8757953643798828, "learning_rate": 1.940119760479042e-06, "loss": 1.0125, "step": 162 }, { "epoch": 0.029308639755461656, "grad_norm": 1.859266996383667, "learning_rate": 1.9520958083832337e-06, "loss": 1.0536, "step": 163 }, { "epoch": 0.029488447361323385, "grad_norm": 1.4729900360107422, "learning_rate": 1.9640718562874253e-06, "loss": 1.2846, "step": 164 }, { "epoch": 0.02966825496718511, "grad_norm": 1.8722920417785645, "learning_rate": 1.976047904191617e-06, "loss": 1.0565, "step": 165 }, { "epoch": 0.02984806257304684, "grad_norm": 1.6923232078552246, "learning_rate": 1.9880239520958087e-06, "loss": 1.0245, "step": 166 }, { "epoch": 0.030027870178908567, "grad_norm": 1.7566766738891602, "learning_rate": 2.0000000000000003e-06, "loss": 0.9661, "step": 167 }, { "epoch": 0.030207677784770296, "grad_norm": 1.7714757919311523, "learning_rate": 2.0119760479041916e-06, "loss": 1.0546, "step": 168 }, { "epoch": 0.030387485390632022, "grad_norm": 1.8831342458724976, "learning_rate": 2.0239520958083832e-06, "loss": 1.0716, "step": 169 }, { "epoch": 0.03056729299649375, "grad_norm": 1.7899852991104126, "learning_rate": 2.035928143712575e-06, "loss": 1.0097, "step": 170 }, { "epoch": 0.03074710060235548, "grad_norm": 1.899746298789978, "learning_rate": 2.0479041916167666e-06, "loss": 1.0438, "step": 171 }, { "epoch": 0.030926908208217207, "grad_norm": 2.0608506202697754, "learning_rate": 2.0598802395209583e-06, "loss": 0.9839, "step": 172 }, { "epoch": 0.031106715814078936, "grad_norm": 1.9172217845916748, "learning_rate": 2.07185628742515e-06, "loss": 1.0048, "step": 173 }, { "epoch": 0.03128652341994066, "grad_norm": 1.8907560110092163, "learning_rate": 2.0838323353293416e-06, "loss": 0.9962, "step": 174 }, { "epoch": 0.03146633102580239, "grad_norm": 1.854789137840271, "learning_rate": 2.095808383233533e-06, "loss": 1.0255, "step": 175 }, { "epoch": 0.03164613863166412, "grad_norm": 1.849899411201477, "learning_rate": 2.1077844311377245e-06, "loss": 1.0346, "step": 176 }, { "epoch": 0.03182594623752585, "grad_norm": 1.7627307176589966, "learning_rate": 2.119760479041916e-06, "loss": 1.0636, "step": 177 }, { "epoch": 0.03200575384338757, "grad_norm": 1.897989273071289, "learning_rate": 2.131736526946108e-06, "loss": 1.1412, "step": 178 }, { "epoch": 0.032185561449249306, "grad_norm": 1.451680302619934, "learning_rate": 2.1437125748502995e-06, "loss": 1.2077, "step": 179 }, { "epoch": 0.03236536905511103, "grad_norm": 1.8913533687591553, "learning_rate": 2.155688622754491e-06, "loss": 1.0599, "step": 180 }, { "epoch": 0.03254517666097276, "grad_norm": 1.8487250804901123, "learning_rate": 2.167664670658683e-06, "loss": 0.9902, "step": 181 }, { "epoch": 0.032724984266834484, "grad_norm": 1.8931329250335693, "learning_rate": 2.179640718562874e-06, "loss": 1.0714, "step": 182 }, { "epoch": 0.03290479187269622, "grad_norm": 1.9323399066925049, "learning_rate": 2.1916167664670658e-06, "loss": 1.0163, "step": 183 }, { "epoch": 0.03308459947855794, "grad_norm": 1.7458442449569702, "learning_rate": 2.2035928143712574e-06, "loss": 1.0524, "step": 184 }, { "epoch": 0.03326440708441967, "grad_norm": 1.7702080011367798, "learning_rate": 2.215568862275449e-06, "loss": 1.0185, "step": 185 }, { "epoch": 0.0334442146902814, "grad_norm": 1.496293067932129, "learning_rate": 2.2275449101796408e-06, "loss": 1.2443, "step": 186 }, { "epoch": 0.03362402229614313, "grad_norm": 1.9904704093933105, "learning_rate": 2.2395209580838325e-06, "loss": 1.1063, "step": 187 }, { "epoch": 0.03380382990200485, "grad_norm": 1.7300832271575928, "learning_rate": 2.251497005988024e-06, "loss": 0.9928, "step": 188 }, { "epoch": 0.033983637507866586, "grad_norm": 1.767034649848938, "learning_rate": 2.263473053892216e-06, "loss": 1.0288, "step": 189 }, { "epoch": 0.03416344511372831, "grad_norm": 1.7950018644332886, "learning_rate": 2.2754491017964075e-06, "loss": 1.0629, "step": 190 }, { "epoch": 0.03434325271959004, "grad_norm": 1.3579859733581543, "learning_rate": 2.287425149700599e-06, "loss": 1.2375, "step": 191 }, { "epoch": 0.034523060325451764, "grad_norm": 2.513607978820801, "learning_rate": 2.2994011976047904e-06, "loss": 1.009, "step": 192 }, { "epoch": 0.0347028679313135, "grad_norm": 1.6964670419692993, "learning_rate": 2.311377245508982e-06, "loss": 0.976, "step": 193 }, { "epoch": 0.03488267553717522, "grad_norm": 1.8824191093444824, "learning_rate": 2.3233532934131737e-06, "loss": 1.0333, "step": 194 }, { "epoch": 0.03506248314303695, "grad_norm": 1.8568872213363647, "learning_rate": 2.3353293413173654e-06, "loss": 1.0514, "step": 195 }, { "epoch": 0.03524229074889868, "grad_norm": 1.9977149963378906, "learning_rate": 2.347305389221557e-06, "loss": 1.1824, "step": 196 }, { "epoch": 0.03542209835476041, "grad_norm": 1.7727270126342773, "learning_rate": 2.3592814371257487e-06, "loss": 1.0185, "step": 197 }, { "epoch": 0.035601905960622134, "grad_norm": 1.8961751461029053, "learning_rate": 2.3712574850299404e-06, "loss": 1.0632, "step": 198 }, { "epoch": 0.03578171356648386, "grad_norm": 1.8198184967041016, "learning_rate": 2.383233532934132e-06, "loss": 1.0651, "step": 199 }, { "epoch": 0.03596152117234559, "grad_norm": 1.7906861305236816, "learning_rate": 2.3952095808383237e-06, "loss": 1.0801, "step": 200 }, { "epoch": 0.03614132877820732, "grad_norm": 1.8353712558746338, "learning_rate": 2.4071856287425154e-06, "loss": 0.9239, "step": 201 }, { "epoch": 0.036321136384069044, "grad_norm": 1.740576148033142, "learning_rate": 2.419161676646707e-06, "loss": 1.0834, "step": 202 }, { "epoch": 0.03650094398993078, "grad_norm": 1.9365694522857666, "learning_rate": 2.4311377245508983e-06, "loss": 1.1374, "step": 203 }, { "epoch": 0.0366807515957925, "grad_norm": 1.4357374906539917, "learning_rate": 2.44311377245509e-06, "loss": 1.2281, "step": 204 }, { "epoch": 0.03686055920165423, "grad_norm": 1.971795678138733, "learning_rate": 2.4550898203592817e-06, "loss": 1.0005, "step": 205 }, { "epoch": 0.037040366807515955, "grad_norm": 1.4674264192581177, "learning_rate": 2.4670658682634733e-06, "loss": 1.2768, "step": 206 }, { "epoch": 0.03722017441337769, "grad_norm": 1.8854279518127441, "learning_rate": 2.479041916167665e-06, "loss": 1.045, "step": 207 }, { "epoch": 0.037399982019239414, "grad_norm": 1.3653056621551514, "learning_rate": 2.4910179640718567e-06, "loss": 1.2634, "step": 208 }, { "epoch": 0.03757978962510114, "grad_norm": 1.799027442932129, "learning_rate": 2.5029940119760483e-06, "loss": 0.996, "step": 209 }, { "epoch": 0.03775959723096287, "grad_norm": 1.352178931236267, "learning_rate": 2.5149700598802396e-06, "loss": 1.2582, "step": 210 }, { "epoch": 0.0379394048368246, "grad_norm": 1.8600937128067017, "learning_rate": 2.5269461077844317e-06, "loss": 0.9303, "step": 211 }, { "epoch": 0.038119212442686325, "grad_norm": 4.192755699157715, "learning_rate": 2.538922155688623e-06, "loss": 1.0535, "step": 212 }, { "epoch": 0.03829902004854805, "grad_norm": 2.164557695388794, "learning_rate": 2.550898203592815e-06, "loss": 1.0383, "step": 213 }, { "epoch": 0.038478827654409783, "grad_norm": 1.8618016242980957, "learning_rate": 2.5628742514970063e-06, "loss": 1.0047, "step": 214 }, { "epoch": 0.03865863526027151, "grad_norm": 1.8745756149291992, "learning_rate": 2.5748502994011975e-06, "loss": 1.075, "step": 215 }, { "epoch": 0.038838442866133235, "grad_norm": 1.878997802734375, "learning_rate": 2.5868263473053896e-06, "loss": 0.9288, "step": 216 }, { "epoch": 0.03901825047199497, "grad_norm": 1.8612143993377686, "learning_rate": 2.598802395209581e-06, "loss": 1.0094, "step": 217 }, { "epoch": 0.039198058077856694, "grad_norm": 2.028592586517334, "learning_rate": 2.610778443113773e-06, "loss": 1.0381, "step": 218 }, { "epoch": 0.03937786568371842, "grad_norm": 2.113868474960327, "learning_rate": 2.622754491017964e-06, "loss": 1.0355, "step": 219 }, { "epoch": 0.039557673289580146, "grad_norm": 1.7971765995025635, "learning_rate": 2.6347305389221563e-06, "loss": 1.0107, "step": 220 }, { "epoch": 0.03973748089544188, "grad_norm": 1.8805581331253052, "learning_rate": 2.6467065868263475e-06, "loss": 1.0984, "step": 221 }, { "epoch": 0.039917288501303605, "grad_norm": 1.803561806678772, "learning_rate": 2.6586826347305388e-06, "loss": 1.0258, "step": 222 }, { "epoch": 0.04009709610716533, "grad_norm": 1.7759298086166382, "learning_rate": 2.670658682634731e-06, "loss": 1.0678, "step": 223 }, { "epoch": 0.040276903713027064, "grad_norm": 1.7680352926254272, "learning_rate": 2.682634730538922e-06, "loss": 1.0208, "step": 224 }, { "epoch": 0.04045671131888879, "grad_norm": 1.8752058744430542, "learning_rate": 2.694610778443114e-06, "loss": 0.9586, "step": 225 }, { "epoch": 0.040636518924750516, "grad_norm": 1.8080804347991943, "learning_rate": 2.7065868263473054e-06, "loss": 0.9813, "step": 226 }, { "epoch": 0.04081632653061224, "grad_norm": 1.7766075134277344, "learning_rate": 2.7185628742514975e-06, "loss": 0.9945, "step": 227 }, { "epoch": 0.040996134136473975, "grad_norm": 1.789665937423706, "learning_rate": 2.7305389221556888e-06, "loss": 1.0026, "step": 228 }, { "epoch": 0.0411759417423357, "grad_norm": 1.7441178560256958, "learning_rate": 2.74251497005988e-06, "loss": 0.9966, "step": 229 }, { "epoch": 0.041355749348197426, "grad_norm": 1.9020768404006958, "learning_rate": 2.754491017964072e-06, "loss": 1.0537, "step": 230 }, { "epoch": 0.04153555695405916, "grad_norm": 2.0134220123291016, "learning_rate": 2.7664670658682634e-06, "loss": 1.0063, "step": 231 }, { "epoch": 0.041715364559920885, "grad_norm": 1.8504945039749146, "learning_rate": 2.7784431137724555e-06, "loss": 1.0007, "step": 232 }, { "epoch": 0.04189517216578261, "grad_norm": 1.8002153635025024, "learning_rate": 2.7904191616766467e-06, "loss": 0.9925, "step": 233 }, { "epoch": 0.04207497977164434, "grad_norm": 1.9784823656082153, "learning_rate": 2.802395209580839e-06, "loss": 0.9336, "step": 234 }, { "epoch": 0.04225478737750607, "grad_norm": 1.7990520000457764, "learning_rate": 2.81437125748503e-06, "loss": 1.0192, "step": 235 }, { "epoch": 0.042434594983367796, "grad_norm": 1.8544360399246216, "learning_rate": 2.8263473053892217e-06, "loss": 1.0138, "step": 236 }, { "epoch": 0.04261440258922952, "grad_norm": 1.3655637502670288, "learning_rate": 2.8383233532934134e-06, "loss": 1.2583, "step": 237 }, { "epoch": 0.042794210195091255, "grad_norm": 1.9434539079666138, "learning_rate": 2.8502994011976046e-06, "loss": 1.01, "step": 238 }, { "epoch": 0.04297401780095298, "grad_norm": 2.0522148609161377, "learning_rate": 2.8622754491017967e-06, "loss": 1.0674, "step": 239 }, { "epoch": 0.04315382540681471, "grad_norm": 2.0640995502471924, "learning_rate": 2.874251497005988e-06, "loss": 1.0045, "step": 240 }, { "epoch": 0.04333363301267644, "grad_norm": 1.8070186376571655, "learning_rate": 2.88622754491018e-06, "loss": 1.0935, "step": 241 }, { "epoch": 0.043513440618538166, "grad_norm": 1.8166762590408325, "learning_rate": 2.8982035928143713e-06, "loss": 0.9937, "step": 242 }, { "epoch": 0.04369324822439989, "grad_norm": 1.7525320053100586, "learning_rate": 2.910179640718563e-06, "loss": 1.0581, "step": 243 }, { "epoch": 0.04387305583026162, "grad_norm": 1.7346148490905762, "learning_rate": 2.9221556886227546e-06, "loss": 1.0379, "step": 244 }, { "epoch": 0.04405286343612335, "grad_norm": 1.232875943183899, "learning_rate": 2.9341317365269463e-06, "loss": 1.234, "step": 245 }, { "epoch": 0.044232671041985076, "grad_norm": 1.3320603370666504, "learning_rate": 2.946107784431138e-06, "loss": 1.2306, "step": 246 }, { "epoch": 0.0444124786478468, "grad_norm": 1.9109139442443848, "learning_rate": 2.9580838323353297e-06, "loss": 1.016, "step": 247 }, { "epoch": 0.044592286253708535, "grad_norm": 1.6988012790679932, "learning_rate": 2.9700598802395213e-06, "loss": 0.9936, "step": 248 }, { "epoch": 0.04477209385957026, "grad_norm": 1.9009493589401245, "learning_rate": 2.982035928143713e-06, "loss": 1.0523, "step": 249 }, { "epoch": 0.04495190146543199, "grad_norm": 1.8778049945831299, "learning_rate": 2.9940119760479042e-06, "loss": 0.9953, "step": 250 }, { "epoch": 0.04513170907129371, "grad_norm": 2.328206777572632, "learning_rate": 3.005988023952096e-06, "loss": 1.0397, "step": 251 }, { "epoch": 0.045311516677155446, "grad_norm": 1.7371865510940552, "learning_rate": 3.0179640718562876e-06, "loss": 1.0008, "step": 252 }, { "epoch": 0.04549132428301717, "grad_norm": 1.9052715301513672, "learning_rate": 3.0299401197604792e-06, "loss": 1.0047, "step": 253 }, { "epoch": 0.0456711318888789, "grad_norm": 1.7873083353042603, "learning_rate": 3.041916167664671e-06, "loss": 1.0112, "step": 254 }, { "epoch": 0.04585093949474063, "grad_norm": 1.910757064819336, "learning_rate": 3.0538922155688626e-06, "loss": 1.0618, "step": 255 }, { "epoch": 0.04603074710060236, "grad_norm": 1.8874127864837646, "learning_rate": 3.0658682634730543e-06, "loss": 1.0026, "step": 256 }, { "epoch": 0.04621055470646408, "grad_norm": 1.8568166494369507, "learning_rate": 3.0778443113772455e-06, "loss": 0.9521, "step": 257 }, { "epoch": 0.04639036231232581, "grad_norm": 1.7720332145690918, "learning_rate": 3.0898203592814376e-06, "loss": 0.999, "step": 258 }, { "epoch": 0.04657016991818754, "grad_norm": 1.9010640382766724, "learning_rate": 3.101796407185629e-06, "loss": 0.9434, "step": 259 }, { "epoch": 0.04674997752404927, "grad_norm": 1.2647418975830078, "learning_rate": 3.113772455089821e-06, "loss": 1.2501, "step": 260 }, { "epoch": 0.04692978512991099, "grad_norm": 2.0082268714904785, "learning_rate": 3.125748502994012e-06, "loss": 1.0633, "step": 261 }, { "epoch": 0.047109592735772726, "grad_norm": 1.897938847541809, "learning_rate": 3.137724550898204e-06, "loss": 0.9025, "step": 262 }, { "epoch": 0.04728940034163445, "grad_norm": 1.9666591882705688, "learning_rate": 3.1497005988023955e-06, "loss": 0.9965, "step": 263 }, { "epoch": 0.04746920794749618, "grad_norm": 1.8534176349639893, "learning_rate": 3.161676646706587e-06, "loss": 0.9177, "step": 264 }, { "epoch": 0.047649015553357904, "grad_norm": 1.7673977613449097, "learning_rate": 3.173652694610779e-06, "loss": 1.0324, "step": 265 }, { "epoch": 0.04782882315921964, "grad_norm": 1.958561897277832, "learning_rate": 3.18562874251497e-06, "loss": 1.0643, "step": 266 }, { "epoch": 0.04800863076508136, "grad_norm": 1.9395264387130737, "learning_rate": 3.197604790419162e-06, "loss": 1.0769, "step": 267 }, { "epoch": 0.04818843837094309, "grad_norm": 1.927081823348999, "learning_rate": 3.2095808383233534e-06, "loss": 0.9722, "step": 268 }, { "epoch": 0.04836824597680482, "grad_norm": 2.0374391078948975, "learning_rate": 3.2215568862275455e-06, "loss": 1.0221, "step": 269 }, { "epoch": 0.04854805358266655, "grad_norm": 2.0951638221740723, "learning_rate": 3.2335329341317368e-06, "loss": 0.9696, "step": 270 }, { "epoch": 0.048727861188528274, "grad_norm": 1.7062052488327026, "learning_rate": 3.245508982035929e-06, "loss": 1.0167, "step": 271 }, { "epoch": 0.04890766879439, "grad_norm": 1.8104037046432495, "learning_rate": 3.25748502994012e-06, "loss": 0.9782, "step": 272 }, { "epoch": 0.04908747640025173, "grad_norm": 1.8032561540603638, "learning_rate": 3.2694610778443114e-06, "loss": 0.991, "step": 273 }, { "epoch": 0.04926728400611346, "grad_norm": 1.7043286561965942, "learning_rate": 3.2814371257485035e-06, "loss": 1.0436, "step": 274 }, { "epoch": 0.049447091611975184, "grad_norm": 1.3714525699615479, "learning_rate": 3.2934131736526947e-06, "loss": 1.2334, "step": 275 }, { "epoch": 0.04962689921783692, "grad_norm": 1.6780303716659546, "learning_rate": 3.305389221556887e-06, "loss": 0.9795, "step": 276 }, { "epoch": 0.04980670682369864, "grad_norm": 1.9839142560958862, "learning_rate": 3.317365269461078e-06, "loss": 0.9912, "step": 277 }, { "epoch": 0.04998651442956037, "grad_norm": 2.094092607498169, "learning_rate": 3.32934131736527e-06, "loss": 1.0214, "step": 278 }, { "epoch": 0.050166322035422095, "grad_norm": 1.93482506275177, "learning_rate": 3.3413173652694614e-06, "loss": 0.9627, "step": 279 }, { "epoch": 0.05034612964128383, "grad_norm": 1.8203270435333252, "learning_rate": 3.3532934131736526e-06, "loss": 0.9826, "step": 280 }, { "epoch": 0.050525937247145554, "grad_norm": 1.8876349925994873, "learning_rate": 3.3652694610778447e-06, "loss": 1.0268, "step": 281 }, { "epoch": 0.05070574485300728, "grad_norm": 1.9983668327331543, "learning_rate": 3.377245508982036e-06, "loss": 0.9571, "step": 282 }, { "epoch": 0.05088555245886901, "grad_norm": 1.2839100360870361, "learning_rate": 3.389221556886228e-06, "loss": 1.2099, "step": 283 }, { "epoch": 0.05106536006473074, "grad_norm": 1.7760751247406006, "learning_rate": 3.4011976047904193e-06, "loss": 0.991, "step": 284 }, { "epoch": 0.051245167670592465, "grad_norm": 1.9603922367095947, "learning_rate": 3.4131736526946114e-06, "loss": 1.0286, "step": 285 }, { "epoch": 0.05142497527645419, "grad_norm": 1.9318233728408813, "learning_rate": 3.4251497005988026e-06, "loss": 1.0134, "step": 286 }, { "epoch": 0.051604782882315924, "grad_norm": 1.2283018827438354, "learning_rate": 3.437125748502994e-06, "loss": 1.2333, "step": 287 }, { "epoch": 0.05178459048817765, "grad_norm": 1.8698395490646362, "learning_rate": 3.449101796407186e-06, "loss": 0.9447, "step": 288 }, { "epoch": 0.051964398094039375, "grad_norm": 1.8653206825256348, "learning_rate": 3.4610778443113772e-06, "loss": 1.0174, "step": 289 }, { "epoch": 0.05214420569990111, "grad_norm": 1.9340084791183472, "learning_rate": 3.4730538922155693e-06, "loss": 0.9124, "step": 290 }, { "epoch": 0.052324013305762834, "grad_norm": 1.1635940074920654, "learning_rate": 3.4850299401197606e-06, "loss": 1.1889, "step": 291 }, { "epoch": 0.05250382091162456, "grad_norm": 1.835123062133789, "learning_rate": 3.4970059880239527e-06, "loss": 0.9626, "step": 292 }, { "epoch": 0.05268362851748629, "grad_norm": 1.8515353202819824, "learning_rate": 3.508982035928144e-06, "loss": 0.9244, "step": 293 }, { "epoch": 0.05286343612334802, "grad_norm": 1.8440346717834473, "learning_rate": 3.5209580838323356e-06, "loss": 0.8896, "step": 294 }, { "epoch": 0.053043243729209745, "grad_norm": 1.2652190923690796, "learning_rate": 3.5329341317365273e-06, "loss": 1.2381, "step": 295 }, { "epoch": 0.05322305133507147, "grad_norm": 1.999311923980713, "learning_rate": 3.5449101796407185e-06, "loss": 1.0195, "step": 296 }, { "epoch": 0.053402858940933204, "grad_norm": 1.528746247291565, "learning_rate": 3.5568862275449106e-06, "loss": 0.9638, "step": 297 }, { "epoch": 0.05358266654679493, "grad_norm": 1.7926019430160522, "learning_rate": 3.568862275449102e-06, "loss": 1.0392, "step": 298 }, { "epoch": 0.053762474152656656, "grad_norm": 1.1826125383377075, "learning_rate": 3.580838323353294e-06, "loss": 1.189, "step": 299 }, { "epoch": 0.05394228175851839, "grad_norm": 1.9184846878051758, "learning_rate": 3.592814371257485e-06, "loss": 0.9696, "step": 300 }, { "epoch": 0.054122089364380115, "grad_norm": 1.819407343864441, "learning_rate": 3.604790419161677e-06, "loss": 1.0038, "step": 301 }, { "epoch": 0.05430189697024184, "grad_norm": 3.1718478202819824, "learning_rate": 3.6167664670658685e-06, "loss": 1.0045, "step": 302 }, { "epoch": 0.054481704576103566, "grad_norm": 1.7216237783432007, "learning_rate": 3.62874251497006e-06, "loss": 0.9042, "step": 303 }, { "epoch": 0.0546615121819653, "grad_norm": 1.7868740558624268, "learning_rate": 3.640718562874252e-06, "loss": 0.9601, "step": 304 }, { "epoch": 0.054841319787827025, "grad_norm": 1.7744706869125366, "learning_rate": 3.6526946107784435e-06, "loss": 0.9513, "step": 305 }, { "epoch": 0.05502112739368875, "grad_norm": 1.8582239151000977, "learning_rate": 3.664670658682635e-06, "loss": 0.9695, "step": 306 }, { "epoch": 0.055200934999550484, "grad_norm": 1.8090438842773438, "learning_rate": 3.676646706586827e-06, "loss": 1.0161, "step": 307 }, { "epoch": 0.05538074260541221, "grad_norm": 1.728306770324707, "learning_rate": 3.688622754491018e-06, "loss": 1.0201, "step": 308 }, { "epoch": 0.055560550211273936, "grad_norm": 1.959631085395813, "learning_rate": 3.7005988023952098e-06, "loss": 0.9356, "step": 309 }, { "epoch": 0.05574035781713566, "grad_norm": 1.713687777519226, "learning_rate": 3.7125748502994014e-06, "loss": 1.0064, "step": 310 }, { "epoch": 0.055920165422997395, "grad_norm": 1.1644576787948608, "learning_rate": 3.724550898203593e-06, "loss": 1.2698, "step": 311 }, { "epoch": 0.05609997302885912, "grad_norm": 1.7865887880325317, "learning_rate": 3.7365269461077848e-06, "loss": 1.0336, "step": 312 }, { "epoch": 0.05627978063472085, "grad_norm": 1.7576898336410522, "learning_rate": 3.7485029940119765e-06, "loss": 0.9468, "step": 313 }, { "epoch": 0.05645958824058258, "grad_norm": 1.8014644384384155, "learning_rate": 3.760479041916168e-06, "loss": 1.019, "step": 314 }, { "epoch": 0.056639395846444306, "grad_norm": 1.762649655342102, "learning_rate": 3.7724550898203594e-06, "loss": 1.0344, "step": 315 }, { "epoch": 0.05681920345230603, "grad_norm": 1.774033546447754, "learning_rate": 3.7844311377245515e-06, "loss": 0.9927, "step": 316 }, { "epoch": 0.05699901105816776, "grad_norm": 1.7430140972137451, "learning_rate": 3.7964071856287427e-06, "loss": 0.9399, "step": 317 }, { "epoch": 0.05717881866402949, "grad_norm": 1.8123258352279663, "learning_rate": 3.808383233532935e-06, "loss": 1.0053, "step": 318 }, { "epoch": 0.057358626269891216, "grad_norm": 1.8072642087936401, "learning_rate": 3.820359281437126e-06, "loss": 0.9429, "step": 319 }, { "epoch": 0.05753843387575294, "grad_norm": 2.0734384059906006, "learning_rate": 3.832335329341318e-06, "loss": 0.9396, "step": 320 }, { "epoch": 0.057718241481614675, "grad_norm": 1.8345222473144531, "learning_rate": 3.844311377245509e-06, "loss": 0.9307, "step": 321 }, { "epoch": 0.0578980490874764, "grad_norm": 1.7720211744308472, "learning_rate": 3.856287425149701e-06, "loss": 1.0573, "step": 322 }, { "epoch": 0.05807785669333813, "grad_norm": 2.1541390419006348, "learning_rate": 3.868263473053892e-06, "loss": 0.9252, "step": 323 }, { "epoch": 0.05825766429919985, "grad_norm": 1.3607748746871948, "learning_rate": 3.880239520958084e-06, "loss": 1.2272, "step": 324 }, { "epoch": 0.058437471905061586, "grad_norm": 1.337291955947876, "learning_rate": 3.892215568862276e-06, "loss": 1.2233, "step": 325 }, { "epoch": 0.05861727951092331, "grad_norm": 1.359076738357544, "learning_rate": 3.904191616766467e-06, "loss": 1.2832, "step": 326 }, { "epoch": 0.05879708711678504, "grad_norm": 1.8022446632385254, "learning_rate": 3.916167664670659e-06, "loss": 1.0249, "step": 327 }, { "epoch": 0.05897689472264677, "grad_norm": 1.8060572147369385, "learning_rate": 3.928143712574851e-06, "loss": 1.0644, "step": 328 }, { "epoch": 0.0591567023285085, "grad_norm": 1.7969493865966797, "learning_rate": 3.940119760479042e-06, "loss": 0.9076, "step": 329 }, { "epoch": 0.05933650993437022, "grad_norm": 2.876600503921509, "learning_rate": 3.952095808383234e-06, "loss": 0.9933, "step": 330 }, { "epoch": 0.05951631754023195, "grad_norm": 1.8722511529922485, "learning_rate": 3.964071856287426e-06, "loss": 0.9659, "step": 331 }, { "epoch": 0.05969612514609368, "grad_norm": 2.164942979812622, "learning_rate": 3.976047904191617e-06, "loss": 1.0452, "step": 332 }, { "epoch": 0.05987593275195541, "grad_norm": 1.8613132238388062, "learning_rate": 3.988023952095809e-06, "loss": 1.0353, "step": 333 }, { "epoch": 0.06005574035781713, "grad_norm": 1.8592479228973389, "learning_rate": 4.000000000000001e-06, "loss": 0.9541, "step": 334 }, { "epoch": 0.060235547963678866, "grad_norm": 1.7058767080307007, "learning_rate": 4.011976047904192e-06, "loss": 1.104, "step": 335 }, { "epoch": 0.06041535556954059, "grad_norm": 1.8713136911392212, "learning_rate": 4.023952095808383e-06, "loss": 0.9955, "step": 336 }, { "epoch": 0.06059516317540232, "grad_norm": 2.16245436668396, "learning_rate": 4.035928143712575e-06, "loss": 0.8959, "step": 337 }, { "epoch": 0.060774970781264044, "grad_norm": 1.8539565801620483, "learning_rate": 4.0479041916167665e-06, "loss": 0.954, "step": 338 }, { "epoch": 0.06095477838712578, "grad_norm": 1.665842056274414, "learning_rate": 4.059880239520958e-06, "loss": 0.9617, "step": 339 }, { "epoch": 0.0611345859929875, "grad_norm": 1.6645853519439697, "learning_rate": 4.07185628742515e-06, "loss": 0.9518, "step": 340 }, { "epoch": 0.06131439359884923, "grad_norm": 2.025362253189087, "learning_rate": 4.0838323353293415e-06, "loss": 0.9224, "step": 341 }, { "epoch": 0.06149420120471096, "grad_norm": 1.6725510358810425, "learning_rate": 4.095808383233533e-06, "loss": 0.9597, "step": 342 }, { "epoch": 0.06167400881057269, "grad_norm": 1.908176064491272, "learning_rate": 4.107784431137725e-06, "loss": 0.9923, "step": 343 }, { "epoch": 0.061853816416434414, "grad_norm": 1.8928775787353516, "learning_rate": 4.1197604790419165e-06, "loss": 0.9699, "step": 344 }, { "epoch": 0.06203362402229615, "grad_norm": 1.7964937686920166, "learning_rate": 4.131736526946108e-06, "loss": 0.9885, "step": 345 }, { "epoch": 0.06221343162815787, "grad_norm": 1.8506230115890503, "learning_rate": 4.1437125748503e-06, "loss": 0.9735, "step": 346 }, { "epoch": 0.0623932392340196, "grad_norm": 1.8660792112350464, "learning_rate": 4.1556886227544915e-06, "loss": 0.9268, "step": 347 }, { "epoch": 0.06257304683988132, "grad_norm": 1.8252463340759277, "learning_rate": 4.167664670658683e-06, "loss": 1.0097, "step": 348 }, { "epoch": 0.06275285444574305, "grad_norm": 1.8029800653457642, "learning_rate": 4.179640718562875e-06, "loss": 1.0448, "step": 349 }, { "epoch": 0.06293266205160478, "grad_norm": 1.6158336400985718, "learning_rate": 4.191616766467066e-06, "loss": 0.8936, "step": 350 }, { "epoch": 0.06311246965746652, "grad_norm": 1.8164864778518677, "learning_rate": 4.203592814371258e-06, "loss": 0.9749, "step": 351 }, { "epoch": 0.06329227726332824, "grad_norm": 1.7923271656036377, "learning_rate": 4.215568862275449e-06, "loss": 1.021, "step": 352 }, { "epoch": 0.06347208486918997, "grad_norm": 1.2026299238204956, "learning_rate": 4.2275449101796415e-06, "loss": 1.2351, "step": 353 }, { "epoch": 0.0636518924750517, "grad_norm": 1.7774341106414795, "learning_rate": 4.239520958083832e-06, "loss": 0.9799, "step": 354 }, { "epoch": 0.06383170008091342, "grad_norm": 1.8999072313308716, "learning_rate": 4.251497005988025e-06, "loss": 0.9759, "step": 355 }, { "epoch": 0.06401150768677515, "grad_norm": 1.8489001989364624, "learning_rate": 4.263473053892216e-06, "loss": 1.0109, "step": 356 }, { "epoch": 0.06419131529263687, "grad_norm": 1.829334020614624, "learning_rate": 4.275449101796407e-06, "loss": 1.0316, "step": 357 }, { "epoch": 0.06437112289849861, "grad_norm": 1.8905422687530518, "learning_rate": 4.287425149700599e-06, "loss": 0.956, "step": 358 }, { "epoch": 0.06455093050436034, "grad_norm": 1.2876362800598145, "learning_rate": 4.299401197604791e-06, "loss": 1.216, "step": 359 }, { "epoch": 0.06473073811022206, "grad_norm": 1.7320375442504883, "learning_rate": 4.311377245508982e-06, "loss": 0.9691, "step": 360 }, { "epoch": 0.06491054571608379, "grad_norm": 1.776748776435852, "learning_rate": 4.323353293413174e-06, "loss": 0.856, "step": 361 }, { "epoch": 0.06509035332194552, "grad_norm": 1.7252031564712524, "learning_rate": 4.335329341317366e-06, "loss": 1.0816, "step": 362 }, { "epoch": 0.06527016092780724, "grad_norm": 1.8210726976394653, "learning_rate": 4.347305389221557e-06, "loss": 0.9661, "step": 363 }, { "epoch": 0.06544996853366897, "grad_norm": 1.7038756608963013, "learning_rate": 4.359281437125748e-06, "loss": 0.9826, "step": 364 }, { "epoch": 0.06562977613953071, "grad_norm": 1.8570857048034668, "learning_rate": 4.371257485029941e-06, "loss": 1.0082, "step": 365 }, { "epoch": 0.06580958374539243, "grad_norm": 1.6614362001419067, "learning_rate": 4.3832335329341315e-06, "loss": 0.9407, "step": 366 }, { "epoch": 0.06598939135125416, "grad_norm": 1.7028322219848633, "learning_rate": 4.395209580838324e-06, "loss": 0.9558, "step": 367 }, { "epoch": 0.06616919895711589, "grad_norm": 1.6100674867630005, "learning_rate": 4.407185628742515e-06, "loss": 0.9643, "step": 368 }, { "epoch": 0.06634900656297761, "grad_norm": 1.7656373977661133, "learning_rate": 4.419161676646707e-06, "loss": 0.9003, "step": 369 }, { "epoch": 0.06652881416883934, "grad_norm": 1.7512638568878174, "learning_rate": 4.431137724550898e-06, "loss": 1.0196, "step": 370 }, { "epoch": 0.06670862177470108, "grad_norm": 1.7251259088516235, "learning_rate": 4.443113772455091e-06, "loss": 1.0449, "step": 371 }, { "epoch": 0.0668884293805628, "grad_norm": 1.8505330085754395, "learning_rate": 4.4550898203592816e-06, "loss": 0.9643, "step": 372 }, { "epoch": 0.06706823698642453, "grad_norm": 2.084233283996582, "learning_rate": 4.467065868263473e-06, "loss": 0.9276, "step": 373 }, { "epoch": 0.06724804459228625, "grad_norm": 2.026639699935913, "learning_rate": 4.479041916167665e-06, "loss": 0.9876, "step": 374 }, { "epoch": 0.06742785219814798, "grad_norm": 2.3127832412719727, "learning_rate": 4.4910179640718566e-06, "loss": 1.0239, "step": 375 }, { "epoch": 0.0676076598040097, "grad_norm": 1.8194067478179932, "learning_rate": 4.502994011976048e-06, "loss": 0.9656, "step": 376 }, { "epoch": 0.06778746740987143, "grad_norm": 1.7433730363845825, "learning_rate": 4.51497005988024e-06, "loss": 1.0065, "step": 377 }, { "epoch": 0.06796727501573317, "grad_norm": 1.8826082944869995, "learning_rate": 4.526946107784432e-06, "loss": 1.0111, "step": 378 }, { "epoch": 0.0681470826215949, "grad_norm": 1.7901593446731567, "learning_rate": 4.538922155688623e-06, "loss": 0.9594, "step": 379 }, { "epoch": 0.06832689022745662, "grad_norm": 1.7131712436676025, "learning_rate": 4.550898203592815e-06, "loss": 0.9548, "step": 380 }, { "epoch": 0.06850669783331835, "grad_norm": 1.928523063659668, "learning_rate": 4.562874251497007e-06, "loss": 0.9861, "step": 381 }, { "epoch": 0.06868650543918008, "grad_norm": 1.2345408201217651, "learning_rate": 4.574850299401198e-06, "loss": 1.2499, "step": 382 }, { "epoch": 0.0688663130450418, "grad_norm": 1.909363865852356, "learning_rate": 4.58682634730539e-06, "loss": 0.9654, "step": 383 }, { "epoch": 0.06904612065090353, "grad_norm": 1.9810256958007812, "learning_rate": 4.598802395209581e-06, "loss": 0.9701, "step": 384 }, { "epoch": 0.06922592825676527, "grad_norm": 1.7333548069000244, "learning_rate": 4.610778443113773e-06, "loss": 0.9772, "step": 385 }, { "epoch": 0.069405735862627, "grad_norm": 1.6930829286575317, "learning_rate": 4.622754491017964e-06, "loss": 0.9793, "step": 386 }, { "epoch": 0.06958554346848872, "grad_norm": 1.9414986371994019, "learning_rate": 4.634730538922156e-06, "loss": 0.9428, "step": 387 }, { "epoch": 0.06976535107435045, "grad_norm": 1.128265380859375, "learning_rate": 4.6467065868263474e-06, "loss": 1.197, "step": 388 }, { "epoch": 0.06994515868021217, "grad_norm": 1.809678316116333, "learning_rate": 4.658682634730539e-06, "loss": 0.9971, "step": 389 }, { "epoch": 0.0701249662860739, "grad_norm": 1.921159029006958, "learning_rate": 4.670658682634731e-06, "loss": 1.0006, "step": 390 }, { "epoch": 0.07030477389193562, "grad_norm": 1.7511223554611206, "learning_rate": 4.6826347305389224e-06, "loss": 1.0615, "step": 391 }, { "epoch": 0.07048458149779736, "grad_norm": 1.2056549787521362, "learning_rate": 4.694610778443114e-06, "loss": 1.2274, "step": 392 }, { "epoch": 0.07066438910365909, "grad_norm": 1.7070738077163696, "learning_rate": 4.706586826347306e-06, "loss": 0.9849, "step": 393 }, { "epoch": 0.07084419670952082, "grad_norm": 2.0201613903045654, "learning_rate": 4.7185628742514974e-06, "loss": 0.9357, "step": 394 }, { "epoch": 0.07102400431538254, "grad_norm": 1.2165462970733643, "learning_rate": 4.730538922155689e-06, "loss": 1.1895, "step": 395 }, { "epoch": 0.07120381192124427, "grad_norm": 1.7254154682159424, "learning_rate": 4.742514970059881e-06, "loss": 1.0097, "step": 396 }, { "epoch": 0.071383619527106, "grad_norm": 1.8182356357574463, "learning_rate": 4.7544910179640725e-06, "loss": 0.9667, "step": 397 }, { "epoch": 0.07156342713296772, "grad_norm": 1.9337152242660522, "learning_rate": 4.766467065868264e-06, "loss": 0.9085, "step": 398 }, { "epoch": 0.07174323473882946, "grad_norm": 1.8585025072097778, "learning_rate": 4.778443113772456e-06, "loss": 0.9578, "step": 399 }, { "epoch": 0.07192304234469118, "grad_norm": 1.907228708267212, "learning_rate": 4.7904191616766475e-06, "loss": 0.9736, "step": 400 }, { "epoch": 0.07210284995055291, "grad_norm": 1.775893211364746, "learning_rate": 4.802395209580838e-06, "loss": 0.972, "step": 401 }, { "epoch": 0.07228265755641464, "grad_norm": 1.173914909362793, "learning_rate": 4.814371257485031e-06, "loss": 1.2152, "step": 402 }, { "epoch": 0.07246246516227636, "grad_norm": 1.672524094581604, "learning_rate": 4.826347305389222e-06, "loss": 0.8918, "step": 403 }, { "epoch": 0.07264227276813809, "grad_norm": 1.7716736793518066, "learning_rate": 4.838323353293414e-06, "loss": 0.8947, "step": 404 }, { "epoch": 0.07282208037399981, "grad_norm": 1.7595797777175903, "learning_rate": 4.850299401197605e-06, "loss": 0.9059, "step": 405 }, { "epoch": 0.07300188797986155, "grad_norm": 1.1611583232879639, "learning_rate": 4.862275449101797e-06, "loss": 1.2094, "step": 406 }, { "epoch": 0.07318169558572328, "grad_norm": 1.7134979963302612, "learning_rate": 4.874251497005988e-06, "loss": 1.0136, "step": 407 }, { "epoch": 0.073361503191585, "grad_norm": 1.704213261604309, "learning_rate": 4.88622754491018e-06, "loss": 0.9499, "step": 408 }, { "epoch": 0.07354131079744673, "grad_norm": 1.8486324548721313, "learning_rate": 4.898203592814372e-06, "loss": 0.8816, "step": 409 }, { "epoch": 0.07372111840330846, "grad_norm": 1.6840581893920898, "learning_rate": 4.910179640718563e-06, "loss": 1.0306, "step": 410 }, { "epoch": 0.07390092600917018, "grad_norm": 1.8446835279464722, "learning_rate": 4.922155688622755e-06, "loss": 1.0036, "step": 411 }, { "epoch": 0.07408073361503191, "grad_norm": 1.6787819862365723, "learning_rate": 4.934131736526947e-06, "loss": 0.9929, "step": 412 }, { "epoch": 0.07426054122089365, "grad_norm": 1.6239440441131592, "learning_rate": 4.946107784431138e-06, "loss": 0.9668, "step": 413 }, { "epoch": 0.07444034882675538, "grad_norm": 2.063288450241089, "learning_rate": 4.95808383233533e-06, "loss": 0.8949, "step": 414 }, { "epoch": 0.0746201564326171, "grad_norm": 1.8766833543777466, "learning_rate": 4.970059880239521e-06, "loss": 0.967, "step": 415 }, { "epoch": 0.07479996403847883, "grad_norm": 1.8147145509719849, "learning_rate": 4.982035928143713e-06, "loss": 0.9777, "step": 416 }, { "epoch": 0.07497977164434055, "grad_norm": 1.6962347030639648, "learning_rate": 4.994011976047904e-06, "loss": 0.9182, "step": 417 }, { "epoch": 0.07515957925020228, "grad_norm": 1.820277214050293, "learning_rate": 5.005988023952097e-06, "loss": 0.9804, "step": 418 }, { "epoch": 0.075339386856064, "grad_norm": 1.726403832435608, "learning_rate": 5.017964071856288e-06, "loss": 0.8584, "step": 419 }, { "epoch": 0.07551919446192575, "grad_norm": 1.67923903465271, "learning_rate": 5.029940119760479e-06, "loss": 1.0354, "step": 420 }, { "epoch": 0.07569900206778747, "grad_norm": 1.5700708627700806, "learning_rate": 5.041916167664671e-06, "loss": 0.9786, "step": 421 }, { "epoch": 0.0758788096736492, "grad_norm": 1.157231330871582, "learning_rate": 5.053892215568863e-06, "loss": 1.2489, "step": 422 }, { "epoch": 0.07605861727951092, "grad_norm": 1.7661620378494263, "learning_rate": 5.065868263473054e-06, "loss": 0.9121, "step": 423 }, { "epoch": 0.07623842488537265, "grad_norm": 1.2326743602752686, "learning_rate": 5.077844311377246e-06, "loss": 1.1906, "step": 424 }, { "epoch": 0.07641823249123438, "grad_norm": 1.874860167503357, "learning_rate": 5.0898203592814375e-06, "loss": 1.0547, "step": 425 }, { "epoch": 0.0765980400970961, "grad_norm": 1.737432599067688, "learning_rate": 5.10179640718563e-06, "loss": 0.988, "step": 426 }, { "epoch": 0.07677784770295784, "grad_norm": 1.765594720840454, "learning_rate": 5.113772455089821e-06, "loss": 0.9782, "step": 427 }, { "epoch": 0.07695765530881957, "grad_norm": 1.7158070802688599, "learning_rate": 5.1257485029940125e-06, "loss": 0.914, "step": 428 }, { "epoch": 0.07713746291468129, "grad_norm": 1.9856371879577637, "learning_rate": 5.137724550898204e-06, "loss": 0.9008, "step": 429 }, { "epoch": 0.07731727052054302, "grad_norm": 1.826399564743042, "learning_rate": 5.149700598802395e-06, "loss": 1.0229, "step": 430 }, { "epoch": 0.07749707812640474, "grad_norm": 1.7735880613327026, "learning_rate": 5.161676646706587e-06, "loss": 0.9533, "step": 431 }, { "epoch": 0.07767688573226647, "grad_norm": 1.1743147373199463, "learning_rate": 5.173652694610779e-06, "loss": 1.1876, "step": 432 }, { "epoch": 0.0778566933381282, "grad_norm": 1.845294713973999, "learning_rate": 5.185628742514971e-06, "loss": 0.8802, "step": 433 }, { "epoch": 0.07803650094398994, "grad_norm": 1.6761425733566284, "learning_rate": 5.197604790419162e-06, "loss": 1.0351, "step": 434 }, { "epoch": 0.07821630854985166, "grad_norm": 1.7290602922439575, "learning_rate": 5.209580838323353e-06, "loss": 0.8978, "step": 435 }, { "epoch": 0.07839611615571339, "grad_norm": 1.1706349849700928, "learning_rate": 5.221556886227546e-06, "loss": 1.1997, "step": 436 }, { "epoch": 0.07857592376157511, "grad_norm": 1.8006888628005981, "learning_rate": 5.233532934131737e-06, "loss": 0.9721, "step": 437 }, { "epoch": 0.07875573136743684, "grad_norm": 1.732973575592041, "learning_rate": 5.245508982035928e-06, "loss": 0.9943, "step": 438 }, { "epoch": 0.07893553897329857, "grad_norm": 1.1544519662857056, "learning_rate": 5.25748502994012e-06, "loss": 1.1864, "step": 439 }, { "epoch": 0.07911534657916029, "grad_norm": 1.7812857627868652, "learning_rate": 5.2694610778443125e-06, "loss": 0.9464, "step": 440 }, { "epoch": 0.07929515418502203, "grad_norm": 1.7639440298080444, "learning_rate": 5.281437125748503e-06, "loss": 0.9765, "step": 441 }, { "epoch": 0.07947496179088376, "grad_norm": 1.82053804397583, "learning_rate": 5.293413173652695e-06, "loss": 0.9458, "step": 442 }, { "epoch": 0.07965476939674548, "grad_norm": 1.8917347192764282, "learning_rate": 5.305389221556887e-06, "loss": 0.9589, "step": 443 }, { "epoch": 0.07983457700260721, "grad_norm": 1.7603353261947632, "learning_rate": 5.3173652694610775e-06, "loss": 0.9858, "step": 444 }, { "epoch": 0.08001438460846894, "grad_norm": 1.7350261211395264, "learning_rate": 5.32934131736527e-06, "loss": 1.0071, "step": 445 }, { "epoch": 0.08019419221433066, "grad_norm": 1.776847004890442, "learning_rate": 5.341317365269462e-06, "loss": 0.9711, "step": 446 }, { "epoch": 0.08037399982019239, "grad_norm": 1.8468021154403687, "learning_rate": 5.353293413173653e-06, "loss": 0.9826, "step": 447 }, { "epoch": 0.08055380742605413, "grad_norm": 1.7733122110366821, "learning_rate": 5.365269461077844e-06, "loss": 0.9856, "step": 448 }, { "epoch": 0.08073361503191585, "grad_norm": 1.9740803241729736, "learning_rate": 5.377245508982037e-06, "loss": 0.9313, "step": 449 }, { "epoch": 0.08091342263777758, "grad_norm": 1.9943009614944458, "learning_rate": 5.389221556886228e-06, "loss": 0.9238, "step": 450 }, { "epoch": 0.0810932302436393, "grad_norm": 1.2766140699386597, "learning_rate": 5.401197604790419e-06, "loss": 1.186, "step": 451 }, { "epoch": 0.08127303784950103, "grad_norm": 1.7753628492355347, "learning_rate": 5.413173652694611e-06, "loss": 0.9579, "step": 452 }, { "epoch": 0.08145284545536276, "grad_norm": 1.8425410985946655, "learning_rate": 5.4251497005988026e-06, "loss": 1.0028, "step": 453 }, { "epoch": 0.08163265306122448, "grad_norm": 1.6580477952957153, "learning_rate": 5.437125748502995e-06, "loss": 0.9362, "step": 454 }, { "epoch": 0.08181246066708622, "grad_norm": 1.5811421871185303, "learning_rate": 5.449101796407186e-06, "loss": 0.7823, "step": 455 }, { "epoch": 0.08199226827294795, "grad_norm": 1.1689233779907227, "learning_rate": 5.4610778443113776e-06, "loss": 1.1821, "step": 456 }, { "epoch": 0.08217207587880968, "grad_norm": 1.8087161779403687, "learning_rate": 5.473053892215569e-06, "loss": 0.958, "step": 457 }, { "epoch": 0.0823518834846714, "grad_norm": 1.7053877115249634, "learning_rate": 5.48502994011976e-06, "loss": 0.8984, "step": 458 }, { "epoch": 0.08253169109053313, "grad_norm": 1.7924336194992065, "learning_rate": 5.4970059880239526e-06, "loss": 0.8955, "step": 459 }, { "epoch": 0.08271149869639485, "grad_norm": 1.637673258781433, "learning_rate": 5.508982035928144e-06, "loss": 0.9694, "step": 460 }, { "epoch": 0.08289130630225658, "grad_norm": 1.8700332641601562, "learning_rate": 5.520958083832336e-06, "loss": 0.998, "step": 461 }, { "epoch": 0.08307111390811832, "grad_norm": 1.7869279384613037, "learning_rate": 5.532934131736527e-06, "loss": 0.9597, "step": 462 }, { "epoch": 0.08325092151398004, "grad_norm": 1.729970097541809, "learning_rate": 5.544910179640719e-06, "loss": 0.9003, "step": 463 }, { "epoch": 0.08343072911984177, "grad_norm": 1.6275534629821777, "learning_rate": 5.556886227544911e-06, "loss": 0.8882, "step": 464 }, { "epoch": 0.0836105367257035, "grad_norm": 1.151465654373169, "learning_rate": 5.568862275449102e-06, "loss": 1.207, "step": 465 }, { "epoch": 0.08379034433156522, "grad_norm": 1.1187152862548828, "learning_rate": 5.580838323353293e-06, "loss": 1.2011, "step": 466 }, { "epoch": 0.08397015193742695, "grad_norm": 1.7261106967926025, "learning_rate": 5.592814371257486e-06, "loss": 0.9622, "step": 467 }, { "epoch": 0.08414995954328867, "grad_norm": 1.9694921970367432, "learning_rate": 5.604790419161678e-06, "loss": 0.9597, "step": 468 }, { "epoch": 0.08432976714915041, "grad_norm": 1.7559677362442017, "learning_rate": 5.616766467065868e-06, "loss": 0.925, "step": 469 }, { "epoch": 0.08450957475501214, "grad_norm": 1.8473690748214722, "learning_rate": 5.62874251497006e-06, "loss": 0.9827, "step": 470 }, { "epoch": 0.08468938236087387, "grad_norm": 1.817365288734436, "learning_rate": 5.640718562874253e-06, "loss": 0.9749, "step": 471 }, { "epoch": 0.08486918996673559, "grad_norm": 1.7400645017623901, "learning_rate": 5.6526946107784434e-06, "loss": 0.9274, "step": 472 }, { "epoch": 0.08504899757259732, "grad_norm": 1.7256238460540771, "learning_rate": 5.664670658682635e-06, "loss": 0.9827, "step": 473 }, { "epoch": 0.08522880517845904, "grad_norm": 1.741178035736084, "learning_rate": 5.676646706586827e-06, "loss": 0.9085, "step": 474 }, { "epoch": 0.08540861278432077, "grad_norm": 1.7428057193756104, "learning_rate": 5.6886227544910184e-06, "loss": 0.9165, "step": 475 }, { "epoch": 0.08558842039018251, "grad_norm": 1.7900421619415283, "learning_rate": 5.700598802395209e-06, "loss": 0.9521, "step": 476 }, { "epoch": 0.08576822799604424, "grad_norm": 1.84506356716156, "learning_rate": 5.712574850299402e-06, "loss": 0.909, "step": 477 }, { "epoch": 0.08594803560190596, "grad_norm": 1.809199333190918, "learning_rate": 5.7245508982035934e-06, "loss": 0.9999, "step": 478 }, { "epoch": 0.08612784320776769, "grad_norm": 1.8774253129959106, "learning_rate": 5.736526946107784e-06, "loss": 1.0266, "step": 479 }, { "epoch": 0.08630765081362941, "grad_norm": 1.8902053833007812, "learning_rate": 5.748502994011976e-06, "loss": 0.9501, "step": 480 }, { "epoch": 0.08648745841949114, "grad_norm": 1.7941503524780273, "learning_rate": 5.7604790419161685e-06, "loss": 0.9402, "step": 481 }, { "epoch": 0.08666726602535288, "grad_norm": 3.154217004776001, "learning_rate": 5.77245508982036e-06, "loss": 0.9632, "step": 482 }, { "epoch": 0.0868470736312146, "grad_norm": 1.734717845916748, "learning_rate": 5.784431137724551e-06, "loss": 0.9196, "step": 483 }, { "epoch": 0.08702688123707633, "grad_norm": 1.7539483308792114, "learning_rate": 5.796407185628743e-06, "loss": 0.9539, "step": 484 }, { "epoch": 0.08720668884293806, "grad_norm": 1.6769403219223022, "learning_rate": 5.808383233532935e-06, "loss": 0.9208, "step": 485 }, { "epoch": 0.08738649644879978, "grad_norm": 1.7643227577209473, "learning_rate": 5.820359281437126e-06, "loss": 0.9323, "step": 486 }, { "epoch": 0.08756630405466151, "grad_norm": 1.4807307720184326, "learning_rate": 5.832335329341318e-06, "loss": 1.1958, "step": 487 }, { "epoch": 0.08774611166052323, "grad_norm": 1.742467999458313, "learning_rate": 5.844311377245509e-06, "loss": 0.9414, "step": 488 }, { "epoch": 0.08792591926638497, "grad_norm": 2.0256545543670654, "learning_rate": 5.856287425149702e-06, "loss": 0.8647, "step": 489 }, { "epoch": 0.0881057268722467, "grad_norm": 1.9930633306503296, "learning_rate": 5.868263473053893e-06, "loss": 0.9543, "step": 490 }, { "epoch": 0.08828553447810843, "grad_norm": 1.9071711301803589, "learning_rate": 5.880239520958084e-06, "loss": 0.9898, "step": 491 }, { "epoch": 0.08846534208397015, "grad_norm": 1.793324589729309, "learning_rate": 5.892215568862276e-06, "loss": 0.9572, "step": 492 }, { "epoch": 0.08864514968983188, "grad_norm": 1.1457830667495728, "learning_rate": 5.904191616766467e-06, "loss": 1.2145, "step": 493 }, { "epoch": 0.0888249572956936, "grad_norm": 1.889729380607605, "learning_rate": 5.916167664670659e-06, "loss": 1.0656, "step": 494 }, { "epoch": 0.08900476490155533, "grad_norm": 1.8089038133621216, "learning_rate": 5.928143712574851e-06, "loss": 0.9851, "step": 495 }, { "epoch": 0.08918457250741707, "grad_norm": 1.6494346857070923, "learning_rate": 5.940119760479043e-06, "loss": 0.8725, "step": 496 }, { "epoch": 0.0893643801132788, "grad_norm": 1.7822129726409912, "learning_rate": 5.9520958083832335e-06, "loss": 0.9302, "step": 497 }, { "epoch": 0.08954418771914052, "grad_norm": 1.7356148958206177, "learning_rate": 5.964071856287426e-06, "loss": 0.9904, "step": 498 }, { "epoch": 0.08972399532500225, "grad_norm": 1.9197896718978882, "learning_rate": 5.976047904191618e-06, "loss": 0.9013, "step": 499 }, { "epoch": 0.08990380293086397, "grad_norm": 1.772370457649231, "learning_rate": 5.9880239520958085e-06, "loss": 0.9536, "step": 500 }, { "epoch": 0.08990380293086397, "eval_loss": 0.9652984738349915, "eval_runtime": 157.658, "eval_samples_per_second": 91.223, "eval_steps_per_second": 1.427, "step": 500 }, { "epoch": 0.0900836105367257, "grad_norm": 1.25033438205719, "learning_rate": 6e-06, "loss": 1.2378, "step": 501 }, { "epoch": 0.09026341814258743, "grad_norm": 1.8959858417510986, "learning_rate": 6.011976047904192e-06, "loss": 0.9434, "step": 502 }, { "epoch": 0.09044322574844917, "grad_norm": 1.7064205408096313, "learning_rate": 6.023952095808384e-06, "loss": 0.9109, "step": 503 }, { "epoch": 0.09062303335431089, "grad_norm": 1.797195315361023, "learning_rate": 6.035928143712575e-06, "loss": 0.8785, "step": 504 }, { "epoch": 0.09080284096017262, "grad_norm": 1.8160744905471802, "learning_rate": 6.047904191616767e-06, "loss": 0.9633, "step": 505 }, { "epoch": 0.09098264856603434, "grad_norm": 1.8054287433624268, "learning_rate": 6.0598802395209585e-06, "loss": 0.8637, "step": 506 }, { "epoch": 0.09116245617189607, "grad_norm": 1.6033968925476074, "learning_rate": 6.071856287425149e-06, "loss": 0.9063, "step": 507 }, { "epoch": 0.0913422637777578, "grad_norm": 1.9071788787841797, "learning_rate": 6.083832335329342e-06, "loss": 0.9903, "step": 508 }, { "epoch": 0.09152207138361952, "grad_norm": 1.8765738010406494, "learning_rate": 6.0958083832335335e-06, "loss": 0.9576, "step": 509 }, { "epoch": 0.09170187898948126, "grad_norm": 1.1142810583114624, "learning_rate": 6.107784431137725e-06, "loss": 1.1549, "step": 510 }, { "epoch": 0.09188168659534299, "grad_norm": 1.782086730003357, "learning_rate": 6.119760479041916e-06, "loss": 0.9529, "step": 511 }, { "epoch": 0.09206149420120471, "grad_norm": 1.7321724891662598, "learning_rate": 6.1317365269461085e-06, "loss": 0.9732, "step": 512 }, { "epoch": 0.09224130180706644, "grad_norm": 1.6088876724243164, "learning_rate": 6.1437125748503e-06, "loss": 0.8624, "step": 513 }, { "epoch": 0.09242110941292817, "grad_norm": 1.7550534009933472, "learning_rate": 6.155688622754491e-06, "loss": 0.8594, "step": 514 }, { "epoch": 0.09260091701878989, "grad_norm": 1.8055754899978638, "learning_rate": 6.167664670658683e-06, "loss": 0.919, "step": 515 }, { "epoch": 0.09278072462465162, "grad_norm": 1.2034647464752197, "learning_rate": 6.179640718562875e-06, "loss": 1.1998, "step": 516 }, { "epoch": 0.09296053223051336, "grad_norm": 1.7007194757461548, "learning_rate": 6.191616766467067e-06, "loss": 0.8855, "step": 517 }, { "epoch": 0.09314033983637508, "grad_norm": 1.1249313354492188, "learning_rate": 6.203592814371258e-06, "loss": 1.213, "step": 518 }, { "epoch": 0.09332014744223681, "grad_norm": 1.6970165967941284, "learning_rate": 6.215568862275449e-06, "loss": 0.8374, "step": 519 }, { "epoch": 0.09349995504809853, "grad_norm": 1.6151877641677856, "learning_rate": 6.227544910179642e-06, "loss": 0.9546, "step": 520 }, { "epoch": 0.09367976265396026, "grad_norm": 1.991220474243164, "learning_rate": 6.239520958083833e-06, "loss": 0.9468, "step": 521 }, { "epoch": 0.09385957025982199, "grad_norm": 1.6759806871414185, "learning_rate": 6.251497005988024e-06, "loss": 0.9874, "step": 522 }, { "epoch": 0.09403937786568371, "grad_norm": 1.7612195014953613, "learning_rate": 6.263473053892216e-06, "loss": 0.9513, "step": 523 }, { "epoch": 0.09421918547154545, "grad_norm": 1.7410379648208618, "learning_rate": 6.275449101796408e-06, "loss": 0.9067, "step": 524 }, { "epoch": 0.09439899307740718, "grad_norm": 1.708205223083496, "learning_rate": 6.2874251497005985e-06, "loss": 0.9076, "step": 525 }, { "epoch": 0.0945788006832689, "grad_norm": 1.6215263605117798, "learning_rate": 6.299401197604791e-06, "loss": 0.9459, "step": 526 }, { "epoch": 0.09475860828913063, "grad_norm": 1.671597957611084, "learning_rate": 6.311377245508983e-06, "loss": 0.9693, "step": 527 }, { "epoch": 0.09493841589499236, "grad_norm": 1.643906831741333, "learning_rate": 6.323353293413174e-06, "loss": 0.9527, "step": 528 }, { "epoch": 0.09511822350085408, "grad_norm": 1.8455469608306885, "learning_rate": 6.335329341317365e-06, "loss": 0.9352, "step": 529 }, { "epoch": 0.09529803110671581, "grad_norm": 1.6933040618896484, "learning_rate": 6.347305389221558e-06, "loss": 0.9139, "step": 530 }, { "epoch": 0.09547783871257755, "grad_norm": 1.619672179222107, "learning_rate": 6.359281437125749e-06, "loss": 0.9752, "step": 531 }, { "epoch": 0.09565764631843927, "grad_norm": 1.6876964569091797, "learning_rate": 6.37125748502994e-06, "loss": 0.8992, "step": 532 }, { "epoch": 0.095837453924301, "grad_norm": 1.6739263534545898, "learning_rate": 6.383233532934132e-06, "loss": 0.9758, "step": 533 }, { "epoch": 0.09601726153016273, "grad_norm": 1.607524037361145, "learning_rate": 6.395209580838324e-06, "loss": 0.9208, "step": 534 }, { "epoch": 0.09619706913602445, "grad_norm": 1.7267475128173828, "learning_rate": 6.407185628742516e-06, "loss": 0.9361, "step": 535 }, { "epoch": 0.09637687674188618, "grad_norm": 1.8006112575531006, "learning_rate": 6.419161676646707e-06, "loss": 0.9391, "step": 536 }, { "epoch": 0.0965566843477479, "grad_norm": 1.1835089921951294, "learning_rate": 6.4311377245508986e-06, "loss": 1.1797, "step": 537 }, { "epoch": 0.09673649195360964, "grad_norm": 1.734359622001648, "learning_rate": 6.443113772455091e-06, "loss": 0.8727, "step": 538 }, { "epoch": 0.09691629955947137, "grad_norm": 1.0672255754470825, "learning_rate": 6.455089820359282e-06, "loss": 1.2344, "step": 539 }, { "epoch": 0.0970961071653331, "grad_norm": 1.960439682006836, "learning_rate": 6.4670658682634736e-06, "loss": 0.9983, "step": 540 }, { "epoch": 0.09727591477119482, "grad_norm": 1.6380573511123657, "learning_rate": 6.479041916167665e-06, "loss": 1.0443, "step": 541 }, { "epoch": 0.09745572237705655, "grad_norm": 1.8760093450546265, "learning_rate": 6.491017964071858e-06, "loss": 0.9085, "step": 542 }, { "epoch": 0.09763552998291827, "grad_norm": 1.8025975227355957, "learning_rate": 6.5029940119760486e-06, "loss": 0.9263, "step": 543 }, { "epoch": 0.09781533758878, "grad_norm": 1.731257438659668, "learning_rate": 6.51497005988024e-06, "loss": 0.9095, "step": 544 }, { "epoch": 0.09799514519464174, "grad_norm": 1.742108941078186, "learning_rate": 6.526946107784432e-06, "loss": 0.8706, "step": 545 }, { "epoch": 0.09817495280050346, "grad_norm": 1.7484378814697266, "learning_rate": 6.538922155688623e-06, "loss": 0.9432, "step": 546 }, { "epoch": 0.09835476040636519, "grad_norm": 1.7532520294189453, "learning_rate": 6.550898203592814e-06, "loss": 0.9645, "step": 547 }, { "epoch": 0.09853456801222692, "grad_norm": 1.6894311904907227, "learning_rate": 6.562874251497007e-06, "loss": 0.9599, "step": 548 }, { "epoch": 0.09871437561808864, "grad_norm": 1.602371335029602, "learning_rate": 6.574850299401199e-06, "loss": 0.9431, "step": 549 }, { "epoch": 0.09889418322395037, "grad_norm": 1.6339343786239624, "learning_rate": 6.586826347305389e-06, "loss": 0.8837, "step": 550 }, { "epoch": 0.0990739908298121, "grad_norm": 1.8822232484817505, "learning_rate": 6.598802395209581e-06, "loss": 0.9601, "step": 551 }, { "epoch": 0.09925379843567383, "grad_norm": 1.2791152000427246, "learning_rate": 6.610778443113774e-06, "loss": 1.1814, "step": 552 }, { "epoch": 0.09943360604153556, "grad_norm": 1.5952163934707642, "learning_rate": 6.6227544910179644e-06, "loss": 0.9145, "step": 553 }, { "epoch": 0.09961341364739729, "grad_norm": 1.8071157932281494, "learning_rate": 6.634730538922156e-06, "loss": 0.9101, "step": 554 }, { "epoch": 0.09979322125325901, "grad_norm": 1.624515414237976, "learning_rate": 6.646706586826348e-06, "loss": 0.8201, "step": 555 }, { "epoch": 0.09997302885912074, "grad_norm": 1.7250467538833618, "learning_rate": 6.65868263473054e-06, "loss": 0.9655, "step": 556 }, { "epoch": 0.10015283646498246, "grad_norm": 1.6801848411560059, "learning_rate": 6.670658682634731e-06, "loss": 0.936, "step": 557 }, { "epoch": 0.10033264407084419, "grad_norm": 1.7285845279693604, "learning_rate": 6.682634730538923e-06, "loss": 0.8923, "step": 558 }, { "epoch": 0.10051245167670593, "grad_norm": 1.668720006942749, "learning_rate": 6.6946107784431144e-06, "loss": 0.9211, "step": 559 }, { "epoch": 0.10069225928256766, "grad_norm": 1.7113367319107056, "learning_rate": 6.706586826347305e-06, "loss": 0.932, "step": 560 }, { "epoch": 0.10087206688842938, "grad_norm": 1.1733331680297852, "learning_rate": 6.718562874251498e-06, "loss": 1.1756, "step": 561 }, { "epoch": 0.10105187449429111, "grad_norm": 1.6508244276046753, "learning_rate": 6.7305389221556894e-06, "loss": 0.9011, "step": 562 }, { "epoch": 0.10123168210015283, "grad_norm": 1.7112051248550415, "learning_rate": 6.742514970059881e-06, "loss": 0.9045, "step": 563 }, { "epoch": 0.10141148970601456, "grad_norm": 1.649211049079895, "learning_rate": 6.754491017964072e-06, "loss": 0.9406, "step": 564 }, { "epoch": 0.10159129731187629, "grad_norm": 1.714600682258606, "learning_rate": 6.7664670658682645e-06, "loss": 0.9422, "step": 565 }, { "epoch": 0.10177110491773803, "grad_norm": 1.681504726409912, "learning_rate": 6.778443113772456e-06, "loss": 0.9129, "step": 566 }, { "epoch": 0.10195091252359975, "grad_norm": 2.029299020767212, "learning_rate": 6.790419161676647e-06, "loss": 0.9583, "step": 567 }, { "epoch": 0.10213072012946148, "grad_norm": 1.775206208229065, "learning_rate": 6.802395209580839e-06, "loss": 0.9289, "step": 568 }, { "epoch": 0.1023105277353232, "grad_norm": 1.6537646055221558, "learning_rate": 6.81437125748503e-06, "loss": 0.9113, "step": 569 }, { "epoch": 0.10249033534118493, "grad_norm": 1.7979551553726196, "learning_rate": 6.826347305389223e-06, "loss": 0.8819, "step": 570 }, { "epoch": 0.10267014294704666, "grad_norm": 1.595937728881836, "learning_rate": 6.838323353293414e-06, "loss": 0.9048, "step": 571 }, { "epoch": 0.10284995055290838, "grad_norm": 1.6814571619033813, "learning_rate": 6.850299401197605e-06, "loss": 0.999, "step": 572 }, { "epoch": 0.10302975815877012, "grad_norm": 1.6198382377624512, "learning_rate": 6.862275449101797e-06, "loss": 0.9272, "step": 573 }, { "epoch": 0.10320956576463185, "grad_norm": 1.6672788858413696, "learning_rate": 6.874251497005988e-06, "loss": 0.8991, "step": 574 }, { "epoch": 0.10338937337049357, "grad_norm": 1.8261932134628296, "learning_rate": 6.88622754491018e-06, "loss": 0.9227, "step": 575 }, { "epoch": 0.1035691809763553, "grad_norm": 1.8468694686889648, "learning_rate": 6.898203592814372e-06, "loss": 1.0039, "step": 576 }, { "epoch": 0.10374898858221702, "grad_norm": 1.705546259880066, "learning_rate": 6.910179640718564e-06, "loss": 0.9177, "step": 577 }, { "epoch": 0.10392879618807875, "grad_norm": 1.7098388671875, "learning_rate": 6.9221556886227545e-06, "loss": 0.9037, "step": 578 }, { "epoch": 0.10410860379394048, "grad_norm": 1.553935170173645, "learning_rate": 6.934131736526947e-06, "loss": 0.9945, "step": 579 }, { "epoch": 0.10428841139980222, "grad_norm": 1.623223900794983, "learning_rate": 6.946107784431139e-06, "loss": 0.9174, "step": 580 }, { "epoch": 0.10446821900566394, "grad_norm": 1.8053520917892456, "learning_rate": 6.9580838323353295e-06, "loss": 0.8904, "step": 581 }, { "epoch": 0.10464802661152567, "grad_norm": 1.7996903657913208, "learning_rate": 6.970059880239521e-06, "loss": 0.9782, "step": 582 }, { "epoch": 0.1048278342173874, "grad_norm": 1.309459924697876, "learning_rate": 6.982035928143714e-06, "loss": 1.1603, "step": 583 }, { "epoch": 0.10500764182324912, "grad_norm": 1.843898892402649, "learning_rate": 6.994011976047905e-06, "loss": 0.9366, "step": 584 }, { "epoch": 0.10518744942911085, "grad_norm": 1.8080729246139526, "learning_rate": 7.005988023952096e-06, "loss": 0.9601, "step": 585 }, { "epoch": 0.10536725703497259, "grad_norm": 1.5783809423446655, "learning_rate": 7.017964071856288e-06, "loss": 1.0563, "step": 586 }, { "epoch": 0.10554706464083431, "grad_norm": 1.792447805404663, "learning_rate": 7.02994011976048e-06, "loss": 0.9083, "step": 587 }, { "epoch": 0.10572687224669604, "grad_norm": 1.6422568559646606, "learning_rate": 7.041916167664671e-06, "loss": 0.8406, "step": 588 }, { "epoch": 0.10590667985255776, "grad_norm": 1.667769432067871, "learning_rate": 7.053892215568863e-06, "loss": 0.9206, "step": 589 }, { "epoch": 0.10608648745841949, "grad_norm": 1.2106565237045288, "learning_rate": 7.0658682634730545e-06, "loss": 1.1865, "step": 590 }, { "epoch": 0.10626629506428122, "grad_norm": 1.1058661937713623, "learning_rate": 7.077844311377246e-06, "loss": 1.2193, "step": 591 }, { "epoch": 0.10644610267014294, "grad_norm": 1.7327882051467896, "learning_rate": 7.089820359281437e-06, "loss": 0.8768, "step": 592 }, { "epoch": 0.10662591027600468, "grad_norm": 1.8195631504058838, "learning_rate": 7.1017964071856295e-06, "loss": 0.953, "step": 593 }, { "epoch": 0.10680571788186641, "grad_norm": 1.7245984077453613, "learning_rate": 7.113772455089821e-06, "loss": 0.8526, "step": 594 }, { "epoch": 0.10698552548772813, "grad_norm": 1.5383367538452148, "learning_rate": 7.125748502994012e-06, "loss": 0.8913, "step": 595 }, { "epoch": 0.10716533309358986, "grad_norm": 1.9088881015777588, "learning_rate": 7.137724550898204e-06, "loss": 1.0186, "step": 596 }, { "epoch": 0.10734514069945159, "grad_norm": 1.712821364402771, "learning_rate": 7.149700598802396e-06, "loss": 0.8599, "step": 597 }, { "epoch": 0.10752494830531331, "grad_norm": 1.7849771976470947, "learning_rate": 7.161676646706588e-06, "loss": 0.9229, "step": 598 }, { "epoch": 0.10770475591117504, "grad_norm": 1.742863655090332, "learning_rate": 7.173652694610779e-06, "loss": 0.9041, "step": 599 }, { "epoch": 0.10788456351703678, "grad_norm": 1.7075930833816528, "learning_rate": 7.18562874251497e-06, "loss": 0.9891, "step": 600 }, { "epoch": 0.1080643711228985, "grad_norm": 2.0451431274414062, "learning_rate": 7.197604790419163e-06, "loss": 0.9462, "step": 601 }, { "epoch": 0.10824417872876023, "grad_norm": 1.64388108253479, "learning_rate": 7.209580838323354e-06, "loss": 0.9201, "step": 602 }, { "epoch": 0.10842398633462196, "grad_norm": 1.5722357034683228, "learning_rate": 7.221556886227545e-06, "loss": 0.9479, "step": 603 }, { "epoch": 0.10860379394048368, "grad_norm": 1.6903825998306274, "learning_rate": 7.233532934131737e-06, "loss": 0.934, "step": 604 }, { "epoch": 0.10878360154634541, "grad_norm": 1.775304913520813, "learning_rate": 7.2455089820359295e-06, "loss": 0.8648, "step": 605 }, { "epoch": 0.10896340915220713, "grad_norm": 1.9869139194488525, "learning_rate": 7.25748502994012e-06, "loss": 0.9034, "step": 606 }, { "epoch": 0.10914321675806887, "grad_norm": 1.3685516119003296, "learning_rate": 7.269461077844312e-06, "loss": 1.1702, "step": 607 }, { "epoch": 0.1093230243639306, "grad_norm": 1.8733711242675781, "learning_rate": 7.281437125748504e-06, "loss": 0.8779, "step": 608 }, { "epoch": 0.10950283196979232, "grad_norm": 1.7073224782943726, "learning_rate": 7.2934131736526945e-06, "loss": 0.9362, "step": 609 }, { "epoch": 0.10968263957565405, "grad_norm": 1.7093414068222046, "learning_rate": 7.305389221556887e-06, "loss": 0.9317, "step": 610 }, { "epoch": 0.10986244718151578, "grad_norm": 1.6927595138549805, "learning_rate": 7.317365269461079e-06, "loss": 0.9941, "step": 611 }, { "epoch": 0.1100422547873775, "grad_norm": 1.6943131685256958, "learning_rate": 7.32934131736527e-06, "loss": 0.916, "step": 612 }, { "epoch": 0.11022206239323923, "grad_norm": 1.878015398979187, "learning_rate": 7.341317365269461e-06, "loss": 0.9167, "step": 613 }, { "epoch": 0.11040186999910097, "grad_norm": 1.7987638711929321, "learning_rate": 7.353293413173654e-06, "loss": 0.8948, "step": 614 }, { "epoch": 0.1105816776049627, "grad_norm": 1.7079964876174927, "learning_rate": 7.365269461077845e-06, "loss": 0.9028, "step": 615 }, { "epoch": 0.11076148521082442, "grad_norm": 1.2145227193832397, "learning_rate": 7.377245508982036e-06, "loss": 1.1981, "step": 616 }, { "epoch": 0.11094129281668615, "grad_norm": 1.657059907913208, "learning_rate": 7.389221556886228e-06, "loss": 0.7899, "step": 617 }, { "epoch": 0.11112110042254787, "grad_norm": 1.7246140241622925, "learning_rate": 7.4011976047904196e-06, "loss": 0.8936, "step": 618 }, { "epoch": 0.1113009080284096, "grad_norm": 1.252634882926941, "learning_rate": 7.413173652694612e-06, "loss": 1.1866, "step": 619 }, { "epoch": 0.11148071563427132, "grad_norm": 1.7987686395645142, "learning_rate": 7.425149700598803e-06, "loss": 0.9441, "step": 620 }, { "epoch": 0.11166052324013306, "grad_norm": 1.9371306896209717, "learning_rate": 7.4371257485029946e-06, "loss": 0.8554, "step": 621 }, { "epoch": 0.11184033084599479, "grad_norm": 1.7206170558929443, "learning_rate": 7.449101796407186e-06, "loss": 0.9046, "step": 622 }, { "epoch": 0.11202013845185652, "grad_norm": 1.6890044212341309, "learning_rate": 7.461077844311377e-06, "loss": 0.9545, "step": 623 }, { "epoch": 0.11219994605771824, "grad_norm": 1.656599998474121, "learning_rate": 7.4730538922155696e-06, "loss": 0.9051, "step": 624 }, { "epoch": 0.11237975366357997, "grad_norm": 1.7126538753509521, "learning_rate": 7.485029940119761e-06, "loss": 0.9196, "step": 625 }, { "epoch": 0.1125595612694417, "grad_norm": 1.6825708150863647, "learning_rate": 7.497005988023953e-06, "loss": 0.9698, "step": 626 }, { "epoch": 0.11273936887530342, "grad_norm": 1.8400715589523315, "learning_rate": 7.508982035928144e-06, "loss": 0.9151, "step": 627 }, { "epoch": 0.11291917648116516, "grad_norm": 1.8362531661987305, "learning_rate": 7.520958083832336e-06, "loss": 0.9051, "step": 628 }, { "epoch": 0.11309898408702689, "grad_norm": 1.6815141439437866, "learning_rate": 7.532934131736528e-06, "loss": 0.8758, "step": 629 }, { "epoch": 0.11327879169288861, "grad_norm": 1.560853123664856, "learning_rate": 7.544910179640719e-06, "loss": 0.8963, "step": 630 }, { "epoch": 0.11345859929875034, "grad_norm": 1.858787178993225, "learning_rate": 7.55688622754491e-06, "loss": 0.8827, "step": 631 }, { "epoch": 0.11363840690461206, "grad_norm": 1.8936269283294678, "learning_rate": 7.568862275449103e-06, "loss": 0.9255, "step": 632 }, { "epoch": 0.11381821451047379, "grad_norm": 1.6040605306625366, "learning_rate": 7.580838323353295e-06, "loss": 1.0273, "step": 633 }, { "epoch": 0.11399802211633552, "grad_norm": 1.8140875101089478, "learning_rate": 7.592814371257485e-06, "loss": 0.9389, "step": 634 }, { "epoch": 0.11417782972219725, "grad_norm": 1.7815375328063965, "learning_rate": 7.604790419161677e-06, "loss": 0.9038, "step": 635 }, { "epoch": 0.11435763732805898, "grad_norm": 1.7183239459991455, "learning_rate": 7.61676646706587e-06, "loss": 0.8942, "step": 636 }, { "epoch": 0.1145374449339207, "grad_norm": 1.7068403959274292, "learning_rate": 7.6287425149700604e-06, "loss": 0.8586, "step": 637 }, { "epoch": 0.11471725253978243, "grad_norm": 1.7292920351028442, "learning_rate": 7.640718562874251e-06, "loss": 0.8779, "step": 638 }, { "epoch": 0.11489706014564416, "grad_norm": 1.3148462772369385, "learning_rate": 7.652694610778444e-06, "loss": 1.1493, "step": 639 }, { "epoch": 0.11507686775150588, "grad_norm": 1.6647124290466309, "learning_rate": 7.664670658682636e-06, "loss": 0.9266, "step": 640 }, { "epoch": 0.11525667535736761, "grad_norm": 1.8224247694015503, "learning_rate": 7.676646706586827e-06, "loss": 0.8507, "step": 641 }, { "epoch": 0.11543648296322935, "grad_norm": 1.6930984258651733, "learning_rate": 7.688622754491018e-06, "loss": 0.9004, "step": 642 }, { "epoch": 0.11561629056909108, "grad_norm": 1.2172038555145264, "learning_rate": 7.70059880239521e-06, "loss": 1.2172, "step": 643 }, { "epoch": 0.1157960981749528, "grad_norm": 1.6325420141220093, "learning_rate": 7.712574850299401e-06, "loss": 0.8864, "step": 644 }, { "epoch": 0.11597590578081453, "grad_norm": 1.5989042520523071, "learning_rate": 7.724550898203594e-06, "loss": 0.964, "step": 645 }, { "epoch": 0.11615571338667625, "grad_norm": 1.7646042108535767, "learning_rate": 7.736526946107785e-06, "loss": 0.9448, "step": 646 }, { "epoch": 0.11633552099253798, "grad_norm": 1.5868765115737915, "learning_rate": 7.748502994011977e-06, "loss": 0.9036, "step": 647 }, { "epoch": 0.1165153285983997, "grad_norm": 1.8089622259140015, "learning_rate": 7.760479041916168e-06, "loss": 0.9329, "step": 648 }, { "epoch": 0.11669513620426145, "grad_norm": 1.2025281190872192, "learning_rate": 7.77245508982036e-06, "loss": 1.1728, "step": 649 }, { "epoch": 0.11687494381012317, "grad_norm": 1.663069725036621, "learning_rate": 7.784431137724551e-06, "loss": 0.9152, "step": 650 }, { "epoch": 0.1170547514159849, "grad_norm": 1.6660956144332886, "learning_rate": 7.796407185628742e-06, "loss": 0.8835, "step": 651 }, { "epoch": 0.11723455902184662, "grad_norm": 1.7383280992507935, "learning_rate": 7.808383233532935e-06, "loss": 0.888, "step": 652 }, { "epoch": 0.11741436662770835, "grad_norm": 1.7670828104019165, "learning_rate": 7.820359281437127e-06, "loss": 0.9376, "step": 653 }, { "epoch": 0.11759417423357008, "grad_norm": 1.6167714595794678, "learning_rate": 7.832335329341318e-06, "loss": 0.9053, "step": 654 }, { "epoch": 0.1177739818394318, "grad_norm": 1.0857747793197632, "learning_rate": 7.844311377245509e-06, "loss": 1.226, "step": 655 }, { "epoch": 0.11795378944529354, "grad_norm": 1.5674372911453247, "learning_rate": 7.856287425149701e-06, "loss": 0.8602, "step": 656 }, { "epoch": 0.11813359705115527, "grad_norm": 1.8643226623535156, "learning_rate": 7.868263473053894e-06, "loss": 0.8547, "step": 657 }, { "epoch": 0.118313404657017, "grad_norm": 2.0989503860473633, "learning_rate": 7.880239520958085e-06, "loss": 0.9605, "step": 658 }, { "epoch": 0.11849321226287872, "grad_norm": 1.6440629959106445, "learning_rate": 7.892215568862275e-06, "loss": 0.8852, "step": 659 }, { "epoch": 0.11867301986874045, "grad_norm": 1.6983898878097534, "learning_rate": 7.904191616766468e-06, "loss": 0.9011, "step": 660 }, { "epoch": 0.11885282747460217, "grad_norm": 1.7091680765151978, "learning_rate": 7.91616766467066e-06, "loss": 0.8303, "step": 661 }, { "epoch": 0.1190326350804639, "grad_norm": 1.8236558437347412, "learning_rate": 7.928143712574851e-06, "loss": 0.9599, "step": 662 }, { "epoch": 0.11921244268632564, "grad_norm": 1.7206841707229614, "learning_rate": 7.940119760479042e-06, "loss": 0.8984, "step": 663 }, { "epoch": 0.11939225029218736, "grad_norm": 1.791481852531433, "learning_rate": 7.952095808383235e-06, "loss": 0.9087, "step": 664 }, { "epoch": 0.11957205789804909, "grad_norm": 1.7144094705581665, "learning_rate": 7.964071856287425e-06, "loss": 0.9845, "step": 665 }, { "epoch": 0.11975186550391081, "grad_norm": 1.7448219060897827, "learning_rate": 7.976047904191618e-06, "loss": 0.9445, "step": 666 }, { "epoch": 0.11993167310977254, "grad_norm": 1.5518908500671387, "learning_rate": 7.988023952095809e-06, "loss": 0.9103, "step": 667 }, { "epoch": 0.12011148071563427, "grad_norm": 1.677140712738037, "learning_rate": 8.000000000000001e-06, "loss": 0.8925, "step": 668 }, { "epoch": 0.12029128832149599, "grad_norm": 1.7833728790283203, "learning_rate": 8.011976047904192e-06, "loss": 0.8581, "step": 669 }, { "epoch": 0.12047109592735773, "grad_norm": 1.7539681196212769, "learning_rate": 8.023952095808385e-06, "loss": 0.8694, "step": 670 }, { "epoch": 0.12065090353321946, "grad_norm": 1.7064322233200073, "learning_rate": 8.035928143712575e-06, "loss": 0.9114, "step": 671 }, { "epoch": 0.12083071113908118, "grad_norm": 1.0922974348068237, "learning_rate": 8.047904191616766e-06, "loss": 1.1648, "step": 672 }, { "epoch": 0.12101051874494291, "grad_norm": 1.7483936548233032, "learning_rate": 8.059880239520959e-06, "loss": 0.8682, "step": 673 }, { "epoch": 0.12119032635080464, "grad_norm": 1.6814199686050415, "learning_rate": 8.07185628742515e-06, "loss": 0.9387, "step": 674 }, { "epoch": 0.12137013395666636, "grad_norm": 1.701964020729065, "learning_rate": 8.083832335329342e-06, "loss": 0.8503, "step": 675 }, { "epoch": 0.12154994156252809, "grad_norm": 1.8799073696136475, "learning_rate": 8.095808383233533e-06, "loss": 0.9102, "step": 676 }, { "epoch": 0.12172974916838983, "grad_norm": 1.833511471748352, "learning_rate": 8.107784431137726e-06, "loss": 0.9227, "step": 677 }, { "epoch": 0.12190955677425155, "grad_norm": 1.2730658054351807, "learning_rate": 8.119760479041916e-06, "loss": 1.1696, "step": 678 }, { "epoch": 0.12208936438011328, "grad_norm": 1.7450326681137085, "learning_rate": 8.131736526946107e-06, "loss": 0.9385, "step": 679 }, { "epoch": 0.122269171985975, "grad_norm": 1.8322423696517944, "learning_rate": 8.1437125748503e-06, "loss": 0.8916, "step": 680 }, { "epoch": 0.12244897959183673, "grad_norm": 1.782846450805664, "learning_rate": 8.155688622754492e-06, "loss": 0.955, "step": 681 }, { "epoch": 0.12262878719769846, "grad_norm": 1.593672752380371, "learning_rate": 8.167664670658683e-06, "loss": 0.9029, "step": 682 }, { "epoch": 0.12280859480356018, "grad_norm": 1.8674789667129517, "learning_rate": 8.179640718562874e-06, "loss": 0.9503, "step": 683 }, { "epoch": 0.12298840240942192, "grad_norm": 1.595565915107727, "learning_rate": 8.191616766467066e-06, "loss": 0.8918, "step": 684 }, { "epoch": 0.12316821001528365, "grad_norm": 1.5958307981491089, "learning_rate": 8.203592814371259e-06, "loss": 0.931, "step": 685 }, { "epoch": 0.12334801762114538, "grad_norm": 1.0696120262145996, "learning_rate": 8.21556886227545e-06, "loss": 1.2092, "step": 686 }, { "epoch": 0.1235278252270071, "grad_norm": 1.716304898262024, "learning_rate": 8.22754491017964e-06, "loss": 1.0175, "step": 687 }, { "epoch": 0.12370763283286883, "grad_norm": 1.6433049440383911, "learning_rate": 8.239520958083833e-06, "loss": 0.9533, "step": 688 }, { "epoch": 0.12388744043873055, "grad_norm": 1.1821225881576538, "learning_rate": 8.251497005988026e-06, "loss": 1.2129, "step": 689 }, { "epoch": 0.1240672480445923, "grad_norm": 1.9456857442855835, "learning_rate": 8.263473053892216e-06, "loss": 0.903, "step": 690 }, { "epoch": 0.12424705565045402, "grad_norm": 1.7631253004074097, "learning_rate": 8.275449101796407e-06, "loss": 0.8715, "step": 691 }, { "epoch": 0.12442686325631575, "grad_norm": 1.944530963897705, "learning_rate": 8.2874251497006e-06, "loss": 0.9664, "step": 692 }, { "epoch": 0.12460667086217747, "grad_norm": 1.1291948556900024, "learning_rate": 8.29940119760479e-06, "loss": 1.1779, "step": 693 }, { "epoch": 0.1247864784680392, "grad_norm": 1.1590688228607178, "learning_rate": 8.311377245508983e-06, "loss": 1.1347, "step": 694 }, { "epoch": 0.12496628607390092, "grad_norm": 1.7269508838653564, "learning_rate": 8.323353293413174e-06, "loss": 0.9856, "step": 695 }, { "epoch": 0.12514609367976265, "grad_norm": 1.9557743072509766, "learning_rate": 8.335329341317366e-06, "loss": 0.8773, "step": 696 }, { "epoch": 0.12532590128562437, "grad_norm": 2.11094331741333, "learning_rate": 8.347305389221557e-06, "loss": 0.9114, "step": 697 }, { "epoch": 0.1255057088914861, "grad_norm": 1.8058596849441528, "learning_rate": 8.35928143712575e-06, "loss": 0.8985, "step": 698 }, { "epoch": 0.12568551649734783, "grad_norm": 1.6604065895080566, "learning_rate": 8.37125748502994e-06, "loss": 0.913, "step": 699 }, { "epoch": 0.12586532410320955, "grad_norm": 1.6354767084121704, "learning_rate": 8.383233532934131e-06, "loss": 0.8933, "step": 700 }, { "epoch": 0.1260451317090713, "grad_norm": 1.6978248357772827, "learning_rate": 8.395209580838324e-06, "loss": 0.9488, "step": 701 }, { "epoch": 0.12622493931493303, "grad_norm": 1.6754833459854126, "learning_rate": 8.407185628742516e-06, "loss": 0.8168, "step": 702 }, { "epoch": 0.12640474692079476, "grad_norm": 1.2115848064422607, "learning_rate": 8.419161676646707e-06, "loss": 1.1915, "step": 703 }, { "epoch": 0.12658455452665648, "grad_norm": 1.803572654724121, "learning_rate": 8.431137724550898e-06, "loss": 0.9299, "step": 704 }, { "epoch": 0.1267643621325182, "grad_norm": 2.1086959838867188, "learning_rate": 8.44311377245509e-06, "loss": 0.9254, "step": 705 }, { "epoch": 0.12694416973837994, "grad_norm": 1.5685288906097412, "learning_rate": 8.455089820359283e-06, "loss": 0.9357, "step": 706 }, { "epoch": 0.12712397734424166, "grad_norm": 1.589245080947876, "learning_rate": 8.467065868263474e-06, "loss": 0.8997, "step": 707 }, { "epoch": 0.1273037849501034, "grad_norm": 1.9821219444274902, "learning_rate": 8.479041916167665e-06, "loss": 0.9197, "step": 708 }, { "epoch": 0.12748359255596511, "grad_norm": 1.8668076992034912, "learning_rate": 8.491017964071857e-06, "loss": 0.8795, "step": 709 }, { "epoch": 0.12766340016182684, "grad_norm": 1.7147759199142456, "learning_rate": 8.50299401197605e-06, "loss": 0.9823, "step": 710 }, { "epoch": 0.12784320776768857, "grad_norm": 1.7136192321777344, "learning_rate": 8.51497005988024e-06, "loss": 0.9612, "step": 711 }, { "epoch": 0.1280230153735503, "grad_norm": 1.137777328491211, "learning_rate": 8.526946107784431e-06, "loss": 1.1749, "step": 712 }, { "epoch": 0.12820282297941202, "grad_norm": 1.6053001880645752, "learning_rate": 8.538922155688624e-06, "loss": 0.8028, "step": 713 }, { "epoch": 0.12838263058527374, "grad_norm": 1.5663429498672485, "learning_rate": 8.550898203592815e-06, "loss": 0.8606, "step": 714 }, { "epoch": 0.1285624381911355, "grad_norm": 1.6331963539123535, "learning_rate": 8.562874251497007e-06, "loss": 0.9291, "step": 715 }, { "epoch": 0.12874224579699722, "grad_norm": 1.7670644521713257, "learning_rate": 8.574850299401198e-06, "loss": 0.8292, "step": 716 }, { "epoch": 0.12892205340285895, "grad_norm": 1.311509609222412, "learning_rate": 8.58682634730539e-06, "loss": 1.1626, "step": 717 }, { "epoch": 0.12910186100872068, "grad_norm": 1.5790375471115112, "learning_rate": 8.598802395209581e-06, "loss": 0.9195, "step": 718 }, { "epoch": 0.1292816686145824, "grad_norm": 1.667245864868164, "learning_rate": 8.610778443113774e-06, "loss": 0.8552, "step": 719 }, { "epoch": 0.12946147622044413, "grad_norm": 1.7787532806396484, "learning_rate": 8.622754491017965e-06, "loss": 0.9105, "step": 720 }, { "epoch": 0.12964128382630585, "grad_norm": 1.6838781833648682, "learning_rate": 8.634730538922156e-06, "loss": 0.8991, "step": 721 }, { "epoch": 0.12982109143216758, "grad_norm": 1.1670130491256714, "learning_rate": 8.646706586826348e-06, "loss": 1.2172, "step": 722 }, { "epoch": 0.1300008990380293, "grad_norm": 1.7782422304153442, "learning_rate": 8.658682634730539e-06, "loss": 0.879, "step": 723 }, { "epoch": 0.13018070664389103, "grad_norm": 1.6383814811706543, "learning_rate": 8.670658682634731e-06, "loss": 0.9289, "step": 724 }, { "epoch": 0.13036051424975276, "grad_norm": 1.1330771446228027, "learning_rate": 8.682634730538922e-06, "loss": 1.15, "step": 725 }, { "epoch": 0.13054032185561448, "grad_norm": 1.163445234298706, "learning_rate": 8.694610778443115e-06, "loss": 1.1595, "step": 726 }, { "epoch": 0.1307201294614762, "grad_norm": 1.7039626836776733, "learning_rate": 8.706586826347306e-06, "loss": 0.9716, "step": 727 }, { "epoch": 0.13089993706733793, "grad_norm": 1.6515072584152222, "learning_rate": 8.718562874251496e-06, "loss": 0.8614, "step": 728 }, { "epoch": 0.1310797446731997, "grad_norm": 1.8716365098953247, "learning_rate": 8.730538922155689e-06, "loss": 0.9715, "step": 729 }, { "epoch": 0.13125955227906141, "grad_norm": 1.869797706604004, "learning_rate": 8.742514970059881e-06, "loss": 0.9993, "step": 730 }, { "epoch": 0.13143935988492314, "grad_norm": 1.1600539684295654, "learning_rate": 8.754491017964072e-06, "loss": 1.1831, "step": 731 }, { "epoch": 0.13161916749078487, "grad_norm": 1.6215397119522095, "learning_rate": 8.766467065868263e-06, "loss": 0.8842, "step": 732 }, { "epoch": 0.1317989750966466, "grad_norm": 2.0701000690460205, "learning_rate": 8.778443113772456e-06, "loss": 0.8465, "step": 733 }, { "epoch": 0.13197878270250832, "grad_norm": 1.124329686164856, "learning_rate": 8.790419161676648e-06, "loss": 1.1869, "step": 734 }, { "epoch": 0.13215859030837004, "grad_norm": 1.6839227676391602, "learning_rate": 8.802395209580839e-06, "loss": 0.9848, "step": 735 }, { "epoch": 0.13233839791423177, "grad_norm": 1.6085541248321533, "learning_rate": 8.81437125748503e-06, "loss": 0.8705, "step": 736 }, { "epoch": 0.1325182055200935, "grad_norm": 1.6759018898010254, "learning_rate": 8.826347305389222e-06, "loss": 0.9603, "step": 737 }, { "epoch": 0.13269801312595522, "grad_norm": 1.0987036228179932, "learning_rate": 8.838323353293415e-06, "loss": 1.1665, "step": 738 }, { "epoch": 0.13287782073181695, "grad_norm": 1.0948333740234375, "learning_rate": 8.850299401197606e-06, "loss": 1.1985, "step": 739 }, { "epoch": 0.13305762833767867, "grad_norm": 1.7482980489730835, "learning_rate": 8.862275449101796e-06, "loss": 0.8817, "step": 740 }, { "epoch": 0.1332374359435404, "grad_norm": 1.8336200714111328, "learning_rate": 8.874251497005989e-06, "loss": 0.9615, "step": 741 }, { "epoch": 0.13341724354940215, "grad_norm": 1.5685487985610962, "learning_rate": 8.886227544910181e-06, "loss": 0.8594, "step": 742 }, { "epoch": 0.13359705115526388, "grad_norm": 1.6760684251785278, "learning_rate": 8.898203592814372e-06, "loss": 0.8854, "step": 743 }, { "epoch": 0.1337768587611256, "grad_norm": 1.647925615310669, "learning_rate": 8.910179640718563e-06, "loss": 0.8402, "step": 744 }, { "epoch": 0.13395666636698733, "grad_norm": 1.6536997556686401, "learning_rate": 8.922155688622756e-06, "loss": 0.947, "step": 745 }, { "epoch": 0.13413647397284906, "grad_norm": 1.2453731298446655, "learning_rate": 8.934131736526946e-06, "loss": 1.1417, "step": 746 }, { "epoch": 0.13431628157871078, "grad_norm": 1.5731548070907593, "learning_rate": 8.946107784431139e-06, "loss": 0.8699, "step": 747 }, { "epoch": 0.1344960891845725, "grad_norm": 1.5908924341201782, "learning_rate": 8.95808383233533e-06, "loss": 0.9368, "step": 748 }, { "epoch": 0.13467589679043424, "grad_norm": 1.674372911453247, "learning_rate": 8.970059880239522e-06, "loss": 0.8708, "step": 749 }, { "epoch": 0.13485570439629596, "grad_norm": 1.6201938390731812, "learning_rate": 8.982035928143713e-06, "loss": 0.8297, "step": 750 }, { "epoch": 0.1350355120021577, "grad_norm": 1.7801542282104492, "learning_rate": 8.994011976047906e-06, "loss": 0.962, "step": 751 }, { "epoch": 0.1352153196080194, "grad_norm": 1.6217364072799683, "learning_rate": 9.005988023952096e-06, "loss": 0.9156, "step": 752 }, { "epoch": 0.13539512721388114, "grad_norm": 1.0644770860671997, "learning_rate": 9.017964071856287e-06, "loss": 1.1976, "step": 753 }, { "epoch": 0.13557493481974286, "grad_norm": 1.599856972694397, "learning_rate": 9.02994011976048e-06, "loss": 0.9226, "step": 754 }, { "epoch": 0.1357547424256046, "grad_norm": 1.8621551990509033, "learning_rate": 9.041916167664672e-06, "loss": 0.864, "step": 755 }, { "epoch": 0.13593455003146634, "grad_norm": 1.8763076066970825, "learning_rate": 9.053892215568863e-06, "loss": 0.9182, "step": 756 }, { "epoch": 0.13611435763732807, "grad_norm": 1.7178279161453247, "learning_rate": 9.065868263473054e-06, "loss": 0.9269, "step": 757 }, { "epoch": 0.1362941652431898, "grad_norm": 1.5957297086715698, "learning_rate": 9.077844311377247e-06, "loss": 0.9309, "step": 758 }, { "epoch": 0.13647397284905152, "grad_norm": 1.760522723197937, "learning_rate": 9.089820359281439e-06, "loss": 0.9266, "step": 759 }, { "epoch": 0.13665378045491325, "grad_norm": 1.1156530380249023, "learning_rate": 9.10179640718563e-06, "loss": 1.1215, "step": 760 }, { "epoch": 0.13683358806077497, "grad_norm": 1.1009567975997925, "learning_rate": 9.11377245508982e-06, "loss": 1.195, "step": 761 }, { "epoch": 0.1370133956666367, "grad_norm": 1.856793999671936, "learning_rate": 9.125748502994013e-06, "loss": 0.9487, "step": 762 }, { "epoch": 0.13719320327249843, "grad_norm": 1.1568714380264282, "learning_rate": 9.137724550898206e-06, "loss": 1.1495, "step": 763 }, { "epoch": 0.13737301087836015, "grad_norm": 1.7343820333480835, "learning_rate": 9.149700598802397e-06, "loss": 0.9274, "step": 764 }, { "epoch": 0.13755281848422188, "grad_norm": 1.6459097862243652, "learning_rate": 9.161676646706587e-06, "loss": 0.94, "step": 765 }, { "epoch": 0.1377326260900836, "grad_norm": 1.6052502393722534, "learning_rate": 9.17365269461078e-06, "loss": 0.8783, "step": 766 }, { "epoch": 0.13791243369594533, "grad_norm": 1.6470067501068115, "learning_rate": 9.18562874251497e-06, "loss": 0.8949, "step": 767 }, { "epoch": 0.13809224130180706, "grad_norm": 1.638452410697937, "learning_rate": 9.197604790419162e-06, "loss": 0.8714, "step": 768 }, { "epoch": 0.13827204890766878, "grad_norm": 1.604805827140808, "learning_rate": 9.209580838323354e-06, "loss": 0.9314, "step": 769 }, { "epoch": 0.13845185651353054, "grad_norm": 1.592829942703247, "learning_rate": 9.221556886227547e-06, "loss": 0.9121, "step": 770 }, { "epoch": 0.13863166411939226, "grad_norm": 1.688280463218689, "learning_rate": 9.233532934131737e-06, "loss": 0.879, "step": 771 }, { "epoch": 0.138811471725254, "grad_norm": 1.2308454513549805, "learning_rate": 9.245508982035928e-06, "loss": 1.2111, "step": 772 }, { "epoch": 0.1389912793311157, "grad_norm": 1.5916779041290283, "learning_rate": 9.25748502994012e-06, "loss": 0.8629, "step": 773 }, { "epoch": 0.13917108693697744, "grad_norm": 1.0837105512619019, "learning_rate": 9.269461077844312e-06, "loss": 1.1692, "step": 774 }, { "epoch": 0.13935089454283917, "grad_norm": 1.818879246711731, "learning_rate": 9.281437125748504e-06, "loss": 0.8191, "step": 775 }, { "epoch": 0.1395307021487009, "grad_norm": 1.7791944742202759, "learning_rate": 9.293413173652695e-06, "loss": 0.9264, "step": 776 }, { "epoch": 0.13971050975456262, "grad_norm": 1.2195417881011963, "learning_rate": 9.305389221556887e-06, "loss": 1.1559, "step": 777 }, { "epoch": 0.13989031736042434, "grad_norm": 1.7337082624435425, "learning_rate": 9.317365269461078e-06, "loss": 0.9078, "step": 778 }, { "epoch": 0.14007012496628607, "grad_norm": 1.7652170658111572, "learning_rate": 9.32934131736527e-06, "loss": 0.9456, "step": 779 }, { "epoch": 0.1402499325721478, "grad_norm": 1.6184152364730835, "learning_rate": 9.341317365269462e-06, "loss": 0.9018, "step": 780 }, { "epoch": 0.14042974017800952, "grad_norm": 1.6790177822113037, "learning_rate": 9.353293413173652e-06, "loss": 0.9629, "step": 781 }, { "epoch": 0.14060954778387125, "grad_norm": 1.6409486532211304, "learning_rate": 9.365269461077845e-06, "loss": 0.9952, "step": 782 }, { "epoch": 0.14078935538973297, "grad_norm": 1.6394462585449219, "learning_rate": 9.377245508982037e-06, "loss": 0.8639, "step": 783 }, { "epoch": 0.14096916299559473, "grad_norm": 1.8344107866287231, "learning_rate": 9.389221556886228e-06, "loss": 0.9067, "step": 784 }, { "epoch": 0.14114897060145645, "grad_norm": 1.7477550506591797, "learning_rate": 9.401197604790419e-06, "loss": 0.8738, "step": 785 }, { "epoch": 0.14132877820731818, "grad_norm": 1.709398627281189, "learning_rate": 9.413173652694612e-06, "loss": 0.9366, "step": 786 }, { "epoch": 0.1415085858131799, "grad_norm": 1.3044098615646362, "learning_rate": 9.425149700598804e-06, "loss": 1.1449, "step": 787 }, { "epoch": 0.14168839341904163, "grad_norm": 1.7309869527816772, "learning_rate": 9.437125748502995e-06, "loss": 0.903, "step": 788 }, { "epoch": 0.14186820102490336, "grad_norm": 1.240317463874817, "learning_rate": 9.449101796407186e-06, "loss": 1.1893, "step": 789 }, { "epoch": 0.14204800863076508, "grad_norm": 1.5676567554473877, "learning_rate": 9.461077844311378e-06, "loss": 0.88, "step": 790 }, { "epoch": 0.1422278162366268, "grad_norm": 1.0955392122268677, "learning_rate": 9.47305389221557e-06, "loss": 1.1324, "step": 791 }, { "epoch": 0.14240762384248853, "grad_norm": 1.752990961074829, "learning_rate": 9.485029940119762e-06, "loss": 0.9297, "step": 792 }, { "epoch": 0.14258743144835026, "grad_norm": 1.5850292444229126, "learning_rate": 9.497005988023952e-06, "loss": 0.8261, "step": 793 }, { "epoch": 0.142767239054212, "grad_norm": 1.8541446924209595, "learning_rate": 9.508982035928145e-06, "loss": 0.8945, "step": 794 }, { "epoch": 0.1429470466600737, "grad_norm": 1.7501115798950195, "learning_rate": 9.520958083832336e-06, "loss": 0.8731, "step": 795 }, { "epoch": 0.14312685426593544, "grad_norm": 1.33210289478302, "learning_rate": 9.532934131736528e-06, "loss": 1.1209, "step": 796 }, { "epoch": 0.14330666187179716, "grad_norm": 1.90280020236969, "learning_rate": 9.544910179640719e-06, "loss": 0.9655, "step": 797 }, { "epoch": 0.14348646947765892, "grad_norm": 1.0862611532211304, "learning_rate": 9.556886227544912e-06, "loss": 1.1412, "step": 798 }, { "epoch": 0.14366627708352064, "grad_norm": 1.6400477886199951, "learning_rate": 9.568862275449102e-06, "loss": 0.9132, "step": 799 }, { "epoch": 0.14384608468938237, "grad_norm": 1.718255877494812, "learning_rate": 9.580838323353295e-06, "loss": 0.8711, "step": 800 }, { "epoch": 0.1440258922952441, "grad_norm": 1.7030354738235474, "learning_rate": 9.592814371257486e-06, "loss": 0.821, "step": 801 }, { "epoch": 0.14420569990110582, "grad_norm": 1.557396650314331, "learning_rate": 9.604790419161677e-06, "loss": 0.9358, "step": 802 }, { "epoch": 0.14438550750696755, "grad_norm": 1.678359866142273, "learning_rate": 9.616766467065869e-06, "loss": 0.8825, "step": 803 }, { "epoch": 0.14456531511282927, "grad_norm": 1.6736701726913452, "learning_rate": 9.628742514970062e-06, "loss": 0.8884, "step": 804 }, { "epoch": 0.144745122718691, "grad_norm": 1.7970751523971558, "learning_rate": 9.640718562874252e-06, "loss": 0.8629, "step": 805 }, { "epoch": 0.14492493032455273, "grad_norm": 1.5644656419754028, "learning_rate": 9.652694610778443e-06, "loss": 0.9066, "step": 806 }, { "epoch": 0.14510473793041445, "grad_norm": 1.799957275390625, "learning_rate": 9.664670658682636e-06, "loss": 0.8542, "step": 807 }, { "epoch": 0.14528454553627618, "grad_norm": 1.6616685390472412, "learning_rate": 9.676646706586828e-06, "loss": 0.9105, "step": 808 }, { "epoch": 0.1454643531421379, "grad_norm": 1.6520678997039795, "learning_rate": 9.688622754491019e-06, "loss": 0.8763, "step": 809 }, { "epoch": 0.14564416074799963, "grad_norm": 1.676315426826477, "learning_rate": 9.70059880239521e-06, "loss": 0.8658, "step": 810 }, { "epoch": 0.14582396835386136, "grad_norm": 1.740313172340393, "learning_rate": 9.712574850299402e-06, "loss": 0.8613, "step": 811 }, { "epoch": 0.1460037759597231, "grad_norm": 1.7001864910125732, "learning_rate": 9.724550898203593e-06, "loss": 0.9063, "step": 812 }, { "epoch": 0.14618358356558483, "grad_norm": 1.557870626449585, "learning_rate": 9.736526946107784e-06, "loss": 0.8776, "step": 813 }, { "epoch": 0.14636339117144656, "grad_norm": 1.6237103939056396, "learning_rate": 9.748502994011977e-06, "loss": 0.88, "step": 814 }, { "epoch": 0.1465431987773083, "grad_norm": 1.496505618095398, "learning_rate": 9.760479041916169e-06, "loss": 1.1832, "step": 815 }, { "epoch": 0.14672300638317, "grad_norm": 1.9449577331542969, "learning_rate": 9.77245508982036e-06, "loss": 0.8984, "step": 816 }, { "epoch": 0.14690281398903174, "grad_norm": 1.5544767379760742, "learning_rate": 9.78443113772455e-06, "loss": 0.8537, "step": 817 }, { "epoch": 0.14708262159489346, "grad_norm": 1.1278598308563232, "learning_rate": 9.796407185628743e-06, "loss": 1.1641, "step": 818 }, { "epoch": 0.1472624292007552, "grad_norm": 1.1885986328125, "learning_rate": 9.808383233532936e-06, "loss": 1.2037, "step": 819 }, { "epoch": 0.14744223680661692, "grad_norm": 1.1701807975769043, "learning_rate": 9.820359281437127e-06, "loss": 1.1352, "step": 820 }, { "epoch": 0.14762204441247864, "grad_norm": 1.7142229080200195, "learning_rate": 9.832335329341317e-06, "loss": 0.961, "step": 821 }, { "epoch": 0.14780185201834037, "grad_norm": 1.9468085765838623, "learning_rate": 9.84431137724551e-06, "loss": 0.8774, "step": 822 }, { "epoch": 0.1479816596242021, "grad_norm": 1.6670204401016235, "learning_rate": 9.8562874251497e-06, "loss": 0.8977, "step": 823 }, { "epoch": 0.14816146723006382, "grad_norm": 1.375156283378601, "learning_rate": 9.868263473053893e-06, "loss": 1.2069, "step": 824 }, { "epoch": 0.14834127483592555, "grad_norm": 1.6005761623382568, "learning_rate": 9.880239520958084e-06, "loss": 0.9401, "step": 825 }, { "epoch": 0.1485210824417873, "grad_norm": 1.6468349695205688, "learning_rate": 9.892215568862277e-06, "loss": 0.8949, "step": 826 }, { "epoch": 0.14870089004764903, "grad_norm": 1.584210753440857, "learning_rate": 9.904191616766467e-06, "loss": 0.8699, "step": 827 }, { "epoch": 0.14888069765351075, "grad_norm": 1.6524523496627808, "learning_rate": 9.91616766467066e-06, "loss": 0.9155, "step": 828 }, { "epoch": 0.14906050525937248, "grad_norm": 1.6948436498641968, "learning_rate": 9.92814371257485e-06, "loss": 0.8588, "step": 829 }, { "epoch": 0.1492403128652342, "grad_norm": 1.1491180658340454, "learning_rate": 9.940119760479042e-06, "loss": 1.183, "step": 830 }, { "epoch": 0.14942012047109593, "grad_norm": 1.5616167783737183, "learning_rate": 9.952095808383234e-06, "loss": 0.9519, "step": 831 }, { "epoch": 0.14959992807695766, "grad_norm": 1.782588243484497, "learning_rate": 9.964071856287427e-06, "loss": 0.9006, "step": 832 }, { "epoch": 0.14977973568281938, "grad_norm": 1.6365635395050049, "learning_rate": 9.976047904191617e-06, "loss": 0.8625, "step": 833 }, { "epoch": 0.1499595432886811, "grad_norm": 1.8911563158035278, "learning_rate": 9.988023952095808e-06, "loss": 0.9044, "step": 834 }, { "epoch": 0.15013935089454283, "grad_norm": 1.760403037071228, "learning_rate": 1e-05, "loss": 0.9584, "step": 835 }, { "epoch": 0.15031915850040456, "grad_norm": 1.1228047609329224, "learning_rate": 9.999999966078281e-06, "loss": 1.15, "step": 836 }, { "epoch": 0.15049896610626629, "grad_norm": 1.5400469303131104, "learning_rate": 9.999999864313122e-06, "loss": 0.8898, "step": 837 }, { "epoch": 0.150678773712128, "grad_norm": 1.6896965503692627, "learning_rate": 9.999999694704527e-06, "loss": 0.9482, "step": 838 }, { "epoch": 0.15085858131798974, "grad_norm": 3.2907912731170654, "learning_rate": 9.999999457252496e-06, "loss": 0.9071, "step": 839 }, { "epoch": 0.1510383889238515, "grad_norm": 1.7653732299804688, "learning_rate": 9.999999151957031e-06, "loss": 0.9739, "step": 840 }, { "epoch": 0.15121819652971322, "grad_norm": 1.5792851448059082, "learning_rate": 9.99999877881814e-06, "loss": 0.9255, "step": 841 }, { "epoch": 0.15139800413557494, "grad_norm": 1.6126718521118164, "learning_rate": 9.999998337835829e-06, "loss": 0.8821, "step": 842 }, { "epoch": 0.15157781174143667, "grad_norm": 1.784956455230713, "learning_rate": 9.999997829010098e-06, "loss": 0.9324, "step": 843 }, { "epoch": 0.1517576193472984, "grad_norm": 1.768819808959961, "learning_rate": 9.999997252340957e-06, "loss": 0.9126, "step": 844 }, { "epoch": 0.15193742695316012, "grad_norm": 1.593666434288025, "learning_rate": 9.999996607828415e-06, "loss": 0.8448, "step": 845 }, { "epoch": 0.15211723455902185, "grad_norm": 1.8704057931900024, "learning_rate": 9.999995895472478e-06, "loss": 0.8726, "step": 846 }, { "epoch": 0.15229704216488357, "grad_norm": 1.120916724205017, "learning_rate": 9.99999511527316e-06, "loss": 1.1844, "step": 847 }, { "epoch": 0.1524768497707453, "grad_norm": 1.03933846950531, "learning_rate": 9.999994267230468e-06, "loss": 1.1805, "step": 848 }, { "epoch": 0.15265665737660702, "grad_norm": 2.1447055339813232, "learning_rate": 9.999993351344413e-06, "loss": 0.9226, "step": 849 }, { "epoch": 0.15283646498246875, "grad_norm": 1.5641721487045288, "learning_rate": 9.99999236761501e-06, "loss": 0.9303, "step": 850 }, { "epoch": 0.15301627258833048, "grad_norm": 1.0928938388824463, "learning_rate": 9.999991316042273e-06, "loss": 1.1653, "step": 851 }, { "epoch": 0.1531960801941922, "grad_norm": 1.6288896799087524, "learning_rate": 9.999990196626212e-06, "loss": 0.946, "step": 852 }, { "epoch": 0.15337588780005396, "grad_norm": 1.6293364763259888, "learning_rate": 9.999989009366847e-06, "loss": 0.8169, "step": 853 }, { "epoch": 0.15355569540591568, "grad_norm": 1.1066607236862183, "learning_rate": 9.999987754264188e-06, "loss": 1.1871, "step": 854 }, { "epoch": 0.1537355030117774, "grad_norm": 2.590117931365967, "learning_rate": 9.999986431318258e-06, "loss": 0.9836, "step": 855 }, { "epoch": 0.15391531061763913, "grad_norm": 1.6151695251464844, "learning_rate": 9.999985040529074e-06, "loss": 0.8657, "step": 856 }, { "epoch": 0.15409511822350086, "grad_norm": 1.1561942100524902, "learning_rate": 9.999983581896653e-06, "loss": 1.1678, "step": 857 }, { "epoch": 0.15427492582936259, "grad_norm": 1.717209815979004, "learning_rate": 9.999982055421015e-06, "loss": 0.911, "step": 858 }, { "epoch": 0.1544547334352243, "grad_norm": 1.8031543493270874, "learning_rate": 9.999980461102181e-06, "loss": 0.9541, "step": 859 }, { "epoch": 0.15463454104108604, "grad_norm": 1.798417329788208, "learning_rate": 9.999978798940174e-06, "loss": 0.9482, "step": 860 }, { "epoch": 0.15481434864694776, "grad_norm": 1.7429611682891846, "learning_rate": 9.999977068935014e-06, "loss": 0.989, "step": 861 }, { "epoch": 0.1549941562528095, "grad_norm": 1.0684311389923096, "learning_rate": 9.999975271086726e-06, "loss": 1.1959, "step": 862 }, { "epoch": 0.15517396385867122, "grad_norm": 1.8235560655593872, "learning_rate": 9.999973405395334e-06, "loss": 1.0076, "step": 863 }, { "epoch": 0.15535377146453294, "grad_norm": 1.8459577560424805, "learning_rate": 9.999971471860864e-06, "loss": 0.8917, "step": 864 }, { "epoch": 0.15553357907039467, "grad_norm": 1.5937936305999756, "learning_rate": 9.999969470483342e-06, "loss": 0.9731, "step": 865 }, { "epoch": 0.1557133866762564, "grad_norm": 2.906646251678467, "learning_rate": 9.999967401262794e-06, "loss": 0.9344, "step": 866 }, { "epoch": 0.15589319428211815, "grad_norm": 1.6071604490280151, "learning_rate": 9.999965264199251e-06, "loss": 0.9211, "step": 867 }, { "epoch": 0.15607300188797987, "grad_norm": 1.6870508193969727, "learning_rate": 9.99996305929274e-06, "loss": 0.8899, "step": 868 }, { "epoch": 0.1562528094938416, "grad_norm": 1.6707837581634521, "learning_rate": 9.999960786543288e-06, "loss": 0.9056, "step": 869 }, { "epoch": 0.15643261709970332, "grad_norm": 1.6469038724899292, "learning_rate": 9.99995844595093e-06, "loss": 1.0175, "step": 870 }, { "epoch": 0.15661242470556505, "grad_norm": 1.6145095825195312, "learning_rate": 9.999956037515696e-06, "loss": 0.8978, "step": 871 }, { "epoch": 0.15679223231142678, "grad_norm": 2.902569532394409, "learning_rate": 9.999953561237621e-06, "loss": 0.9229, "step": 872 }, { "epoch": 0.1569720399172885, "grad_norm": 1.5609970092773438, "learning_rate": 9.999951017116735e-06, "loss": 0.9044, "step": 873 }, { "epoch": 0.15715184752315023, "grad_norm": 1.5339311361312866, "learning_rate": 9.999948405153077e-06, "loss": 0.8875, "step": 874 }, { "epoch": 0.15733165512901195, "grad_norm": 1.7532583475112915, "learning_rate": 9.999945725346677e-06, "loss": 0.9524, "step": 875 }, { "epoch": 0.15751146273487368, "grad_norm": 1.6817582845687866, "learning_rate": 9.999942977697575e-06, "loss": 0.861, "step": 876 }, { "epoch": 0.1576912703407354, "grad_norm": 1.9629334211349487, "learning_rate": 9.999940162205808e-06, "loss": 0.8911, "step": 877 }, { "epoch": 0.15787107794659713, "grad_norm": 1.6026605367660522, "learning_rate": 9.999937278871412e-06, "loss": 0.8765, "step": 878 }, { "epoch": 0.15805088555245886, "grad_norm": 1.8111087083816528, "learning_rate": 9.99993432769443e-06, "loss": 0.9062, "step": 879 }, { "epoch": 0.15823069315832058, "grad_norm": 1.6473742723464966, "learning_rate": 9.999931308674898e-06, "loss": 0.885, "step": 880 }, { "epoch": 0.15841050076418234, "grad_norm": 1.5693467855453491, "learning_rate": 9.99992822181286e-06, "loss": 0.8562, "step": 881 }, { "epoch": 0.15859030837004406, "grad_norm": 1.6468217372894287, "learning_rate": 9.999925067108356e-06, "loss": 0.8985, "step": 882 }, { "epoch": 0.1587701159759058, "grad_norm": 1.5316112041473389, "learning_rate": 9.999921844561428e-06, "loss": 0.9054, "step": 883 }, { "epoch": 0.15894992358176752, "grad_norm": 1.838244915008545, "learning_rate": 9.999918554172124e-06, "loss": 0.9002, "step": 884 }, { "epoch": 0.15912973118762924, "grad_norm": 1.632173776626587, "learning_rate": 9.999915195940484e-06, "loss": 0.9084, "step": 885 }, { "epoch": 0.15930953879349097, "grad_norm": 1.5732061862945557, "learning_rate": 9.999911769866554e-06, "loss": 0.8333, "step": 886 }, { "epoch": 0.1594893463993527, "grad_norm": 1.6002764701843262, "learning_rate": 9.999908275950386e-06, "loss": 0.8594, "step": 887 }, { "epoch": 0.15966915400521442, "grad_norm": 1.6441974639892578, "learning_rate": 9.99990471419202e-06, "loss": 0.871, "step": 888 }, { "epoch": 0.15984896161107615, "grad_norm": 1.5336180925369263, "learning_rate": 9.999901084591508e-06, "loss": 0.8964, "step": 889 }, { "epoch": 0.16002876921693787, "grad_norm": 1.5239474773406982, "learning_rate": 9.9998973871489e-06, "loss": 0.8985, "step": 890 }, { "epoch": 0.1602085768227996, "grad_norm": 1.6180617809295654, "learning_rate": 9.999893621864242e-06, "loss": 0.9694, "step": 891 }, { "epoch": 0.16038838442866132, "grad_norm": 1.7340353727340698, "learning_rate": 9.99988978873759e-06, "loss": 0.9174, "step": 892 }, { "epoch": 0.16056819203452305, "grad_norm": 1.3016571998596191, "learning_rate": 9.999885887768996e-06, "loss": 1.1507, "step": 893 }, { "epoch": 0.16074799964038478, "grad_norm": 1.2251194715499878, "learning_rate": 9.99988191895851e-06, "loss": 1.1767, "step": 894 }, { "epoch": 0.16092780724624653, "grad_norm": 1.6412121057510376, "learning_rate": 9.999877882306185e-06, "loss": 0.9287, "step": 895 }, { "epoch": 0.16110761485210826, "grad_norm": 1.6278122663497925, "learning_rate": 9.99987377781208e-06, "loss": 0.8849, "step": 896 }, { "epoch": 0.16128742245796998, "grad_norm": 1.735060691833496, "learning_rate": 9.999869605476246e-06, "loss": 0.969, "step": 897 }, { "epoch": 0.1614672300638317, "grad_norm": 1.660732626914978, "learning_rate": 9.999865365298744e-06, "loss": 0.9107, "step": 898 }, { "epoch": 0.16164703766969343, "grad_norm": 1.571639060974121, "learning_rate": 9.99986105727963e-06, "loss": 0.96, "step": 899 }, { "epoch": 0.16182684527555516, "grad_norm": 1.654334306716919, "learning_rate": 9.99985668141896e-06, "loss": 0.8597, "step": 900 }, { "epoch": 0.16200665288141688, "grad_norm": 1.6658620834350586, "learning_rate": 9.999852237716796e-06, "loss": 0.8797, "step": 901 }, { "epoch": 0.1621864604872786, "grad_norm": 1.7261749505996704, "learning_rate": 9.999847726173198e-06, "loss": 0.9481, "step": 902 }, { "epoch": 0.16236626809314034, "grad_norm": 1.7549182176589966, "learning_rate": 9.999843146788226e-06, "loss": 0.9294, "step": 903 }, { "epoch": 0.16254607569900206, "grad_norm": 1.6836693286895752, "learning_rate": 9.999838499561944e-06, "loss": 0.9363, "step": 904 }, { "epoch": 0.1627258833048638, "grad_norm": 1.6022480726242065, "learning_rate": 9.999833784494413e-06, "loss": 0.8986, "step": 905 }, { "epoch": 0.16290569091072551, "grad_norm": 1.7040376663208008, "learning_rate": 9.9998290015857e-06, "loss": 0.9077, "step": 906 }, { "epoch": 0.16308549851658724, "grad_norm": 1.680681586265564, "learning_rate": 9.999824150835866e-06, "loss": 0.9318, "step": 907 }, { "epoch": 0.16326530612244897, "grad_norm": 1.862923502922058, "learning_rate": 9.999819232244978e-06, "loss": 0.9418, "step": 908 }, { "epoch": 0.16344511372831072, "grad_norm": 1.603014588356018, "learning_rate": 9.999814245813105e-06, "loss": 0.8913, "step": 909 }, { "epoch": 0.16362492133417245, "grad_norm": 1.791925311088562, "learning_rate": 9.999809191540313e-06, "loss": 1.2051, "step": 910 }, { "epoch": 0.16380472894003417, "grad_norm": 1.6699202060699463, "learning_rate": 9.99980406942667e-06, "loss": 0.98, "step": 911 }, { "epoch": 0.1639845365458959, "grad_norm": 1.6682188510894775, "learning_rate": 9.999798879472247e-06, "loss": 0.9063, "step": 912 }, { "epoch": 0.16416434415175762, "grad_norm": 1.6632331609725952, "learning_rate": 9.999793621677114e-06, "loss": 0.8745, "step": 913 }, { "epoch": 0.16434415175761935, "grad_norm": 1.7936145067214966, "learning_rate": 9.999788296041341e-06, "loss": 0.8242, "step": 914 }, { "epoch": 0.16452395936348108, "grad_norm": 2.7354063987731934, "learning_rate": 9.999782902565001e-06, "loss": 1.0047, "step": 915 }, { "epoch": 0.1647037669693428, "grad_norm": 1.8283084630966187, "learning_rate": 9.999777441248169e-06, "loss": 0.91, "step": 916 }, { "epoch": 0.16488357457520453, "grad_norm": 1.695401668548584, "learning_rate": 9.999771912090916e-06, "loss": 0.8371, "step": 917 }, { "epoch": 0.16506338218106625, "grad_norm": 1.7074859142303467, "learning_rate": 9.99976631509332e-06, "loss": 0.8645, "step": 918 }, { "epoch": 0.16524318978692798, "grad_norm": 1.7065132856369019, "learning_rate": 9.999760650255453e-06, "loss": 0.9084, "step": 919 }, { "epoch": 0.1654229973927897, "grad_norm": 1.6055792570114136, "learning_rate": 9.999754917577396e-06, "loss": 0.8687, "step": 920 }, { "epoch": 0.16560280499865143, "grad_norm": 1.6533136367797852, "learning_rate": 9.999749117059226e-06, "loss": 0.9054, "step": 921 }, { "epoch": 0.16578261260451316, "grad_norm": 1.677803635597229, "learning_rate": 9.99974324870102e-06, "loss": 0.8929, "step": 922 }, { "epoch": 0.1659624202103749, "grad_norm": 1.641364336013794, "learning_rate": 9.999737312502858e-06, "loss": 0.9197, "step": 923 }, { "epoch": 0.16614222781623664, "grad_norm": 1.5688506364822388, "learning_rate": 9.99973130846482e-06, "loss": 0.8917, "step": 924 }, { "epoch": 0.16632203542209836, "grad_norm": 2.09116792678833, "learning_rate": 9.99972523658699e-06, "loss": 0.9764, "step": 925 }, { "epoch": 0.1665018430279601, "grad_norm": 1.7110642194747925, "learning_rate": 9.99971909686945e-06, "loss": 0.8978, "step": 926 }, { "epoch": 0.16668165063382182, "grad_norm": 1.96330988407135, "learning_rate": 9.999712889312278e-06, "loss": 0.8623, "step": 927 }, { "epoch": 0.16686145823968354, "grad_norm": 1.5831985473632812, "learning_rate": 9.999706613915567e-06, "loss": 0.9091, "step": 928 }, { "epoch": 0.16704126584554527, "grad_norm": 1.6490305662155151, "learning_rate": 9.999700270679395e-06, "loss": 0.87, "step": 929 }, { "epoch": 0.167221073451407, "grad_norm": 1.651734709739685, "learning_rate": 9.999693859603852e-06, "loss": 0.8612, "step": 930 }, { "epoch": 0.16740088105726872, "grad_norm": 1.6825096607208252, "learning_rate": 9.999687380689022e-06, "loss": 0.9229, "step": 931 }, { "epoch": 0.16758068866313044, "grad_norm": 1.7025812864303589, "learning_rate": 9.999680833934996e-06, "loss": 0.8123, "step": 932 }, { "epoch": 0.16776049626899217, "grad_norm": 1.925761342048645, "learning_rate": 9.99967421934186e-06, "loss": 0.871, "step": 933 }, { "epoch": 0.1679403038748539, "grad_norm": 1.279720425605774, "learning_rate": 9.999667536909706e-06, "loss": 1.1847, "step": 934 }, { "epoch": 0.16812011148071562, "grad_norm": 1.8242624998092651, "learning_rate": 9.999660786638625e-06, "loss": 0.8599, "step": 935 }, { "epoch": 0.16829991908657735, "grad_norm": 1.786484956741333, "learning_rate": 9.999653968528705e-06, "loss": 0.8924, "step": 936 }, { "epoch": 0.1684797266924391, "grad_norm": 1.7764737606048584, "learning_rate": 9.999647082580042e-06, "loss": 0.9317, "step": 937 }, { "epoch": 0.16865953429830083, "grad_norm": 1.5874989032745361, "learning_rate": 9.999640128792728e-06, "loss": 0.8721, "step": 938 }, { "epoch": 0.16883934190416255, "grad_norm": 1.5211771726608276, "learning_rate": 9.999633107166858e-06, "loss": 0.8761, "step": 939 }, { "epoch": 0.16901914951002428, "grad_norm": 1.6138713359832764, "learning_rate": 9.999626017702526e-06, "loss": 1.0016, "step": 940 }, { "epoch": 0.169198957115886, "grad_norm": 1.7499299049377441, "learning_rate": 9.999618860399831e-06, "loss": 0.8928, "step": 941 }, { "epoch": 0.16937876472174773, "grad_norm": 1.1348704099655151, "learning_rate": 9.999611635258868e-06, "loss": 1.1754, "step": 942 }, { "epoch": 0.16955857232760946, "grad_norm": 1.8094009160995483, "learning_rate": 9.999604342279733e-06, "loss": 0.8874, "step": 943 }, { "epoch": 0.16973837993347118, "grad_norm": 1.6054401397705078, "learning_rate": 9.99959698146253e-06, "loss": 0.9099, "step": 944 }, { "epoch": 0.1699181875393329, "grad_norm": 1.6623528003692627, "learning_rate": 9.999589552807354e-06, "loss": 0.9144, "step": 945 }, { "epoch": 0.17009799514519464, "grad_norm": 1.6336300373077393, "learning_rate": 9.999582056314309e-06, "loss": 0.9136, "step": 946 }, { "epoch": 0.17027780275105636, "grad_norm": 1.6803486347198486, "learning_rate": 9.999574491983494e-06, "loss": 0.9229, "step": 947 }, { "epoch": 0.1704576103569181, "grad_norm": 1.5887150764465332, "learning_rate": 9.999566859815015e-06, "loss": 0.8764, "step": 948 }, { "epoch": 0.1706374179627798, "grad_norm": 1.721700668334961, "learning_rate": 9.999559159808974e-06, "loss": 0.8485, "step": 949 }, { "epoch": 0.17081722556864154, "grad_norm": 1.6357169151306152, "learning_rate": 9.999551391965475e-06, "loss": 0.9029, "step": 950 }, { "epoch": 0.1709970331745033, "grad_norm": 1.5963736772537231, "learning_rate": 9.999543556284623e-06, "loss": 0.9064, "step": 951 }, { "epoch": 0.17117684078036502, "grad_norm": 1.7369756698608398, "learning_rate": 9.999535652766526e-06, "loss": 0.781, "step": 952 }, { "epoch": 0.17135664838622675, "grad_norm": 1.7001618146896362, "learning_rate": 9.99952768141129e-06, "loss": 0.8588, "step": 953 }, { "epoch": 0.17153645599208847, "grad_norm": 1.8208949565887451, "learning_rate": 9.999519642219022e-06, "loss": 0.9444, "step": 954 }, { "epoch": 0.1717162635979502, "grad_norm": 1.5861033201217651, "learning_rate": 9.999511535189834e-06, "loss": 0.8767, "step": 955 }, { "epoch": 0.17189607120381192, "grad_norm": 1.7812976837158203, "learning_rate": 9.999503360323834e-06, "loss": 0.953, "step": 956 }, { "epoch": 0.17207587880967365, "grad_norm": 1.0884466171264648, "learning_rate": 9.999495117621134e-06, "loss": 1.1959, "step": 957 }, { "epoch": 0.17225568641553538, "grad_norm": 1.6038401126861572, "learning_rate": 9.999486807081844e-06, "loss": 0.8841, "step": 958 }, { "epoch": 0.1724354940213971, "grad_norm": 1.0930010080337524, "learning_rate": 9.99947842870608e-06, "loss": 1.1903, "step": 959 }, { "epoch": 0.17261530162725883, "grad_norm": 1.7692792415618896, "learning_rate": 9.999469982493953e-06, "loss": 0.8714, "step": 960 }, { "epoch": 0.17279510923312055, "grad_norm": 1.7157231569290161, "learning_rate": 9.999461468445578e-06, "loss": 0.9031, "step": 961 }, { "epoch": 0.17297491683898228, "grad_norm": 1.679172396659851, "learning_rate": 9.99945288656107e-06, "loss": 0.8263, "step": 962 }, { "epoch": 0.173154724444844, "grad_norm": 1.6186696290969849, "learning_rate": 9.999444236840548e-06, "loss": 0.8536, "step": 963 }, { "epoch": 0.17333453205070576, "grad_norm": 1.646657109260559, "learning_rate": 9.999435519284126e-06, "loss": 0.8672, "step": 964 }, { "epoch": 0.17351433965656748, "grad_norm": 1.6129273176193237, "learning_rate": 9.999426733891925e-06, "loss": 0.8704, "step": 965 }, { "epoch": 0.1736941472624292, "grad_norm": 1.5442465543746948, "learning_rate": 9.999417880664063e-06, "loss": 0.899, "step": 966 }, { "epoch": 0.17387395486829094, "grad_norm": 1.7713760137557983, "learning_rate": 9.999408959600661e-06, "loss": 0.9429, "step": 967 }, { "epoch": 0.17405376247415266, "grad_norm": 1.7584141492843628, "learning_rate": 9.999399970701838e-06, "loss": 0.8387, "step": 968 }, { "epoch": 0.1742335700800144, "grad_norm": 1.4714386463165283, "learning_rate": 9.999390913967717e-06, "loss": 0.9172, "step": 969 }, { "epoch": 0.17441337768587611, "grad_norm": 1.3058216571807861, "learning_rate": 9.99938178939842e-06, "loss": 1.148, "step": 970 }, { "epoch": 0.17459318529173784, "grad_norm": 1.7354816198349, "learning_rate": 9.999372596994076e-06, "loss": 0.826, "step": 971 }, { "epoch": 0.17477299289759957, "grad_norm": 1.5679794549942017, "learning_rate": 9.999363336754804e-06, "loss": 0.9173, "step": 972 }, { "epoch": 0.1749528005034613, "grad_norm": 1.593621850013733, "learning_rate": 9.999354008680731e-06, "loss": 0.8588, "step": 973 }, { "epoch": 0.17513260810932302, "grad_norm": 1.5361448526382446, "learning_rate": 9.999344612771984e-06, "loss": 0.9575, "step": 974 }, { "epoch": 0.17531241571518474, "grad_norm": 1.5433800220489502, "learning_rate": 9.999335149028691e-06, "loss": 0.8204, "step": 975 }, { "epoch": 0.17549222332104647, "grad_norm": 1.5435829162597656, "learning_rate": 9.999325617450978e-06, "loss": 0.833, "step": 976 }, { "epoch": 0.1756720309269082, "grad_norm": 1.7522393465042114, "learning_rate": 9.999316018038977e-06, "loss": 0.9146, "step": 977 }, { "epoch": 0.17585183853276995, "grad_norm": 1.62409245967865, "learning_rate": 9.999306350792819e-06, "loss": 0.8456, "step": 978 }, { "epoch": 0.17603164613863168, "grad_norm": 1.7683223485946655, "learning_rate": 9.999296615712632e-06, "loss": 0.9218, "step": 979 }, { "epoch": 0.1762114537444934, "grad_norm": 1.5593290328979492, "learning_rate": 9.99928681279855e-06, "loss": 0.9298, "step": 980 }, { "epoch": 0.17639126135035513, "grad_norm": 1.5610779523849487, "learning_rate": 9.999276942050706e-06, "loss": 0.8953, "step": 981 }, { "epoch": 0.17657106895621685, "grad_norm": 1.6097569465637207, "learning_rate": 9.999267003469233e-06, "loss": 0.879, "step": 982 }, { "epoch": 0.17675087656207858, "grad_norm": 1.5620101690292358, "learning_rate": 9.999256997054267e-06, "loss": 0.9443, "step": 983 }, { "epoch": 0.1769306841679403, "grad_norm": 1.671699047088623, "learning_rate": 9.999246922805943e-06, "loss": 0.872, "step": 984 }, { "epoch": 0.17711049177380203, "grad_norm": 1.5820989608764648, "learning_rate": 9.999236780724399e-06, "loss": 0.8676, "step": 985 }, { "epoch": 0.17729029937966376, "grad_norm": 1.740915298461914, "learning_rate": 9.99922657080977e-06, "loss": 0.9459, "step": 986 }, { "epoch": 0.17747010698552548, "grad_norm": 1.6252776384353638, "learning_rate": 9.999216293062196e-06, "loss": 0.9203, "step": 987 }, { "epoch": 0.1776499145913872, "grad_norm": 1.0759941339492798, "learning_rate": 9.999205947481818e-06, "loss": 1.1878, "step": 988 }, { "epoch": 0.17782972219724894, "grad_norm": 1.5474833250045776, "learning_rate": 9.999195534068775e-06, "loss": 0.9009, "step": 989 }, { "epoch": 0.17800952980311066, "grad_norm": 1.5780308246612549, "learning_rate": 9.999185052823207e-06, "loss": 0.9462, "step": 990 }, { "epoch": 0.1781893374089724, "grad_norm": 1.09675931930542, "learning_rate": 9.999174503745259e-06, "loss": 1.1922, "step": 991 }, { "epoch": 0.17836914501483414, "grad_norm": 1.6499273777008057, "learning_rate": 9.99916388683507e-06, "loss": 0.9688, "step": 992 }, { "epoch": 0.17854895262069587, "grad_norm": 1.7624759674072266, "learning_rate": 9.999153202092788e-06, "loss": 0.9752, "step": 993 }, { "epoch": 0.1787287602265576, "grad_norm": 1.5798685550689697, "learning_rate": 9.999142449518558e-06, "loss": 0.9086, "step": 994 }, { "epoch": 0.17890856783241932, "grad_norm": 1.6150418519973755, "learning_rate": 9.999131629112522e-06, "loss": 0.9189, "step": 995 }, { "epoch": 0.17908837543828104, "grad_norm": 1.6734986305236816, "learning_rate": 9.999120740874832e-06, "loss": 0.8588, "step": 996 }, { "epoch": 0.17926818304414277, "grad_norm": 1.6782318353652954, "learning_rate": 9.999109784805631e-06, "loss": 0.9199, "step": 997 }, { "epoch": 0.1794479906500045, "grad_norm": 1.5958701372146606, "learning_rate": 9.99909876090507e-06, "loss": 0.8681, "step": 998 }, { "epoch": 0.17962779825586622, "grad_norm": 1.6387134790420532, "learning_rate": 9.9990876691733e-06, "loss": 0.8887, "step": 999 }, { "epoch": 0.17980760586172795, "grad_norm": 1.602623462677002, "learning_rate": 9.999076509610468e-06, "loss": 0.8426, "step": 1000 }, { "epoch": 0.17980760586172795, "eval_loss": 0.9183032512664795, "eval_runtime": 148.6607, "eval_samples_per_second": 96.744, "eval_steps_per_second": 1.514, "step": 1000 }, { "epoch": 0.17998741346758967, "grad_norm": 1.7411108016967773, "learning_rate": 9.999065282216728e-06, "loss": 0.8596, "step": 1001 }, { "epoch": 0.1801672210734514, "grad_norm": 1.6955132484436035, "learning_rate": 9.999053986992232e-06, "loss": 0.8816, "step": 1002 }, { "epoch": 0.18034702867931313, "grad_norm": 1.744231939315796, "learning_rate": 9.999042623937132e-06, "loss": 0.9028, "step": 1003 }, { "epoch": 0.18052683628517485, "grad_norm": 1.537526249885559, "learning_rate": 9.999031193051582e-06, "loss": 0.8384, "step": 1004 }, { "epoch": 0.18070664389103658, "grad_norm": 1.5824346542358398, "learning_rate": 9.99901969433574e-06, "loss": 0.846, "step": 1005 }, { "epoch": 0.18088645149689833, "grad_norm": 1.9211928844451904, "learning_rate": 9.99900812778976e-06, "loss": 0.9801, "step": 1006 }, { "epoch": 0.18106625910276006, "grad_norm": 1.214981198310852, "learning_rate": 9.998996493413798e-06, "loss": 1.1851, "step": 1007 }, { "epoch": 0.18124606670862178, "grad_norm": 1.650638461112976, "learning_rate": 9.998984791208014e-06, "loss": 0.8325, "step": 1008 }, { "epoch": 0.1814258743144835, "grad_norm": 1.7220962047576904, "learning_rate": 9.998973021172564e-06, "loss": 0.8894, "step": 1009 }, { "epoch": 0.18160568192034524, "grad_norm": 1.5800890922546387, "learning_rate": 9.998961183307612e-06, "loss": 0.9061, "step": 1010 }, { "epoch": 0.18178548952620696, "grad_norm": 1.5993961095809937, "learning_rate": 9.998949277613315e-06, "loss": 0.873, "step": 1011 }, { "epoch": 0.1819652971320687, "grad_norm": 1.6695700883865356, "learning_rate": 9.998937304089835e-06, "loss": 0.9163, "step": 1012 }, { "epoch": 0.1821451047379304, "grad_norm": 1.6762257814407349, "learning_rate": 9.998925262737335e-06, "loss": 0.8568, "step": 1013 }, { "epoch": 0.18232491234379214, "grad_norm": 1.7990026473999023, "learning_rate": 9.99891315355598e-06, "loss": 0.8914, "step": 1014 }, { "epoch": 0.18250471994965387, "grad_norm": 1.5492894649505615, "learning_rate": 9.998900976545932e-06, "loss": 0.8316, "step": 1015 }, { "epoch": 0.1826845275555156, "grad_norm": 1.2771953344345093, "learning_rate": 9.998888731707356e-06, "loss": 1.1521, "step": 1016 }, { "epoch": 0.18286433516137732, "grad_norm": 1.5947216749191284, "learning_rate": 9.998876419040419e-06, "loss": 0.8162, "step": 1017 }, { "epoch": 0.18304414276723904, "grad_norm": 1.5767942667007446, "learning_rate": 9.99886403854529e-06, "loss": 0.8673, "step": 1018 }, { "epoch": 0.18322395037310077, "grad_norm": 1.0852261781692505, "learning_rate": 9.998851590222134e-06, "loss": 1.1319, "step": 1019 }, { "epoch": 0.18340375797896252, "grad_norm": 1.6498562097549438, "learning_rate": 9.99883907407112e-06, "loss": 0.8547, "step": 1020 }, { "epoch": 0.18358356558482425, "grad_norm": 1.6787192821502686, "learning_rate": 9.99882649009242e-06, "loss": 0.8585, "step": 1021 }, { "epoch": 0.18376337319068597, "grad_norm": 1.7477960586547852, "learning_rate": 9.998813838286206e-06, "loss": 0.9349, "step": 1022 }, { "epoch": 0.1839431807965477, "grad_norm": 5.2886834144592285, "learning_rate": 9.998801118652644e-06, "loss": 0.8734, "step": 1023 }, { "epoch": 0.18412298840240943, "grad_norm": 1.641409158706665, "learning_rate": 9.99878833119191e-06, "loss": 0.8722, "step": 1024 }, { "epoch": 0.18430279600827115, "grad_norm": 1.64620840549469, "learning_rate": 9.99877547590418e-06, "loss": 0.8417, "step": 1025 }, { "epoch": 0.18448260361413288, "grad_norm": 1.6898720264434814, "learning_rate": 9.998762552789625e-06, "loss": 0.9155, "step": 1026 }, { "epoch": 0.1846624112199946, "grad_norm": 1.6501636505126953, "learning_rate": 9.99874956184842e-06, "loss": 0.9013, "step": 1027 }, { "epoch": 0.18484221882585633, "grad_norm": 1.1634230613708496, "learning_rate": 9.998736503080743e-06, "loss": 1.1622, "step": 1028 }, { "epoch": 0.18502202643171806, "grad_norm": 1.662308692932129, "learning_rate": 9.998723376486773e-06, "loss": 0.8973, "step": 1029 }, { "epoch": 0.18520183403757978, "grad_norm": 1.6258147954940796, "learning_rate": 9.998710182066681e-06, "loss": 0.9104, "step": 1030 }, { "epoch": 0.1853816416434415, "grad_norm": 1.6954081058502197, "learning_rate": 9.998696919820654e-06, "loss": 0.8976, "step": 1031 }, { "epoch": 0.18556144924930323, "grad_norm": 1.5806288719177246, "learning_rate": 9.998683589748868e-06, "loss": 0.8217, "step": 1032 }, { "epoch": 0.18574125685516496, "grad_norm": 1.6406086683273315, "learning_rate": 9.998670191851507e-06, "loss": 0.9262, "step": 1033 }, { "epoch": 0.1859210644610267, "grad_norm": 1.2507425546646118, "learning_rate": 9.998656726128748e-06, "loss": 1.1651, "step": 1034 }, { "epoch": 0.18610087206688844, "grad_norm": 1.8227027654647827, "learning_rate": 9.998643192580776e-06, "loss": 0.901, "step": 1035 }, { "epoch": 0.18628067967275017, "grad_norm": 1.6803414821624756, "learning_rate": 9.998629591207776e-06, "loss": 0.9856, "step": 1036 }, { "epoch": 0.1864604872786119, "grad_norm": 1.0775681734085083, "learning_rate": 9.99861592200993e-06, "loss": 1.138, "step": 1037 }, { "epoch": 0.18664029488447362, "grad_norm": 1.620021104812622, "learning_rate": 9.998602184987425e-06, "loss": 0.8188, "step": 1038 }, { "epoch": 0.18682010249033534, "grad_norm": 1.512020230293274, "learning_rate": 9.998588380140448e-06, "loss": 0.9001, "step": 1039 }, { "epoch": 0.18699991009619707, "grad_norm": 1.2458845376968384, "learning_rate": 9.998574507469185e-06, "loss": 1.1136, "step": 1040 }, { "epoch": 0.1871797177020588, "grad_norm": 1.6155869960784912, "learning_rate": 9.998560566973824e-06, "loss": 0.8485, "step": 1041 }, { "epoch": 0.18735952530792052, "grad_norm": 1.543664574623108, "learning_rate": 9.998546558654556e-06, "loss": 0.8624, "step": 1042 }, { "epoch": 0.18753933291378225, "grad_norm": 1.192292332649231, "learning_rate": 9.99853248251157e-06, "loss": 1.1595, "step": 1043 }, { "epoch": 0.18771914051964397, "grad_norm": 1.0700554847717285, "learning_rate": 9.998518338545058e-06, "loss": 1.146, "step": 1044 }, { "epoch": 0.1878989481255057, "grad_norm": 1.6881253719329834, "learning_rate": 9.998504126755208e-06, "loss": 0.9039, "step": 1045 }, { "epoch": 0.18807875573136743, "grad_norm": 1.6121493577957153, "learning_rate": 9.998489847142217e-06, "loss": 0.8059, "step": 1046 }, { "epoch": 0.18825856333722915, "grad_norm": 1.552985668182373, "learning_rate": 9.998475499706278e-06, "loss": 0.9138, "step": 1047 }, { "epoch": 0.1884383709430909, "grad_norm": 1.570412516593933, "learning_rate": 9.998461084447585e-06, "loss": 0.9151, "step": 1048 }, { "epoch": 0.18861817854895263, "grad_norm": 1.7463573217391968, "learning_rate": 9.998446601366335e-06, "loss": 0.9244, "step": 1049 }, { "epoch": 0.18879798615481436, "grad_norm": 1.151336431503296, "learning_rate": 9.998432050462721e-06, "loss": 1.1347, "step": 1050 }, { "epoch": 0.18897779376067608, "grad_norm": 1.569960117340088, "learning_rate": 9.998417431736942e-06, "loss": 0.8826, "step": 1051 }, { "epoch": 0.1891576013665378, "grad_norm": 1.1872659921646118, "learning_rate": 9.9984027451892e-06, "loss": 1.1296, "step": 1052 }, { "epoch": 0.18933740897239953, "grad_norm": 1.5450249910354614, "learning_rate": 9.99838799081969e-06, "loss": 0.8782, "step": 1053 }, { "epoch": 0.18951721657826126, "grad_norm": 1.625982403755188, "learning_rate": 9.998373168628614e-06, "loss": 0.9444, "step": 1054 }, { "epoch": 0.189697024184123, "grad_norm": 1.7033522129058838, "learning_rate": 9.998358278616171e-06, "loss": 0.8464, "step": 1055 }, { "epoch": 0.1898768317899847, "grad_norm": 1.5572997331619263, "learning_rate": 9.998343320782566e-06, "loss": 0.9236, "step": 1056 }, { "epoch": 0.19005663939584644, "grad_norm": 1.6434959173202515, "learning_rate": 9.998328295128002e-06, "loss": 0.8575, "step": 1057 }, { "epoch": 0.19023644700170816, "grad_norm": 1.5354764461517334, "learning_rate": 9.998313201652679e-06, "loss": 0.8454, "step": 1058 }, { "epoch": 0.1904162546075699, "grad_norm": 1.2630836963653564, "learning_rate": 9.998298040356807e-06, "loss": 1.1462, "step": 1059 }, { "epoch": 0.19059606221343162, "grad_norm": 1.6169867515563965, "learning_rate": 9.998282811240585e-06, "loss": 0.8725, "step": 1060 }, { "epoch": 0.19077586981929337, "grad_norm": 1.1894028186798096, "learning_rate": 9.998267514304228e-06, "loss": 1.1461, "step": 1061 }, { "epoch": 0.1909556774251551, "grad_norm": 1.734787106513977, "learning_rate": 9.998252149547937e-06, "loss": 0.9187, "step": 1062 }, { "epoch": 0.19113548503101682, "grad_norm": 1.618569016456604, "learning_rate": 9.998236716971923e-06, "loss": 0.9044, "step": 1063 }, { "epoch": 0.19131529263687855, "grad_norm": 1.8119103908538818, "learning_rate": 9.998221216576395e-06, "loss": 0.9071, "step": 1064 }, { "epoch": 0.19149510024274027, "grad_norm": 1.5174676179885864, "learning_rate": 9.998205648361563e-06, "loss": 0.9714, "step": 1065 }, { "epoch": 0.191674907848602, "grad_norm": 1.8551888465881348, "learning_rate": 9.998190012327639e-06, "loss": 0.9156, "step": 1066 }, { "epoch": 0.19185471545446373, "grad_norm": 1.6056581735610962, "learning_rate": 9.998174308474836e-06, "loss": 0.8566, "step": 1067 }, { "epoch": 0.19203452306032545, "grad_norm": 1.615844964981079, "learning_rate": 9.998158536803365e-06, "loss": 0.8454, "step": 1068 }, { "epoch": 0.19221433066618718, "grad_norm": 1.3629721403121948, "learning_rate": 9.998142697313441e-06, "loss": 1.137, "step": 1069 }, { "epoch": 0.1923941382720489, "grad_norm": 1.7302526235580444, "learning_rate": 9.998126790005278e-06, "loss": 0.8561, "step": 1070 }, { "epoch": 0.19257394587791063, "grad_norm": 1.657616138458252, "learning_rate": 9.998110814879095e-06, "loss": 0.8301, "step": 1071 }, { "epoch": 0.19275375348377236, "grad_norm": 1.5904783010482788, "learning_rate": 9.998094771935105e-06, "loss": 0.8657, "step": 1072 }, { "epoch": 0.19293356108963408, "grad_norm": 1.7585985660552979, "learning_rate": 9.998078661173527e-06, "loss": 0.8097, "step": 1073 }, { "epoch": 0.1931133686954958, "grad_norm": 1.6980520486831665, "learning_rate": 9.99806248259458e-06, "loss": 0.8704, "step": 1074 }, { "epoch": 0.19329317630135756, "grad_norm": 1.7188807725906372, "learning_rate": 9.998046236198482e-06, "loss": 0.9572, "step": 1075 }, { "epoch": 0.1934729839072193, "grad_norm": 1.504024624824524, "learning_rate": 9.998029921985455e-06, "loss": 0.8317, "step": 1076 }, { "epoch": 0.193652791513081, "grad_norm": 1.7599520683288574, "learning_rate": 9.998013539955722e-06, "loss": 0.8422, "step": 1077 }, { "epoch": 0.19383259911894274, "grad_norm": 1.765140175819397, "learning_rate": 9.997997090109501e-06, "loss": 0.9594, "step": 1078 }, { "epoch": 0.19401240672480446, "grad_norm": 1.1714606285095215, "learning_rate": 9.99798057244702e-06, "loss": 1.1178, "step": 1079 }, { "epoch": 0.1941922143306662, "grad_norm": 1.6227011680603027, "learning_rate": 9.9979639869685e-06, "loss": 0.9006, "step": 1080 }, { "epoch": 0.19437202193652792, "grad_norm": 1.6475753784179688, "learning_rate": 9.997947333674165e-06, "loss": 0.9523, "step": 1081 }, { "epoch": 0.19455182954238964, "grad_norm": 1.55050528049469, "learning_rate": 9.997930612564244e-06, "loss": 0.806, "step": 1082 }, { "epoch": 0.19473163714825137, "grad_norm": 1.5492823123931885, "learning_rate": 9.997913823638963e-06, "loss": 0.9024, "step": 1083 }, { "epoch": 0.1949114447541131, "grad_norm": 1.6204050779342651, "learning_rate": 9.997896966898548e-06, "loss": 0.8924, "step": 1084 }, { "epoch": 0.19509125235997482, "grad_norm": 1.6708194017410278, "learning_rate": 9.99788004234323e-06, "loss": 0.9226, "step": 1085 }, { "epoch": 0.19527105996583655, "grad_norm": 1.7732868194580078, "learning_rate": 9.997863049973238e-06, "loss": 0.8662, "step": 1086 }, { "epoch": 0.19545086757169827, "grad_norm": 1.55925714969635, "learning_rate": 9.997845989788801e-06, "loss": 0.9153, "step": 1087 }, { "epoch": 0.19563067517756, "grad_norm": 1.660542607307434, "learning_rate": 9.997828861790153e-06, "loss": 0.9396, "step": 1088 }, { "epoch": 0.19581048278342175, "grad_norm": 1.632063388824463, "learning_rate": 9.997811665977523e-06, "loss": 0.9594, "step": 1089 }, { "epoch": 0.19599029038928348, "grad_norm": 1.5915675163269043, "learning_rate": 9.99779440235115e-06, "loss": 0.8487, "step": 1090 }, { "epoch": 0.1961700979951452, "grad_norm": 1.5595672130584717, "learning_rate": 9.997777070911264e-06, "loss": 0.8534, "step": 1091 }, { "epoch": 0.19634990560100693, "grad_norm": 1.8705416917800903, "learning_rate": 9.997759671658098e-06, "loss": 0.8875, "step": 1092 }, { "epoch": 0.19652971320686866, "grad_norm": 1.659284234046936, "learning_rate": 9.997742204591893e-06, "loss": 0.8923, "step": 1093 }, { "epoch": 0.19670952081273038, "grad_norm": 1.5662376880645752, "learning_rate": 9.997724669712885e-06, "loss": 0.8495, "step": 1094 }, { "epoch": 0.1968893284185921, "grad_norm": 1.6521917581558228, "learning_rate": 9.997707067021309e-06, "loss": 0.896, "step": 1095 }, { "epoch": 0.19706913602445383, "grad_norm": 2.0394954681396484, "learning_rate": 9.997689396517408e-06, "loss": 0.8507, "step": 1096 }, { "epoch": 0.19724894363031556, "grad_norm": 1.644618272781372, "learning_rate": 9.997671658201417e-06, "loss": 0.8661, "step": 1097 }, { "epoch": 0.19742875123617729, "grad_norm": 1.4969673156738281, "learning_rate": 9.99765385207358e-06, "loss": 0.8982, "step": 1098 }, { "epoch": 0.197608558842039, "grad_norm": 1.6305469274520874, "learning_rate": 9.997635978134138e-06, "loss": 0.8505, "step": 1099 }, { "epoch": 0.19778836644790074, "grad_norm": 1.8852152824401855, "learning_rate": 9.997618036383334e-06, "loss": 0.8907, "step": 1100 }, { "epoch": 0.19796817405376246, "grad_norm": 1.682416558265686, "learning_rate": 9.99760002682141e-06, "loss": 0.8845, "step": 1101 }, { "epoch": 0.1981479816596242, "grad_norm": 1.7051578760147095, "learning_rate": 9.997581949448611e-06, "loss": 0.9691, "step": 1102 }, { "epoch": 0.19832778926548594, "grad_norm": 1.6864702701568604, "learning_rate": 9.997563804265184e-06, "loss": 0.8688, "step": 1103 }, { "epoch": 0.19850759687134767, "grad_norm": 1.6818500757217407, "learning_rate": 9.997545591271373e-06, "loss": 0.939, "step": 1104 }, { "epoch": 0.1986874044772094, "grad_norm": 1.7603830099105835, "learning_rate": 9.997527310467426e-06, "loss": 0.9204, "step": 1105 }, { "epoch": 0.19886721208307112, "grad_norm": 1.6578097343444824, "learning_rate": 9.99750896185359e-06, "loss": 0.872, "step": 1106 }, { "epoch": 0.19904701968893285, "grad_norm": 1.8017244338989258, "learning_rate": 9.997490545430113e-06, "loss": 0.8672, "step": 1107 }, { "epoch": 0.19922682729479457, "grad_norm": 1.675238013267517, "learning_rate": 9.99747206119725e-06, "loss": 0.8548, "step": 1108 }, { "epoch": 0.1994066349006563, "grad_norm": 1.6066076755523682, "learning_rate": 9.997453509155247e-06, "loss": 0.8067, "step": 1109 }, { "epoch": 0.19958644250651802, "grad_norm": 1.6439664363861084, "learning_rate": 9.997434889304358e-06, "loss": 0.8575, "step": 1110 }, { "epoch": 0.19976625011237975, "grad_norm": 1.6457700729370117, "learning_rate": 9.997416201644833e-06, "loss": 0.9155, "step": 1111 }, { "epoch": 0.19994605771824148, "grad_norm": 1.6409355401992798, "learning_rate": 9.99739744617693e-06, "loss": 0.8383, "step": 1112 }, { "epoch": 0.2001258653241032, "grad_norm": 1.8003982305526733, "learning_rate": 9.997378622900899e-06, "loss": 0.8557, "step": 1113 }, { "epoch": 0.20030567292996493, "grad_norm": 1.5634821653366089, "learning_rate": 9.997359731816998e-06, "loss": 0.8741, "step": 1114 }, { "epoch": 0.20048548053582665, "grad_norm": 1.6003674268722534, "learning_rate": 9.997340772925484e-06, "loss": 0.9108, "step": 1115 }, { "epoch": 0.20066528814168838, "grad_norm": 1.4700613021850586, "learning_rate": 9.997321746226612e-06, "loss": 0.8986, "step": 1116 }, { "epoch": 0.20084509574755013, "grad_norm": 1.7219440937042236, "learning_rate": 9.99730265172064e-06, "loss": 0.8588, "step": 1117 }, { "epoch": 0.20102490335341186, "grad_norm": 1.7132158279418945, "learning_rate": 9.997283489407827e-06, "loss": 0.8956, "step": 1118 }, { "epoch": 0.20120471095927359, "grad_norm": 1.2306156158447266, "learning_rate": 9.997264259288437e-06, "loss": 1.1133, "step": 1119 }, { "epoch": 0.2013845185651353, "grad_norm": 1.195668339729309, "learning_rate": 9.997244961362727e-06, "loss": 1.1448, "step": 1120 }, { "epoch": 0.20156432617099704, "grad_norm": 1.6790287494659424, "learning_rate": 9.997225595630961e-06, "loss": 0.9524, "step": 1121 }, { "epoch": 0.20174413377685876, "grad_norm": 1.679117202758789, "learning_rate": 9.9972061620934e-06, "loss": 0.9219, "step": 1122 }, { "epoch": 0.2019239413827205, "grad_norm": 1.6016193628311157, "learning_rate": 9.997186660750307e-06, "loss": 0.9337, "step": 1123 }, { "epoch": 0.20210374898858222, "grad_norm": 1.53429114818573, "learning_rate": 9.997167091601949e-06, "loss": 0.875, "step": 1124 }, { "epoch": 0.20228355659444394, "grad_norm": 1.3457406759262085, "learning_rate": 9.99714745464859e-06, "loss": 1.1477, "step": 1125 }, { "epoch": 0.20246336420030567, "grad_norm": 1.5627310276031494, "learning_rate": 9.997127749890498e-06, "loss": 0.8497, "step": 1126 }, { "epoch": 0.2026431718061674, "grad_norm": 1.5689365863800049, "learning_rate": 9.99710797732794e-06, "loss": 0.8575, "step": 1127 }, { "epoch": 0.20282297941202912, "grad_norm": 1.8022481203079224, "learning_rate": 9.997088136961182e-06, "loss": 0.8419, "step": 1128 }, { "epoch": 0.20300278701789085, "grad_norm": 1.637697458267212, "learning_rate": 9.997068228790496e-06, "loss": 0.9169, "step": 1129 }, { "epoch": 0.20318259462375257, "grad_norm": 1.5360288619995117, "learning_rate": 9.99704825281615e-06, "loss": 0.888, "step": 1130 }, { "epoch": 0.20336240222961433, "grad_norm": 1.6470123529434204, "learning_rate": 9.997028209038417e-06, "loss": 0.9064, "step": 1131 }, { "epoch": 0.20354220983547605, "grad_norm": 1.6743104457855225, "learning_rate": 9.997008097457567e-06, "loss": 0.8176, "step": 1132 }, { "epoch": 0.20372201744133778, "grad_norm": 1.641903281211853, "learning_rate": 9.996987918073875e-06, "loss": 0.8925, "step": 1133 }, { "epoch": 0.2039018250471995, "grad_norm": 1.7318191528320312, "learning_rate": 9.996967670887612e-06, "loss": 0.869, "step": 1134 }, { "epoch": 0.20408163265306123, "grad_norm": 5.048027038574219, "learning_rate": 9.996947355899056e-06, "loss": 0.9511, "step": 1135 }, { "epoch": 0.20426144025892295, "grad_norm": 1.7337167263031006, "learning_rate": 9.99692697310848e-06, "loss": 0.8626, "step": 1136 }, { "epoch": 0.20444124786478468, "grad_norm": 1.6298738718032837, "learning_rate": 9.996906522516164e-06, "loss": 0.8429, "step": 1137 }, { "epoch": 0.2046210554706464, "grad_norm": 1.8349233865737915, "learning_rate": 9.99688600412238e-06, "loss": 0.8881, "step": 1138 }, { "epoch": 0.20480086307650813, "grad_norm": 1.248417615890503, "learning_rate": 9.99686541792741e-06, "loss": 1.1451, "step": 1139 }, { "epoch": 0.20498067068236986, "grad_norm": 1.4980885982513428, "learning_rate": 9.996844763931535e-06, "loss": 0.9249, "step": 1140 }, { "epoch": 0.20516047828823158, "grad_norm": 1.7017689943313599, "learning_rate": 9.996824042135032e-06, "loss": 0.9266, "step": 1141 }, { "epoch": 0.2053402858940933, "grad_norm": 1.6361243724822998, "learning_rate": 9.996803252538183e-06, "loss": 0.8955, "step": 1142 }, { "epoch": 0.20552009349995504, "grad_norm": 1.6854652166366577, "learning_rate": 9.99678239514127e-06, "loss": 0.9181, "step": 1143 }, { "epoch": 0.20569990110581676, "grad_norm": 1.7236719131469727, "learning_rate": 9.996761469944576e-06, "loss": 0.8995, "step": 1144 }, { "epoch": 0.20587970871167852, "grad_norm": 1.7499579191207886, "learning_rate": 9.996740476948386e-06, "loss": 0.9357, "step": 1145 }, { "epoch": 0.20605951631754024, "grad_norm": 1.7791534662246704, "learning_rate": 9.996719416152985e-06, "loss": 0.854, "step": 1146 }, { "epoch": 0.20623932392340197, "grad_norm": 1.5543245077133179, "learning_rate": 9.996698287558656e-06, "loss": 0.8971, "step": 1147 }, { "epoch": 0.2064191315292637, "grad_norm": 1.2614222764968872, "learning_rate": 9.99667709116569e-06, "loss": 1.1826, "step": 1148 }, { "epoch": 0.20659893913512542, "grad_norm": 1.5204534530639648, "learning_rate": 9.996655826974369e-06, "loss": 0.8894, "step": 1149 }, { "epoch": 0.20677874674098715, "grad_norm": 1.6531468629837036, "learning_rate": 9.996634494984987e-06, "loss": 0.8678, "step": 1150 }, { "epoch": 0.20695855434684887, "grad_norm": 1.6424745321273804, "learning_rate": 9.99661309519783e-06, "loss": 0.8728, "step": 1151 }, { "epoch": 0.2071383619527106, "grad_norm": 1.556736707687378, "learning_rate": 9.99659162761319e-06, "loss": 0.9278, "step": 1152 }, { "epoch": 0.20731816955857232, "grad_norm": 1.5637335777282715, "learning_rate": 9.996570092231359e-06, "loss": 0.8866, "step": 1153 }, { "epoch": 0.20749797716443405, "grad_norm": 1.5742286443710327, "learning_rate": 9.996548489052627e-06, "loss": 0.8626, "step": 1154 }, { "epoch": 0.20767778477029578, "grad_norm": 1.5626260042190552, "learning_rate": 9.996526818077288e-06, "loss": 0.9046, "step": 1155 }, { "epoch": 0.2078575923761575, "grad_norm": 1.214654564857483, "learning_rate": 9.996505079305637e-06, "loss": 1.1128, "step": 1156 }, { "epoch": 0.20803739998201923, "grad_norm": 1.7192391157150269, "learning_rate": 9.996483272737967e-06, "loss": 0.8978, "step": 1157 }, { "epoch": 0.20821720758788095, "grad_norm": 1.5840582847595215, "learning_rate": 9.996461398374576e-06, "loss": 0.8531, "step": 1158 }, { "epoch": 0.2083970151937427, "grad_norm": 1.554832100868225, "learning_rate": 9.996439456215758e-06, "loss": 0.8496, "step": 1159 }, { "epoch": 0.20857682279960443, "grad_norm": 1.5588065385818481, "learning_rate": 9.996417446261815e-06, "loss": 0.869, "step": 1160 }, { "epoch": 0.20875663040546616, "grad_norm": 1.0271199941635132, "learning_rate": 9.996395368513042e-06, "loss": 1.1536, "step": 1161 }, { "epoch": 0.20893643801132789, "grad_norm": 1.7855751514434814, "learning_rate": 9.99637322296974e-06, "loss": 0.8616, "step": 1162 }, { "epoch": 0.2091162456171896, "grad_norm": 1.7539931535720825, "learning_rate": 9.99635100963221e-06, "loss": 0.8661, "step": 1163 }, { "epoch": 0.20929605322305134, "grad_norm": 1.5385878086090088, "learning_rate": 9.996328728500752e-06, "loss": 0.8706, "step": 1164 }, { "epoch": 0.20947586082891306, "grad_norm": 1.4935142993927002, "learning_rate": 9.996306379575668e-06, "loss": 0.8817, "step": 1165 }, { "epoch": 0.2096556684347748, "grad_norm": 1.1621630191802979, "learning_rate": 9.996283962857265e-06, "loss": 1.159, "step": 1166 }, { "epoch": 0.20983547604063651, "grad_norm": 1.643636703491211, "learning_rate": 9.996261478345842e-06, "loss": 0.8016, "step": 1167 }, { "epoch": 0.21001528364649824, "grad_norm": 1.5599431991577148, "learning_rate": 9.996238926041709e-06, "loss": 0.8869, "step": 1168 }, { "epoch": 0.21019509125235997, "grad_norm": 1.6441482305526733, "learning_rate": 9.996216305945166e-06, "loss": 0.9466, "step": 1169 }, { "epoch": 0.2103748988582217, "grad_norm": 1.4829707145690918, "learning_rate": 9.996193618056526e-06, "loss": 0.9027, "step": 1170 }, { "epoch": 0.21055470646408342, "grad_norm": 1.536405086517334, "learning_rate": 9.996170862376094e-06, "loss": 0.8344, "step": 1171 }, { "epoch": 0.21073451406994517, "grad_norm": 1.6735633611679077, "learning_rate": 9.996148038904178e-06, "loss": 0.8406, "step": 1172 }, { "epoch": 0.2109143216758069, "grad_norm": 1.6547268629074097, "learning_rate": 9.99612514764109e-06, "loss": 0.9013, "step": 1173 }, { "epoch": 0.21109412928166862, "grad_norm": 1.7029242515563965, "learning_rate": 9.996102188587138e-06, "loss": 0.8428, "step": 1174 }, { "epoch": 0.21127393688753035, "grad_norm": 1.6346205472946167, "learning_rate": 9.996079161742635e-06, "loss": 0.8342, "step": 1175 }, { "epoch": 0.21145374449339208, "grad_norm": 1.5694924592971802, "learning_rate": 9.996056067107895e-06, "loss": 0.8655, "step": 1176 }, { "epoch": 0.2116335520992538, "grad_norm": 1.6010578870773315, "learning_rate": 9.996032904683229e-06, "loss": 0.8723, "step": 1177 }, { "epoch": 0.21181335970511553, "grad_norm": 1.57174551486969, "learning_rate": 9.996009674468951e-06, "loss": 0.9148, "step": 1178 }, { "epoch": 0.21199316731097725, "grad_norm": 1.651734709739685, "learning_rate": 9.995986376465378e-06, "loss": 0.8918, "step": 1179 }, { "epoch": 0.21217297491683898, "grad_norm": 1.7309068441390991, "learning_rate": 9.995963010672824e-06, "loss": 0.9292, "step": 1180 }, { "epoch": 0.2123527825227007, "grad_norm": 1.5808395147323608, "learning_rate": 9.99593957709161e-06, "loss": 0.891, "step": 1181 }, { "epoch": 0.21253259012856243, "grad_norm": 1.5946437120437622, "learning_rate": 9.99591607572205e-06, "loss": 0.8484, "step": 1182 }, { "epoch": 0.21271239773442416, "grad_norm": 1.5785659551620483, "learning_rate": 9.995892506564461e-06, "loss": 0.8234, "step": 1183 }, { "epoch": 0.21289220534028588, "grad_norm": 1.6010757684707642, "learning_rate": 9.99586886961917e-06, "loss": 0.8931, "step": 1184 }, { "epoch": 0.2130720129461476, "grad_norm": 1.59376060962677, "learning_rate": 9.995845164886493e-06, "loss": 0.8452, "step": 1185 }, { "epoch": 0.21325182055200936, "grad_norm": 1.2045360803604126, "learning_rate": 9.995821392366751e-06, "loss": 1.1468, "step": 1186 }, { "epoch": 0.2134316281578711, "grad_norm": 1.8576120138168335, "learning_rate": 9.99579755206027e-06, "loss": 0.9917, "step": 1187 }, { "epoch": 0.21361143576373282, "grad_norm": 1.6176151037216187, "learning_rate": 9.99577364396737e-06, "loss": 0.9385, "step": 1188 }, { "epoch": 0.21379124336959454, "grad_norm": 1.5586296319961548, "learning_rate": 9.995749668088378e-06, "loss": 0.8332, "step": 1189 }, { "epoch": 0.21397105097545627, "grad_norm": 1.6651971340179443, "learning_rate": 9.995725624423615e-06, "loss": 0.8932, "step": 1190 }, { "epoch": 0.214150858581318, "grad_norm": 1.5643041133880615, "learning_rate": 9.995701512973413e-06, "loss": 0.8537, "step": 1191 }, { "epoch": 0.21433066618717972, "grad_norm": 1.595707654953003, "learning_rate": 9.995677333738097e-06, "loss": 0.8863, "step": 1192 }, { "epoch": 0.21451047379304145, "grad_norm": 1.68520987033844, "learning_rate": 9.995653086717993e-06, "loss": 0.8828, "step": 1193 }, { "epoch": 0.21469028139890317, "grad_norm": 1.627916693687439, "learning_rate": 9.995628771913432e-06, "loss": 0.9152, "step": 1194 }, { "epoch": 0.2148700890047649, "grad_norm": 1.5128651857376099, "learning_rate": 9.995604389324742e-06, "loss": 0.8096, "step": 1195 }, { "epoch": 0.21504989661062662, "grad_norm": 1.615482211112976, "learning_rate": 9.995579938952259e-06, "loss": 0.9051, "step": 1196 }, { "epoch": 0.21522970421648835, "grad_norm": 1.5266584157943726, "learning_rate": 9.995555420796309e-06, "loss": 0.8075, "step": 1197 }, { "epoch": 0.21540951182235007, "grad_norm": 1.6775375604629517, "learning_rate": 9.995530834857226e-06, "loss": 0.8731, "step": 1198 }, { "epoch": 0.2155893194282118, "grad_norm": 1.7140687704086304, "learning_rate": 9.995506181135345e-06, "loss": 0.8507, "step": 1199 }, { "epoch": 0.21576912703407355, "grad_norm": 1.887406587600708, "learning_rate": 9.995481459631e-06, "loss": 0.8979, "step": 1200 }, { "epoch": 0.21594893463993528, "grad_norm": 1.7167541980743408, "learning_rate": 9.995456670344526e-06, "loss": 0.8407, "step": 1201 }, { "epoch": 0.216128742245797, "grad_norm": 1.5895886421203613, "learning_rate": 9.995431813276262e-06, "loss": 0.8527, "step": 1202 }, { "epoch": 0.21630854985165873, "grad_norm": 1.6785030364990234, "learning_rate": 9.99540688842654e-06, "loss": 0.8546, "step": 1203 }, { "epoch": 0.21648835745752046, "grad_norm": 1.5813006162643433, "learning_rate": 9.995381895795703e-06, "loss": 0.854, "step": 1204 }, { "epoch": 0.21666816506338218, "grad_norm": 1.7541733980178833, "learning_rate": 9.995356835384087e-06, "loss": 0.8577, "step": 1205 }, { "epoch": 0.2168479726692439, "grad_norm": 1.8044145107269287, "learning_rate": 9.995331707192035e-06, "loss": 0.8277, "step": 1206 }, { "epoch": 0.21702778027510564, "grad_norm": 1.649144172668457, "learning_rate": 9.995306511219885e-06, "loss": 0.8404, "step": 1207 }, { "epoch": 0.21720758788096736, "grad_norm": 1.625285029411316, "learning_rate": 9.99528124746798e-06, "loss": 0.9508, "step": 1208 }, { "epoch": 0.2173873954868291, "grad_norm": 1.6760425567626953, "learning_rate": 9.995255915936664e-06, "loss": 0.837, "step": 1209 }, { "epoch": 0.21756720309269081, "grad_norm": 1.6611120700836182, "learning_rate": 9.995230516626278e-06, "loss": 0.9396, "step": 1210 }, { "epoch": 0.21774701069855254, "grad_norm": 1.604872703552246, "learning_rate": 9.99520504953717e-06, "loss": 0.901, "step": 1211 }, { "epoch": 0.21792681830441427, "grad_norm": 1.0954478979110718, "learning_rate": 9.995179514669683e-06, "loss": 1.1507, "step": 1212 }, { "epoch": 0.218106625910276, "grad_norm": 1.6085578203201294, "learning_rate": 9.995153912024164e-06, "loss": 0.8522, "step": 1213 }, { "epoch": 0.21828643351613775, "grad_norm": 1.041385293006897, "learning_rate": 9.995128241600963e-06, "loss": 1.133, "step": 1214 }, { "epoch": 0.21846624112199947, "grad_norm": 1.081011176109314, "learning_rate": 9.995102503400423e-06, "loss": 1.1577, "step": 1215 }, { "epoch": 0.2186460487278612, "grad_norm": 1.6865074634552002, "learning_rate": 9.995076697422898e-06, "loss": 0.9423, "step": 1216 }, { "epoch": 0.21882585633372292, "grad_norm": 1.5516531467437744, "learning_rate": 9.995050823668738e-06, "loss": 0.8186, "step": 1217 }, { "epoch": 0.21900566393958465, "grad_norm": 1.6800824403762817, "learning_rate": 9.99502488213829e-06, "loss": 0.8721, "step": 1218 }, { "epoch": 0.21918547154544638, "grad_norm": 1.7509928941726685, "learning_rate": 9.994998872831908e-06, "loss": 0.8687, "step": 1219 }, { "epoch": 0.2193652791513081, "grad_norm": 1.5287500619888306, "learning_rate": 9.994972795749946e-06, "loss": 0.8113, "step": 1220 }, { "epoch": 0.21954508675716983, "grad_norm": 1.8345990180969238, "learning_rate": 9.994946650892759e-06, "loss": 0.8887, "step": 1221 }, { "epoch": 0.21972489436303155, "grad_norm": 1.6994520425796509, "learning_rate": 9.994920438260698e-06, "loss": 0.7968, "step": 1222 }, { "epoch": 0.21990470196889328, "grad_norm": 1.6789871454238892, "learning_rate": 9.994894157854122e-06, "loss": 0.828, "step": 1223 }, { "epoch": 0.220084509574755, "grad_norm": 1.5617468357086182, "learning_rate": 9.994867809673385e-06, "loss": 0.8811, "step": 1224 }, { "epoch": 0.22026431718061673, "grad_norm": 1.5777920484542847, "learning_rate": 9.994841393718847e-06, "loss": 0.8847, "step": 1225 }, { "epoch": 0.22044412478647846, "grad_norm": 1.5813560485839844, "learning_rate": 9.994814909990864e-06, "loss": 0.8695, "step": 1226 }, { "epoch": 0.22062393239234018, "grad_norm": 1.5594406127929688, "learning_rate": 9.994788358489797e-06, "loss": 0.86, "step": 1227 }, { "epoch": 0.22080373999820194, "grad_norm": 1.3219443559646606, "learning_rate": 9.994761739216008e-06, "loss": 1.1274, "step": 1228 }, { "epoch": 0.22098354760406366, "grad_norm": 1.5841752290725708, "learning_rate": 9.994735052169852e-06, "loss": 0.8658, "step": 1229 }, { "epoch": 0.2211633552099254, "grad_norm": 1.0670095682144165, "learning_rate": 9.994708297351698e-06, "loss": 1.1087, "step": 1230 }, { "epoch": 0.22134316281578711, "grad_norm": 1.1275286674499512, "learning_rate": 9.994681474761907e-06, "loss": 1.1476, "step": 1231 }, { "epoch": 0.22152297042164884, "grad_norm": 1.606529712677002, "learning_rate": 9.99465458440084e-06, "loss": 0.8113, "step": 1232 }, { "epoch": 0.22170277802751057, "grad_norm": 1.79930579662323, "learning_rate": 9.994627626268863e-06, "loss": 0.8883, "step": 1233 }, { "epoch": 0.2218825856333723, "grad_norm": 1.531972885131836, "learning_rate": 9.994600600366344e-06, "loss": 0.8428, "step": 1234 }, { "epoch": 0.22206239323923402, "grad_norm": 1.5411536693572998, "learning_rate": 9.99457350669365e-06, "loss": 0.8807, "step": 1235 }, { "epoch": 0.22224220084509574, "grad_norm": 1.609501838684082, "learning_rate": 9.994546345251144e-06, "loss": 0.8286, "step": 1236 }, { "epoch": 0.22242200845095747, "grad_norm": 1.5899090766906738, "learning_rate": 9.994519116039202e-06, "loss": 0.8738, "step": 1237 }, { "epoch": 0.2226018160568192, "grad_norm": 1.6042814254760742, "learning_rate": 9.994491819058186e-06, "loss": 0.8997, "step": 1238 }, { "epoch": 0.22278162366268092, "grad_norm": 1.6078020334243774, "learning_rate": 9.994464454308468e-06, "loss": 0.8893, "step": 1239 }, { "epoch": 0.22296143126854265, "grad_norm": 1.692533016204834, "learning_rate": 9.994437021790424e-06, "loss": 0.8168, "step": 1240 }, { "epoch": 0.22314123887440437, "grad_norm": 1.612770676612854, "learning_rate": 9.99440952150442e-06, "loss": 0.8521, "step": 1241 }, { "epoch": 0.22332104648026613, "grad_norm": 1.65276038646698, "learning_rate": 9.994381953450835e-06, "loss": 0.8255, "step": 1242 }, { "epoch": 0.22350085408612785, "grad_norm": 1.5059032440185547, "learning_rate": 9.99435431763004e-06, "loss": 0.8047, "step": 1243 }, { "epoch": 0.22368066169198958, "grad_norm": 1.5415090322494507, "learning_rate": 9.994326614042408e-06, "loss": 0.8904, "step": 1244 }, { "epoch": 0.2238604692978513, "grad_norm": 1.5879424810409546, "learning_rate": 9.994298842688318e-06, "loss": 0.9091, "step": 1245 }, { "epoch": 0.22404027690371303, "grad_norm": 1.572386384010315, "learning_rate": 9.994271003568146e-06, "loss": 0.8207, "step": 1246 }, { "epoch": 0.22422008450957476, "grad_norm": 1.7026622295379639, "learning_rate": 9.99424309668227e-06, "loss": 0.8773, "step": 1247 }, { "epoch": 0.22439989211543648, "grad_norm": 1.6302940845489502, "learning_rate": 9.994215122031069e-06, "loss": 0.9246, "step": 1248 }, { "epoch": 0.2245796997212982, "grad_norm": 1.6091431379318237, "learning_rate": 9.994187079614922e-06, "loss": 0.7967, "step": 1249 }, { "epoch": 0.22475950732715994, "grad_norm": 1.5278217792510986, "learning_rate": 9.994158969434207e-06, "loss": 0.849, "step": 1250 }, { "epoch": 0.22493931493302166, "grad_norm": 1.6184066534042358, "learning_rate": 9.994130791489309e-06, "loss": 0.9032, "step": 1251 }, { "epoch": 0.2251191225388834, "grad_norm": 1.5587689876556396, "learning_rate": 9.994102545780608e-06, "loss": 0.8929, "step": 1252 }, { "epoch": 0.2252989301447451, "grad_norm": 1.5628466606140137, "learning_rate": 9.99407423230849e-06, "loss": 0.8577, "step": 1253 }, { "epoch": 0.22547873775060684, "grad_norm": 1.4630910158157349, "learning_rate": 9.994045851073338e-06, "loss": 0.8412, "step": 1254 }, { "epoch": 0.22565854535646857, "grad_norm": 1.528404951095581, "learning_rate": 9.994017402075535e-06, "loss": 1.1754, "step": 1255 }, { "epoch": 0.22583835296233032, "grad_norm": 1.3221665620803833, "learning_rate": 9.99398888531547e-06, "loss": 1.1686, "step": 1256 }, { "epoch": 0.22601816056819204, "grad_norm": 1.6099722385406494, "learning_rate": 9.993960300793527e-06, "loss": 0.8636, "step": 1257 }, { "epoch": 0.22619796817405377, "grad_norm": 1.6200709342956543, "learning_rate": 9.993931648510097e-06, "loss": 0.9604, "step": 1258 }, { "epoch": 0.2263777757799155, "grad_norm": 1.647566795349121, "learning_rate": 9.993902928465568e-06, "loss": 0.8909, "step": 1259 }, { "epoch": 0.22655758338577722, "grad_norm": 1.3549002408981323, "learning_rate": 9.993874140660329e-06, "loss": 1.1742, "step": 1260 }, { "epoch": 0.22673739099163895, "grad_norm": 1.5532987117767334, "learning_rate": 9.99384528509477e-06, "loss": 0.8682, "step": 1261 }, { "epoch": 0.22691719859750067, "grad_norm": 1.677850365638733, "learning_rate": 9.993816361769282e-06, "loss": 0.8676, "step": 1262 }, { "epoch": 0.2270970062033624, "grad_norm": 1.5291597843170166, "learning_rate": 9.993787370684257e-06, "loss": 0.876, "step": 1263 }, { "epoch": 0.22727681380922413, "grad_norm": 1.620069146156311, "learning_rate": 9.993758311840093e-06, "loss": 0.8903, "step": 1264 }, { "epoch": 0.22745662141508585, "grad_norm": 1.5968198776245117, "learning_rate": 9.993729185237181e-06, "loss": 0.8766, "step": 1265 }, { "epoch": 0.22763642902094758, "grad_norm": 1.2669708728790283, "learning_rate": 9.993699990875916e-06, "loss": 1.1273, "step": 1266 }, { "epoch": 0.2278162366268093, "grad_norm": 1.5142184495925903, "learning_rate": 9.993670728756695e-06, "loss": 0.898, "step": 1267 }, { "epoch": 0.22799604423267103, "grad_norm": 1.6084306240081787, "learning_rate": 9.993641398879911e-06, "loss": 0.8414, "step": 1268 }, { "epoch": 0.22817585183853276, "grad_norm": 1.583457589149475, "learning_rate": 9.99361200124597e-06, "loss": 0.8334, "step": 1269 }, { "epoch": 0.2283556594443945, "grad_norm": 1.1681764125823975, "learning_rate": 9.993582535855265e-06, "loss": 1.1821, "step": 1270 }, { "epoch": 0.22853546705025624, "grad_norm": 1.5214698314666748, "learning_rate": 9.993553002708197e-06, "loss": 0.8637, "step": 1271 }, { "epoch": 0.22871527465611796, "grad_norm": 1.1507858037948608, "learning_rate": 9.993523401805167e-06, "loss": 1.0876, "step": 1272 }, { "epoch": 0.2288950822619797, "grad_norm": 1.7407139539718628, "learning_rate": 9.993493733146577e-06, "loss": 0.8863, "step": 1273 }, { "epoch": 0.2290748898678414, "grad_norm": 1.643062710762024, "learning_rate": 9.993463996732828e-06, "loss": 0.8355, "step": 1274 }, { "epoch": 0.22925469747370314, "grad_norm": 1.1721699237823486, "learning_rate": 9.993434192564326e-06, "loss": 1.148, "step": 1275 }, { "epoch": 0.22943450507956487, "grad_norm": 1.6167521476745605, "learning_rate": 9.993404320641474e-06, "loss": 0.8987, "step": 1276 }, { "epoch": 0.2296143126854266, "grad_norm": 1.6452369689941406, "learning_rate": 9.993374380964676e-06, "loss": 0.8572, "step": 1277 }, { "epoch": 0.22979412029128832, "grad_norm": 1.4449687004089355, "learning_rate": 9.993344373534342e-06, "loss": 0.9547, "step": 1278 }, { "epoch": 0.22997392789715004, "grad_norm": 2.758622169494629, "learning_rate": 9.993314298350874e-06, "loss": 0.7911, "step": 1279 }, { "epoch": 0.23015373550301177, "grad_norm": 2.0881059169769287, "learning_rate": 9.993284155414684e-06, "loss": 0.9302, "step": 1280 }, { "epoch": 0.2303335431088735, "grad_norm": 1.592190146446228, "learning_rate": 9.99325394472618e-06, "loss": 0.9506, "step": 1281 }, { "epoch": 0.23051335071473522, "grad_norm": 1.7048077583312988, "learning_rate": 9.993223666285773e-06, "loss": 0.9176, "step": 1282 }, { "epoch": 0.23069315832059697, "grad_norm": 1.8975619077682495, "learning_rate": 9.993193320093871e-06, "loss": 0.8531, "step": 1283 }, { "epoch": 0.2308729659264587, "grad_norm": 1.9146668910980225, "learning_rate": 9.993162906150889e-06, "loss": 0.898, "step": 1284 }, { "epoch": 0.23105277353232043, "grad_norm": 1.6907196044921875, "learning_rate": 9.993132424457238e-06, "loss": 0.9045, "step": 1285 }, { "epoch": 0.23123258113818215, "grad_norm": 1.6301555633544922, "learning_rate": 9.993101875013329e-06, "loss": 0.8396, "step": 1286 }, { "epoch": 0.23141238874404388, "grad_norm": 1.670201063156128, "learning_rate": 9.993071257819582e-06, "loss": 0.8416, "step": 1287 }, { "epoch": 0.2315921963499056, "grad_norm": 1.6633896827697754, "learning_rate": 9.993040572876407e-06, "loss": 0.8854, "step": 1288 }, { "epoch": 0.23177200395576733, "grad_norm": 1.5613206624984741, "learning_rate": 9.993009820184226e-06, "loss": 0.9044, "step": 1289 }, { "epoch": 0.23195181156162906, "grad_norm": 1.3299496173858643, "learning_rate": 9.99297899974345e-06, "loss": 1.1285, "step": 1290 }, { "epoch": 0.23213161916749078, "grad_norm": 1.2371331453323364, "learning_rate": 9.992948111554504e-06, "loss": 1.146, "step": 1291 }, { "epoch": 0.2323114267733525, "grad_norm": 1.6364248991012573, "learning_rate": 9.992917155617801e-06, "loss": 0.8682, "step": 1292 }, { "epoch": 0.23249123437921423, "grad_norm": 1.7077888250350952, "learning_rate": 9.992886131933764e-06, "loss": 0.8425, "step": 1293 }, { "epoch": 0.23267104198507596, "grad_norm": 1.524035096168518, "learning_rate": 9.992855040502814e-06, "loss": 0.869, "step": 1294 }, { "epoch": 0.2328508495909377, "grad_norm": 1.5425270795822144, "learning_rate": 9.992823881325372e-06, "loss": 0.8745, "step": 1295 }, { "epoch": 0.2330306571967994, "grad_norm": 1.586506962776184, "learning_rate": 9.992792654401861e-06, "loss": 0.8552, "step": 1296 }, { "epoch": 0.23321046480266117, "grad_norm": 1.608437418937683, "learning_rate": 9.992761359732706e-06, "loss": 0.9374, "step": 1297 }, { "epoch": 0.2333902724085229, "grad_norm": 1.5223108530044556, "learning_rate": 9.992729997318331e-06, "loss": 0.9391, "step": 1298 }, { "epoch": 0.23357008001438462, "grad_norm": 1.5415623188018799, "learning_rate": 9.99269856715916e-06, "loss": 0.9107, "step": 1299 }, { "epoch": 0.23374988762024634, "grad_norm": 1.5850861072540283, "learning_rate": 9.99266706925562e-06, "loss": 0.8945, "step": 1300 }, { "epoch": 0.23392969522610807, "grad_norm": 1.8269555568695068, "learning_rate": 9.992635503608139e-06, "loss": 0.9192, "step": 1301 }, { "epoch": 0.2341095028319698, "grad_norm": 1.511799693107605, "learning_rate": 9.992603870217145e-06, "loss": 0.9111, "step": 1302 }, { "epoch": 0.23428931043783152, "grad_norm": 1.5179312229156494, "learning_rate": 9.99257216908307e-06, "loss": 0.9175, "step": 1303 }, { "epoch": 0.23446911804369325, "grad_norm": 1.5787969827651978, "learning_rate": 9.99254040020634e-06, "loss": 0.9584, "step": 1304 }, { "epoch": 0.23464892564955497, "grad_norm": 1.5668203830718994, "learning_rate": 9.992508563587386e-06, "loss": 0.8604, "step": 1305 }, { "epoch": 0.2348287332554167, "grad_norm": 1.5674363374710083, "learning_rate": 9.992476659226645e-06, "loss": 0.8781, "step": 1306 }, { "epoch": 0.23500854086127843, "grad_norm": 1.4807945489883423, "learning_rate": 9.992444687124543e-06, "loss": 1.1223, "step": 1307 }, { "epoch": 0.23518834846714015, "grad_norm": 1.614135980606079, "learning_rate": 9.99241264728152e-06, "loss": 0.8716, "step": 1308 }, { "epoch": 0.23536815607300188, "grad_norm": 1.2814626693725586, "learning_rate": 9.992380539698006e-06, "loss": 1.182, "step": 1309 }, { "epoch": 0.2355479636788636, "grad_norm": 1.811711311340332, "learning_rate": 9.99234836437444e-06, "loss": 0.8889, "step": 1310 }, { "epoch": 0.23572777128472536, "grad_norm": 1.7625306844711304, "learning_rate": 9.992316121311259e-06, "loss": 0.8938, "step": 1311 }, { "epoch": 0.23590757889058708, "grad_norm": 1.5758488178253174, "learning_rate": 9.992283810508896e-06, "loss": 0.84, "step": 1312 }, { "epoch": 0.2360873864964488, "grad_norm": 1.749176025390625, "learning_rate": 9.992251431967792e-06, "loss": 0.8263, "step": 1313 }, { "epoch": 0.23626719410231053, "grad_norm": 1.6391639709472656, "learning_rate": 9.992218985688388e-06, "loss": 0.9234, "step": 1314 }, { "epoch": 0.23644700170817226, "grad_norm": 1.6131716966629028, "learning_rate": 9.992186471671124e-06, "loss": 0.8843, "step": 1315 }, { "epoch": 0.236626809314034, "grad_norm": 1.6756064891815186, "learning_rate": 9.992153889916439e-06, "loss": 0.9401, "step": 1316 }, { "epoch": 0.2368066169198957, "grad_norm": 1.538582682609558, "learning_rate": 9.992121240424776e-06, "loss": 0.8491, "step": 1317 }, { "epoch": 0.23698642452575744, "grad_norm": 1.5896841287612915, "learning_rate": 9.992088523196577e-06, "loss": 0.9142, "step": 1318 }, { "epoch": 0.23716623213161916, "grad_norm": 1.611388087272644, "learning_rate": 9.99205573823229e-06, "loss": 0.8705, "step": 1319 }, { "epoch": 0.2373460397374809, "grad_norm": 1.5598453283309937, "learning_rate": 9.992022885532354e-06, "loss": 0.8737, "step": 1320 }, { "epoch": 0.23752584734334262, "grad_norm": 1.5209224224090576, "learning_rate": 9.991989965097217e-06, "loss": 0.8363, "step": 1321 }, { "epoch": 0.23770565494920434, "grad_norm": 1.5926804542541504, "learning_rate": 9.991956976927328e-06, "loss": 1.1385, "step": 1322 }, { "epoch": 0.23788546255506607, "grad_norm": 1.796204924583435, "learning_rate": 9.991923921023135e-06, "loss": 0.8634, "step": 1323 }, { "epoch": 0.2380652701609278, "grad_norm": 1.1445019245147705, "learning_rate": 9.991890797385081e-06, "loss": 1.1176, "step": 1324 }, { "epoch": 0.23824507776678955, "grad_norm": 1.5844619274139404, "learning_rate": 9.99185760601362e-06, "loss": 0.8628, "step": 1325 }, { "epoch": 0.23842488537265127, "grad_norm": 1.6608521938323975, "learning_rate": 9.991824346909203e-06, "loss": 0.8172, "step": 1326 }, { "epoch": 0.238604692978513, "grad_norm": 1.2437269687652588, "learning_rate": 9.991791020072277e-06, "loss": 1.1005, "step": 1327 }, { "epoch": 0.23878450058437473, "grad_norm": 1.5913655757904053, "learning_rate": 9.991757625503298e-06, "loss": 0.8189, "step": 1328 }, { "epoch": 0.23896430819023645, "grad_norm": 1.2946503162384033, "learning_rate": 9.991724163202717e-06, "loss": 1.137, "step": 1329 }, { "epoch": 0.23914411579609818, "grad_norm": 1.6306135654449463, "learning_rate": 9.99169063317099e-06, "loss": 0.902, "step": 1330 }, { "epoch": 0.2393239234019599, "grad_norm": 1.7114380598068237, "learning_rate": 9.991657035408571e-06, "loss": 0.883, "step": 1331 }, { "epoch": 0.23950373100782163, "grad_norm": 1.6081830263137817, "learning_rate": 9.991623369915914e-06, "loss": 0.8776, "step": 1332 }, { "epoch": 0.23968353861368336, "grad_norm": 1.7401447296142578, "learning_rate": 9.99158963669348e-06, "loss": 0.8244, "step": 1333 }, { "epoch": 0.23986334621954508, "grad_norm": 1.6068040132522583, "learning_rate": 9.991555835741723e-06, "loss": 0.8388, "step": 1334 }, { "epoch": 0.2400431538254068, "grad_norm": 1.5052939653396606, "learning_rate": 9.991521967061104e-06, "loss": 0.8686, "step": 1335 }, { "epoch": 0.24022296143126853, "grad_norm": 1.6262890100479126, "learning_rate": 9.99148803065208e-06, "loss": 0.8869, "step": 1336 }, { "epoch": 0.24040276903713026, "grad_norm": 1.514194369316101, "learning_rate": 9.991454026515113e-06, "loss": 0.8807, "step": 1337 }, { "epoch": 0.24058257664299199, "grad_norm": 1.5441219806671143, "learning_rate": 9.991419954650664e-06, "loss": 0.8347, "step": 1338 }, { "epoch": 0.24076238424885374, "grad_norm": 1.3290119171142578, "learning_rate": 9.991385815059198e-06, "loss": 1.1136, "step": 1339 }, { "epoch": 0.24094219185471547, "grad_norm": 1.1993136405944824, "learning_rate": 9.991351607741174e-06, "loss": 1.1636, "step": 1340 }, { "epoch": 0.2411219994605772, "grad_norm": 1.5635877847671509, "learning_rate": 9.991317332697059e-06, "loss": 0.8727, "step": 1341 }, { "epoch": 0.24130180706643892, "grad_norm": 1.8681178092956543, "learning_rate": 9.991282989927315e-06, "loss": 0.7837, "step": 1342 }, { "epoch": 0.24148161467230064, "grad_norm": 1.7893632650375366, "learning_rate": 9.991248579432413e-06, "loss": 0.8128, "step": 1343 }, { "epoch": 0.24166142227816237, "grad_norm": 1.8141591548919678, "learning_rate": 9.991214101212816e-06, "loss": 0.8936, "step": 1344 }, { "epoch": 0.2418412298840241, "grad_norm": 1.5450291633605957, "learning_rate": 9.99117955526899e-06, "loss": 0.805, "step": 1345 }, { "epoch": 0.24202103748988582, "grad_norm": 1.6465510129928589, "learning_rate": 9.99114494160141e-06, "loss": 0.8393, "step": 1346 }, { "epoch": 0.24220084509574755, "grad_norm": 1.8631948232650757, "learning_rate": 9.991110260210541e-06, "loss": 0.8797, "step": 1347 }, { "epoch": 0.24238065270160927, "grad_norm": 1.384964108467102, "learning_rate": 9.991075511096855e-06, "loss": 1.1392, "step": 1348 }, { "epoch": 0.242560460307471, "grad_norm": 1.5744853019714355, "learning_rate": 9.991040694260824e-06, "loss": 0.8354, "step": 1349 }, { "epoch": 0.24274026791333272, "grad_norm": 1.5756044387817383, "learning_rate": 9.991005809702918e-06, "loss": 0.9193, "step": 1350 }, { "epoch": 0.24292007551919445, "grad_norm": 1.5688469409942627, "learning_rate": 9.990970857423612e-06, "loss": 0.9029, "step": 1351 }, { "epoch": 0.24309988312505618, "grad_norm": 1.6088577508926392, "learning_rate": 9.99093583742338e-06, "loss": 0.8622, "step": 1352 }, { "epoch": 0.24327969073091793, "grad_norm": 1.8416039943695068, "learning_rate": 9.990900749702701e-06, "loss": 0.9496, "step": 1353 }, { "epoch": 0.24345949833677966, "grad_norm": 1.0160248279571533, "learning_rate": 9.990865594262045e-06, "loss": 1.135, "step": 1354 }, { "epoch": 0.24363930594264138, "grad_norm": 1.5667216777801514, "learning_rate": 9.990830371101892e-06, "loss": 0.8314, "step": 1355 }, { "epoch": 0.2438191135485031, "grad_norm": 1.5993884801864624, "learning_rate": 9.99079508022272e-06, "loss": 0.8587, "step": 1356 }, { "epoch": 0.24399892115436483, "grad_norm": 1.6580530405044556, "learning_rate": 9.990759721625005e-06, "loss": 0.8287, "step": 1357 }, { "epoch": 0.24417872876022656, "grad_norm": 1.6580229997634888, "learning_rate": 9.990724295309231e-06, "loss": 0.8548, "step": 1358 }, { "epoch": 0.24435853636608829, "grad_norm": 1.6292262077331543, "learning_rate": 9.990688801275876e-06, "loss": 0.8899, "step": 1359 }, { "epoch": 0.24453834397195, "grad_norm": 1.4766350984573364, "learning_rate": 9.990653239525424e-06, "loss": 0.8123, "step": 1360 }, { "epoch": 0.24471815157781174, "grad_norm": 1.6206316947937012, "learning_rate": 9.990617610058355e-06, "loss": 0.875, "step": 1361 }, { "epoch": 0.24489795918367346, "grad_norm": 1.1156014204025269, "learning_rate": 9.990581912875153e-06, "loss": 1.1618, "step": 1362 }, { "epoch": 0.2450777667895352, "grad_norm": 1.7162522077560425, "learning_rate": 9.990546147976303e-06, "loss": 0.8197, "step": 1363 }, { "epoch": 0.24525757439539692, "grad_norm": 1.572569727897644, "learning_rate": 9.99051031536229e-06, "loss": 0.9453, "step": 1364 }, { "epoch": 0.24543738200125864, "grad_norm": 1.5736876726150513, "learning_rate": 9.990474415033602e-06, "loss": 0.791, "step": 1365 }, { "epoch": 0.24561718960712037, "grad_norm": 1.7357498407363892, "learning_rate": 9.990438446990722e-06, "loss": 0.9446, "step": 1366 }, { "epoch": 0.24579699721298212, "grad_norm": 1.5730262994766235, "learning_rate": 9.99040241123414e-06, "loss": 0.8522, "step": 1367 }, { "epoch": 0.24597680481884385, "grad_norm": 1.6663991212844849, "learning_rate": 9.990366307764348e-06, "loss": 0.8309, "step": 1368 }, { "epoch": 0.24615661242470557, "grad_norm": 1.1989023685455322, "learning_rate": 9.990330136581832e-06, "loss": 1.1267, "step": 1369 }, { "epoch": 0.2463364200305673, "grad_norm": 1.5644192695617676, "learning_rate": 9.990293897687085e-06, "loss": 0.8633, "step": 1370 }, { "epoch": 0.24651622763642903, "grad_norm": 1.5944011211395264, "learning_rate": 9.990257591080596e-06, "loss": 0.8884, "step": 1371 }, { "epoch": 0.24669603524229075, "grad_norm": 1.6410040855407715, "learning_rate": 9.99022121676286e-06, "loss": 0.8075, "step": 1372 }, { "epoch": 0.24687584284815248, "grad_norm": 1.5524935722351074, "learning_rate": 9.990184774734371e-06, "loss": 0.8765, "step": 1373 }, { "epoch": 0.2470556504540142, "grad_norm": 1.122062087059021, "learning_rate": 9.99014826499562e-06, "loss": 1.1301, "step": 1374 }, { "epoch": 0.24723545805987593, "grad_norm": 1.7849801778793335, "learning_rate": 9.990111687547105e-06, "loss": 0.9197, "step": 1375 }, { "epoch": 0.24741526566573765, "grad_norm": 1.598501205444336, "learning_rate": 9.990075042389324e-06, "loss": 0.8705, "step": 1376 }, { "epoch": 0.24759507327159938, "grad_norm": 1.890504240989685, "learning_rate": 9.990038329522774e-06, "loss": 0.8986, "step": 1377 }, { "epoch": 0.2477748808774611, "grad_norm": 1.5714342594146729, "learning_rate": 9.990001548947947e-06, "loss": 0.929, "step": 1378 }, { "epoch": 0.24795468848332283, "grad_norm": 1.6302326917648315, "learning_rate": 9.989964700665348e-06, "loss": 0.8029, "step": 1379 }, { "epoch": 0.2481344960891846, "grad_norm": 1.1217460632324219, "learning_rate": 9.989927784675477e-06, "loss": 1.122, "step": 1380 }, { "epoch": 0.2483143036950463, "grad_norm": 1.5405900478363037, "learning_rate": 9.989890800978832e-06, "loss": 0.8358, "step": 1381 }, { "epoch": 0.24849411130090804, "grad_norm": 1.6721354722976685, "learning_rate": 9.989853749575917e-06, "loss": 0.8506, "step": 1382 }, { "epoch": 0.24867391890676976, "grad_norm": 1.533176064491272, "learning_rate": 9.989816630467235e-06, "loss": 0.9175, "step": 1383 }, { "epoch": 0.2488537265126315, "grad_norm": 1.6771154403686523, "learning_rate": 9.989779443653287e-06, "loss": 0.9067, "step": 1384 }, { "epoch": 0.24903353411849322, "grad_norm": 1.6915466785430908, "learning_rate": 9.98974218913458e-06, "loss": 0.881, "step": 1385 }, { "epoch": 0.24921334172435494, "grad_norm": 1.1209063529968262, "learning_rate": 9.989704866911617e-06, "loss": 1.0927, "step": 1386 }, { "epoch": 0.24939314933021667, "grad_norm": 1.660820484161377, "learning_rate": 9.98966747698491e-06, "loss": 0.9038, "step": 1387 }, { "epoch": 0.2495729569360784, "grad_norm": 1.540689468383789, "learning_rate": 9.989630019354959e-06, "loss": 0.8903, "step": 1388 }, { "epoch": 0.24975276454194012, "grad_norm": 1.7129168510437012, "learning_rate": 9.989592494022278e-06, "loss": 0.8977, "step": 1389 }, { "epoch": 0.24993257214780185, "grad_norm": 1.0917071104049683, "learning_rate": 9.989554900987371e-06, "loss": 1.0814, "step": 1390 }, { "epoch": 0.25011237975366357, "grad_norm": 1.6526626348495483, "learning_rate": 9.989517240250753e-06, "loss": 0.8322, "step": 1391 }, { "epoch": 0.2502921873595253, "grad_norm": 1.5396935939788818, "learning_rate": 9.989479511812934e-06, "loss": 0.8544, "step": 1392 }, { "epoch": 0.250471994965387, "grad_norm": 1.6321653127670288, "learning_rate": 9.989441715674422e-06, "loss": 0.8513, "step": 1393 }, { "epoch": 0.25065180257124875, "grad_norm": 1.623604416847229, "learning_rate": 9.989403851835735e-06, "loss": 0.915, "step": 1394 }, { "epoch": 0.2508316101771105, "grad_norm": 1.607668399810791, "learning_rate": 9.989365920297384e-06, "loss": 0.9079, "step": 1395 }, { "epoch": 0.2510114177829722, "grad_norm": 1.6774439811706543, "learning_rate": 9.989327921059883e-06, "loss": 0.8554, "step": 1396 }, { "epoch": 0.2511912253888339, "grad_norm": 1.6935302019119263, "learning_rate": 9.98928985412375e-06, "loss": 0.84, "step": 1397 }, { "epoch": 0.25137103299469565, "grad_norm": 1.7040024995803833, "learning_rate": 9.989251719489501e-06, "loss": 0.8405, "step": 1398 }, { "epoch": 0.2515508406005574, "grad_norm": 1.4887977838516235, "learning_rate": 9.989213517157651e-06, "loss": 0.8284, "step": 1399 }, { "epoch": 0.2517306482064191, "grad_norm": 1.7197929620742798, "learning_rate": 9.989175247128722e-06, "loss": 0.9105, "step": 1400 }, { "epoch": 0.2519104558122809, "grad_norm": 1.4927877187728882, "learning_rate": 9.98913690940323e-06, "loss": 0.8969, "step": 1401 }, { "epoch": 0.2520902634181426, "grad_norm": 1.7125710248947144, "learning_rate": 9.989098503981695e-06, "loss": 0.8362, "step": 1402 }, { "epoch": 0.25227007102400434, "grad_norm": 1.754544734954834, "learning_rate": 9.989060030864643e-06, "loss": 0.9154, "step": 1403 }, { "epoch": 0.25244987862986606, "grad_norm": 1.6648731231689453, "learning_rate": 9.98902149005259e-06, "loss": 0.8542, "step": 1404 }, { "epoch": 0.2526296862357278, "grad_norm": 1.5901998281478882, "learning_rate": 9.988982881546063e-06, "loss": 0.8818, "step": 1405 }, { "epoch": 0.2528094938415895, "grad_norm": 1.731666088104248, "learning_rate": 9.988944205345585e-06, "loss": 0.9218, "step": 1406 }, { "epoch": 0.25298930144745124, "grad_norm": 1.537358045578003, "learning_rate": 9.98890546145168e-06, "loss": 0.9271, "step": 1407 }, { "epoch": 0.25316910905331297, "grad_norm": 1.48934006690979, "learning_rate": 9.988866649864874e-06, "loss": 0.8753, "step": 1408 }, { "epoch": 0.2533489166591747, "grad_norm": 2.0137786865234375, "learning_rate": 9.988827770585693e-06, "loss": 0.918, "step": 1409 }, { "epoch": 0.2535287242650364, "grad_norm": 1.6311794519424438, "learning_rate": 9.988788823614665e-06, "loss": 0.8641, "step": 1410 }, { "epoch": 0.25370853187089815, "grad_norm": 1.5396164655685425, "learning_rate": 9.98874980895232e-06, "loss": 0.8745, "step": 1411 }, { "epoch": 0.25388833947675987, "grad_norm": 1.622248649597168, "learning_rate": 9.988710726599184e-06, "loss": 0.8361, "step": 1412 }, { "epoch": 0.2540681470826216, "grad_norm": 1.8035643100738525, "learning_rate": 9.988671576555792e-06, "loss": 0.885, "step": 1413 }, { "epoch": 0.2542479546884833, "grad_norm": 1.6226240396499634, "learning_rate": 9.98863235882267e-06, "loss": 0.8305, "step": 1414 }, { "epoch": 0.25442776229434505, "grad_norm": 1.6568849086761475, "learning_rate": 9.988593073400354e-06, "loss": 0.9245, "step": 1415 }, { "epoch": 0.2546075699002068, "grad_norm": 1.5160715579986572, "learning_rate": 9.988553720289375e-06, "loss": 0.9121, "step": 1416 }, { "epoch": 0.2547873775060685, "grad_norm": 1.7230138778686523, "learning_rate": 9.988514299490268e-06, "loss": 0.933, "step": 1417 }, { "epoch": 0.25496718511193023, "grad_norm": 1.6250388622283936, "learning_rate": 9.988474811003567e-06, "loss": 0.8237, "step": 1418 }, { "epoch": 0.25514699271779195, "grad_norm": 1.6860772371292114, "learning_rate": 9.988435254829811e-06, "loss": 0.875, "step": 1419 }, { "epoch": 0.2553268003236537, "grad_norm": 1.6663554906845093, "learning_rate": 9.988395630969532e-06, "loss": 0.9389, "step": 1420 }, { "epoch": 0.2555066079295154, "grad_norm": 1.543130874633789, "learning_rate": 9.98835593942327e-06, "loss": 0.8823, "step": 1421 }, { "epoch": 0.25568641553537713, "grad_norm": 1.6279493570327759, "learning_rate": 9.988316180191563e-06, "loss": 0.8845, "step": 1422 }, { "epoch": 0.25586622314123886, "grad_norm": 1.607438325881958, "learning_rate": 9.98827635327495e-06, "loss": 0.8439, "step": 1423 }, { "epoch": 0.2560460307471006, "grad_norm": 1.5300372838974, "learning_rate": 9.988236458673974e-06, "loss": 0.8387, "step": 1424 }, { "epoch": 0.2562258383529623, "grad_norm": 1.5046820640563965, "learning_rate": 9.988196496389174e-06, "loss": 0.9232, "step": 1425 }, { "epoch": 0.25640564595882404, "grad_norm": 1.6434831619262695, "learning_rate": 9.988156466421091e-06, "loss": 0.8286, "step": 1426 }, { "epoch": 0.25658545356468576, "grad_norm": 1.377209186553955, "learning_rate": 9.988116368770272e-06, "loss": 1.1556, "step": 1427 }, { "epoch": 0.2567652611705475, "grad_norm": 1.1675206422805786, "learning_rate": 9.988076203437257e-06, "loss": 1.1347, "step": 1428 }, { "epoch": 0.25694506877640927, "grad_norm": 1.7112553119659424, "learning_rate": 9.988035970422595e-06, "loss": 0.9004, "step": 1429 }, { "epoch": 0.257124876382271, "grad_norm": 0.9629443287849426, "learning_rate": 9.98799566972683e-06, "loss": 1.1094, "step": 1430 }, { "epoch": 0.2573046839881327, "grad_norm": 1.5229110717773438, "learning_rate": 9.987955301350508e-06, "loss": 0.8491, "step": 1431 }, { "epoch": 0.25748449159399445, "grad_norm": 1.6472736597061157, "learning_rate": 9.987914865294178e-06, "loss": 0.8419, "step": 1432 }, { "epoch": 0.2576642991998562, "grad_norm": 1.6428245306015015, "learning_rate": 9.987874361558385e-06, "loss": 0.8507, "step": 1433 }, { "epoch": 0.2578441068057179, "grad_norm": 1.7953604459762573, "learning_rate": 9.987833790143685e-06, "loss": 0.8612, "step": 1434 }, { "epoch": 0.2580239144115796, "grad_norm": 1.5838286876678467, "learning_rate": 9.987793151050623e-06, "loss": 0.8901, "step": 1435 }, { "epoch": 0.25820372201744135, "grad_norm": 1.2226662635803223, "learning_rate": 9.987752444279755e-06, "loss": 1.0926, "step": 1436 }, { "epoch": 0.2583835296233031, "grad_norm": 1.5202949047088623, "learning_rate": 9.98771166983163e-06, "loss": 0.8653, "step": 1437 }, { "epoch": 0.2585633372291648, "grad_norm": 1.560626745223999, "learning_rate": 9.987670827706802e-06, "loss": 0.8855, "step": 1438 }, { "epoch": 0.25874314483502653, "grad_norm": 1.5495821237564087, "learning_rate": 9.987629917905825e-06, "loss": 0.8348, "step": 1439 }, { "epoch": 0.25892295244088825, "grad_norm": 1.548014521598816, "learning_rate": 9.987588940429254e-06, "loss": 0.7897, "step": 1440 }, { "epoch": 0.25910276004675, "grad_norm": 1.5427016019821167, "learning_rate": 9.987547895277648e-06, "loss": 0.8989, "step": 1441 }, { "epoch": 0.2592825676526117, "grad_norm": 1.5013127326965332, "learning_rate": 9.987506782451559e-06, "loss": 0.8662, "step": 1442 }, { "epoch": 0.25946237525847343, "grad_norm": 1.5906575918197632, "learning_rate": 9.987465601951546e-06, "loss": 0.8049, "step": 1443 }, { "epoch": 0.25964218286433516, "grad_norm": 1.594746708869934, "learning_rate": 9.987424353778172e-06, "loss": 0.9059, "step": 1444 }, { "epoch": 0.2598219904701969, "grad_norm": 1.5893824100494385, "learning_rate": 9.987383037931993e-06, "loss": 0.81, "step": 1445 }, { "epoch": 0.2600017980760586, "grad_norm": 1.5131349563598633, "learning_rate": 9.987341654413571e-06, "loss": 0.8749, "step": 1446 }, { "epoch": 0.26018160568192034, "grad_norm": 1.5178505182266235, "learning_rate": 9.987300203223465e-06, "loss": 0.8882, "step": 1447 }, { "epoch": 0.26036141328778206, "grad_norm": 1.7165395021438599, "learning_rate": 9.98725868436224e-06, "loss": 0.8928, "step": 1448 }, { "epoch": 0.2605412208936438, "grad_norm": 1.5361592769622803, "learning_rate": 9.987217097830459e-06, "loss": 0.8776, "step": 1449 }, { "epoch": 0.2607210284995055, "grad_norm": 1.5802643299102783, "learning_rate": 9.987175443628685e-06, "loss": 0.8145, "step": 1450 }, { "epoch": 0.26090083610536724, "grad_norm": 1.602022409439087, "learning_rate": 9.987133721757484e-06, "loss": 0.85, "step": 1451 }, { "epoch": 0.26108064371122897, "grad_norm": 1.760839819908142, "learning_rate": 9.987091932217423e-06, "loss": 0.871, "step": 1452 }, { "epoch": 0.2612604513170907, "grad_norm": 1.5879604816436768, "learning_rate": 9.987050075009068e-06, "loss": 0.9336, "step": 1453 }, { "epoch": 0.2614402589229524, "grad_norm": 1.5720462799072266, "learning_rate": 9.987008150132988e-06, "loss": 0.8823, "step": 1454 }, { "epoch": 0.26162006652881414, "grad_norm": 1.7439253330230713, "learning_rate": 9.986966157589751e-06, "loss": 0.8287, "step": 1455 }, { "epoch": 0.26179987413467587, "grad_norm": 1.690609097480774, "learning_rate": 9.986924097379924e-06, "loss": 0.9185, "step": 1456 }, { "epoch": 0.26197968174053765, "grad_norm": 1.6500568389892578, "learning_rate": 9.986881969504083e-06, "loss": 0.861, "step": 1457 }, { "epoch": 0.2621594893463994, "grad_norm": 1.333844780921936, "learning_rate": 9.986839773962797e-06, "loss": 1.1551, "step": 1458 }, { "epoch": 0.2623392969522611, "grad_norm": 1.5615646839141846, "learning_rate": 9.986797510756638e-06, "loss": 0.8196, "step": 1459 }, { "epoch": 0.26251910455812283, "grad_norm": 1.6161586046218872, "learning_rate": 9.98675517988618e-06, "loss": 0.8916, "step": 1460 }, { "epoch": 0.26269891216398455, "grad_norm": 1.065989375114441, "learning_rate": 9.986712781352e-06, "loss": 1.0973, "step": 1461 }, { "epoch": 0.2628787197698463, "grad_norm": 1.5181407928466797, "learning_rate": 9.986670315154668e-06, "loss": 0.8506, "step": 1462 }, { "epoch": 0.263058527375708, "grad_norm": 1.762992262840271, "learning_rate": 9.986627781294765e-06, "loss": 0.8619, "step": 1463 }, { "epoch": 0.26323833498156973, "grad_norm": 1.6718577146530151, "learning_rate": 9.986585179772864e-06, "loss": 0.8786, "step": 1464 }, { "epoch": 0.26341814258743146, "grad_norm": 1.5820645093917847, "learning_rate": 9.986542510589546e-06, "loss": 0.954, "step": 1465 }, { "epoch": 0.2635979501932932, "grad_norm": 1.567545771598816, "learning_rate": 9.986499773745389e-06, "loss": 0.7796, "step": 1466 }, { "epoch": 0.2637777577991549, "grad_norm": 1.6317651271820068, "learning_rate": 9.986456969240973e-06, "loss": 0.8727, "step": 1467 }, { "epoch": 0.26395756540501664, "grad_norm": 1.5343884229660034, "learning_rate": 9.98641409707688e-06, "loss": 0.8595, "step": 1468 }, { "epoch": 0.26413737301087836, "grad_norm": 1.606539011001587, "learning_rate": 9.98637115725369e-06, "loss": 0.788, "step": 1469 }, { "epoch": 0.2643171806167401, "grad_norm": 1.5534915924072266, "learning_rate": 9.986328149771987e-06, "loss": 0.8348, "step": 1470 }, { "epoch": 0.2644969882226018, "grad_norm": 1.5341843366622925, "learning_rate": 9.986285074632351e-06, "loss": 0.9409, "step": 1471 }, { "epoch": 0.26467679582846354, "grad_norm": 1.63602614402771, "learning_rate": 9.986241931835372e-06, "loss": 0.8607, "step": 1472 }, { "epoch": 0.26485660343432527, "grad_norm": 1.5307362079620361, "learning_rate": 9.98619872138163e-06, "loss": 0.8799, "step": 1473 }, { "epoch": 0.265036411040187, "grad_norm": 1.4995912313461304, "learning_rate": 9.986155443271716e-06, "loss": 0.834, "step": 1474 }, { "epoch": 0.2652162186460487, "grad_norm": 1.6552239656448364, "learning_rate": 9.986112097506215e-06, "loss": 0.8514, "step": 1475 }, { "epoch": 0.26539602625191044, "grad_norm": 1.5329868793487549, "learning_rate": 9.986068684085716e-06, "loss": 0.8287, "step": 1476 }, { "epoch": 0.26557583385777217, "grad_norm": 1.85758638381958, "learning_rate": 9.986025203010806e-06, "loss": 0.8475, "step": 1477 }, { "epoch": 0.2657556414636339, "grad_norm": 1.5225491523742676, "learning_rate": 9.985981654282078e-06, "loss": 0.8046, "step": 1478 }, { "epoch": 0.2659354490694956, "grad_norm": 1.679423451423645, "learning_rate": 9.985938037900118e-06, "loss": 0.8529, "step": 1479 }, { "epoch": 0.26611525667535735, "grad_norm": 1.796884536743164, "learning_rate": 9.985894353865524e-06, "loss": 0.9436, "step": 1480 }, { "epoch": 0.2662950642812191, "grad_norm": 1.530605673789978, "learning_rate": 9.985850602178884e-06, "loss": 0.8695, "step": 1481 }, { "epoch": 0.2664748718870808, "grad_norm": 1.2124789953231812, "learning_rate": 9.985806782840794e-06, "loss": 1.1735, "step": 1482 }, { "epoch": 0.2666546794929425, "grad_norm": 1.600229024887085, "learning_rate": 9.98576289585185e-06, "loss": 0.8183, "step": 1483 }, { "epoch": 0.2668344870988043, "grad_norm": 1.5953645706176758, "learning_rate": 9.985718941212642e-06, "loss": 0.8521, "step": 1484 }, { "epoch": 0.26701429470466603, "grad_norm": 1.5893964767456055, "learning_rate": 9.985674918923773e-06, "loss": 0.8391, "step": 1485 }, { "epoch": 0.26719410231052776, "grad_norm": 1.6738702058792114, "learning_rate": 9.985630828985835e-06, "loss": 0.9283, "step": 1486 }, { "epoch": 0.2673739099163895, "grad_norm": 1.6290979385375977, "learning_rate": 9.98558667139943e-06, "loss": 0.8757, "step": 1487 }, { "epoch": 0.2675537175222512, "grad_norm": 1.4837974309921265, "learning_rate": 9.985542446165155e-06, "loss": 0.8365, "step": 1488 }, { "epoch": 0.26773352512811294, "grad_norm": 1.4567681550979614, "learning_rate": 9.985498153283611e-06, "loss": 0.8517, "step": 1489 }, { "epoch": 0.26791333273397466, "grad_norm": 1.753299593925476, "learning_rate": 9.985453792755397e-06, "loss": 0.8679, "step": 1490 }, { "epoch": 0.2680931403398364, "grad_norm": 1.6458921432495117, "learning_rate": 9.985409364581118e-06, "loss": 0.794, "step": 1491 }, { "epoch": 0.2682729479456981, "grad_norm": 1.7698694467544556, "learning_rate": 9.985364868761376e-06, "loss": 0.8175, "step": 1492 }, { "epoch": 0.26845275555155984, "grad_norm": 1.5443434715270996, "learning_rate": 9.985320305296773e-06, "loss": 0.7853, "step": 1493 }, { "epoch": 0.26863256315742157, "grad_norm": 1.681062936782837, "learning_rate": 9.985275674187916e-06, "loss": 0.8643, "step": 1494 }, { "epoch": 0.2688123707632833, "grad_norm": 1.4945480823516846, "learning_rate": 9.98523097543541e-06, "loss": 0.8586, "step": 1495 }, { "epoch": 0.268992178369145, "grad_norm": 1.612076997756958, "learning_rate": 9.98518620903986e-06, "loss": 0.7815, "step": 1496 }, { "epoch": 0.26917198597500674, "grad_norm": 1.5866293907165527, "learning_rate": 9.985141375001875e-06, "loss": 0.8528, "step": 1497 }, { "epoch": 0.26935179358086847, "grad_norm": 1.5282317399978638, "learning_rate": 9.985096473322061e-06, "loss": 0.8069, "step": 1498 }, { "epoch": 0.2695316011867302, "grad_norm": 1.6329461336135864, "learning_rate": 9.98505150400103e-06, "loss": 0.8992, "step": 1499 }, { "epoch": 0.2697114087925919, "grad_norm": 1.459367275238037, "learning_rate": 9.985006467039391e-06, "loss": 0.8466, "step": 1500 }, { "epoch": 0.2697114087925919, "eval_loss": 0.8915850520133972, "eval_runtime": 148.5936, "eval_samples_per_second": 96.787, "eval_steps_per_second": 1.514, "step": 1500 }, { "epoch": 0.26989121639845365, "grad_norm": 1.5155142545700073, "learning_rate": 9.984961362437756e-06, "loss": 0.871, "step": 1501 }, { "epoch": 0.2700710240043154, "grad_norm": 1.545442819595337, "learning_rate": 9.984916190196736e-06, "loss": 0.8347, "step": 1502 }, { "epoch": 0.2702508316101771, "grad_norm": 1.5970532894134521, "learning_rate": 9.984870950316944e-06, "loss": 0.8596, "step": 1503 }, { "epoch": 0.2704306392160388, "grad_norm": 1.2803349494934082, "learning_rate": 9.984825642798994e-06, "loss": 1.174, "step": 1504 }, { "epoch": 0.27061044682190055, "grad_norm": 1.67231023311615, "learning_rate": 9.9847802676435e-06, "loss": 0.8469, "step": 1505 }, { "epoch": 0.2707902544277623, "grad_norm": 1.6699795722961426, "learning_rate": 9.98473482485108e-06, "loss": 0.8714, "step": 1506 }, { "epoch": 0.270970062033624, "grad_norm": 1.6775031089782715, "learning_rate": 9.98468931442235e-06, "loss": 0.8603, "step": 1507 }, { "epoch": 0.27114986963948573, "grad_norm": 1.0760583877563477, "learning_rate": 9.984643736357923e-06, "loss": 1.1533, "step": 1508 }, { "epoch": 0.27132967724534746, "grad_norm": 1.562222957611084, "learning_rate": 9.984598090658425e-06, "loss": 0.8648, "step": 1509 }, { "epoch": 0.2715094848512092, "grad_norm": 1.597350835800171, "learning_rate": 9.984552377324468e-06, "loss": 0.8264, "step": 1510 }, { "epoch": 0.2716892924570709, "grad_norm": 1.573202133178711, "learning_rate": 9.984506596356678e-06, "loss": 0.881, "step": 1511 }, { "epoch": 0.2718691000629327, "grad_norm": 1.6132471561431885, "learning_rate": 9.984460747755673e-06, "loss": 0.8883, "step": 1512 }, { "epoch": 0.2720489076687944, "grad_norm": 1.7962706089019775, "learning_rate": 9.984414831522075e-06, "loss": 0.8831, "step": 1513 }, { "epoch": 0.27222871527465614, "grad_norm": 1.7310529947280884, "learning_rate": 9.984368847656509e-06, "loss": 0.8201, "step": 1514 }, { "epoch": 0.27240852288051787, "grad_norm": 1.6619594097137451, "learning_rate": 9.984322796159598e-06, "loss": 0.7971, "step": 1515 }, { "epoch": 0.2725883304863796, "grad_norm": 1.5625663995742798, "learning_rate": 9.984276677031966e-06, "loss": 0.7885, "step": 1516 }, { "epoch": 0.2727681380922413, "grad_norm": 1.5455453395843506, "learning_rate": 9.98423049027424e-06, "loss": 0.8601, "step": 1517 }, { "epoch": 0.27294794569810304, "grad_norm": 1.5523873567581177, "learning_rate": 9.984184235887047e-06, "loss": 0.8757, "step": 1518 }, { "epoch": 0.27312775330396477, "grad_norm": 1.586276888847351, "learning_rate": 9.984137913871012e-06, "loss": 0.7727, "step": 1519 }, { "epoch": 0.2733075609098265, "grad_norm": 1.560213327407837, "learning_rate": 9.984091524226767e-06, "loss": 0.8651, "step": 1520 }, { "epoch": 0.2734873685156882, "grad_norm": 1.6431382894515991, "learning_rate": 9.98404506695494e-06, "loss": 0.8637, "step": 1521 }, { "epoch": 0.27366717612154995, "grad_norm": 1.634132981300354, "learning_rate": 9.983998542056159e-06, "loss": 0.87, "step": 1522 }, { "epoch": 0.2738469837274117, "grad_norm": 1.5469385385513306, "learning_rate": 9.983951949531058e-06, "loss": 0.9154, "step": 1523 }, { "epoch": 0.2740267913332734, "grad_norm": 1.549836277961731, "learning_rate": 9.983905289380271e-06, "loss": 0.7998, "step": 1524 }, { "epoch": 0.2742065989391351, "grad_norm": 1.6975895166397095, "learning_rate": 9.983858561604428e-06, "loss": 0.8639, "step": 1525 }, { "epoch": 0.27438640654499685, "grad_norm": 1.4888554811477661, "learning_rate": 9.983811766204163e-06, "loss": 0.8152, "step": 1526 }, { "epoch": 0.2745662141508586, "grad_norm": 1.6170976161956787, "learning_rate": 9.983764903180113e-06, "loss": 0.9359, "step": 1527 }, { "epoch": 0.2747460217567203, "grad_norm": 1.5813602209091187, "learning_rate": 9.983717972532912e-06, "loss": 0.8818, "step": 1528 }, { "epoch": 0.27492582936258203, "grad_norm": 1.6091432571411133, "learning_rate": 9.983670974263195e-06, "loss": 0.8478, "step": 1529 }, { "epoch": 0.27510563696844376, "grad_norm": 1.4763249158859253, "learning_rate": 9.983623908371604e-06, "loss": 0.8417, "step": 1530 }, { "epoch": 0.2752854445743055, "grad_norm": 1.5369130373001099, "learning_rate": 9.983576774858776e-06, "loss": 0.9121, "step": 1531 }, { "epoch": 0.2754652521801672, "grad_norm": 1.2022814750671387, "learning_rate": 9.98352957372535e-06, "loss": 1.1318, "step": 1532 }, { "epoch": 0.27564505978602893, "grad_norm": 1.6223880052566528, "learning_rate": 9.983482304971969e-06, "loss": 0.9057, "step": 1533 }, { "epoch": 0.27582486739189066, "grad_norm": 1.5695288181304932, "learning_rate": 9.98343496859927e-06, "loss": 0.7983, "step": 1534 }, { "epoch": 0.2760046749977524, "grad_norm": 1.0287377834320068, "learning_rate": 9.983387564607896e-06, "loss": 1.1655, "step": 1535 }, { "epoch": 0.2761844826036141, "grad_norm": 1.37147855758667, "learning_rate": 9.983340092998494e-06, "loss": 0.826, "step": 1536 }, { "epoch": 0.27636429020947584, "grad_norm": 1.7303791046142578, "learning_rate": 9.983292553771706e-06, "loss": 0.8833, "step": 1537 }, { "epoch": 0.27654409781533756, "grad_norm": 1.113152027130127, "learning_rate": 9.983244946928176e-06, "loss": 1.094, "step": 1538 }, { "epoch": 0.2767239054211993, "grad_norm": 1.4458116292953491, "learning_rate": 9.98319727246855e-06, "loss": 0.9018, "step": 1539 }, { "epoch": 0.27690371302706107, "grad_norm": 1.5760384798049927, "learning_rate": 9.983149530393477e-06, "loss": 0.8406, "step": 1540 }, { "epoch": 0.2770835206329228, "grad_norm": 1.5521490573883057, "learning_rate": 9.983101720703601e-06, "loss": 0.9188, "step": 1541 }, { "epoch": 0.2772633282387845, "grad_norm": 1.480575442314148, "learning_rate": 9.983053843399575e-06, "loss": 0.8567, "step": 1542 }, { "epoch": 0.27744313584464625, "grad_norm": 1.619966745376587, "learning_rate": 9.983005898482048e-06, "loss": 0.8118, "step": 1543 }, { "epoch": 0.277622943450508, "grad_norm": 1.5054521560668945, "learning_rate": 9.982957885951668e-06, "loss": 0.7955, "step": 1544 }, { "epoch": 0.2778027510563697, "grad_norm": 1.5132185220718384, "learning_rate": 9.982909805809087e-06, "loss": 0.8747, "step": 1545 }, { "epoch": 0.2779825586622314, "grad_norm": 1.4096152782440186, "learning_rate": 9.982861658054959e-06, "loss": 0.867, "step": 1546 }, { "epoch": 0.27816236626809315, "grad_norm": 1.5286799669265747, "learning_rate": 9.982813442689936e-06, "loss": 0.913, "step": 1547 }, { "epoch": 0.2783421738739549, "grad_norm": 1.5395317077636719, "learning_rate": 9.982765159714674e-06, "loss": 0.7658, "step": 1548 }, { "epoch": 0.2785219814798166, "grad_norm": 1.7257611751556396, "learning_rate": 9.982716809129826e-06, "loss": 0.8081, "step": 1549 }, { "epoch": 0.27870178908567833, "grad_norm": 1.676979422569275, "learning_rate": 9.982668390936049e-06, "loss": 0.8611, "step": 1550 }, { "epoch": 0.27888159669154006, "grad_norm": 1.5274337530136108, "learning_rate": 9.982619905133999e-06, "loss": 0.8107, "step": 1551 }, { "epoch": 0.2790614042974018, "grad_norm": 1.4648735523223877, "learning_rate": 9.982571351724337e-06, "loss": 0.9022, "step": 1552 }, { "epoch": 0.2792412119032635, "grad_norm": 1.159912347793579, "learning_rate": 9.982522730707717e-06, "loss": 1.1243, "step": 1553 }, { "epoch": 0.27942101950912523, "grad_norm": 1.4815442562103271, "learning_rate": 9.982474042084802e-06, "loss": 0.852, "step": 1554 }, { "epoch": 0.27960082711498696, "grad_norm": 1.4336612224578857, "learning_rate": 9.982425285856253e-06, "loss": 0.9107, "step": 1555 }, { "epoch": 0.2797806347208487, "grad_norm": 1.5429848432540894, "learning_rate": 9.98237646202273e-06, "loss": 0.8299, "step": 1556 }, { "epoch": 0.2799604423267104, "grad_norm": 1.110090732574463, "learning_rate": 9.982327570584895e-06, "loss": 1.1301, "step": 1557 }, { "epoch": 0.28014024993257214, "grad_norm": 1.5161784887313843, "learning_rate": 9.982278611543415e-06, "loss": 0.8, "step": 1558 }, { "epoch": 0.28032005753843386, "grad_norm": 1.571226716041565, "learning_rate": 9.982229584898949e-06, "loss": 0.9292, "step": 1559 }, { "epoch": 0.2804998651442956, "grad_norm": 1.482365608215332, "learning_rate": 9.982180490652165e-06, "loss": 0.8387, "step": 1560 }, { "epoch": 0.2806796727501573, "grad_norm": 1.0518163442611694, "learning_rate": 9.98213132880373e-06, "loss": 1.1267, "step": 1561 }, { "epoch": 0.28085948035601904, "grad_norm": 1.6694890260696411, "learning_rate": 9.982082099354311e-06, "loss": 0.846, "step": 1562 }, { "epoch": 0.28103928796188077, "grad_norm": 1.5008950233459473, "learning_rate": 9.982032802304572e-06, "loss": 0.8595, "step": 1563 }, { "epoch": 0.2812190955677425, "grad_norm": 1.084015965461731, "learning_rate": 9.981983437655189e-06, "loss": 1.1217, "step": 1564 }, { "epoch": 0.2813989031736042, "grad_norm": 1.8610891103744507, "learning_rate": 9.981934005406826e-06, "loss": 0.8093, "step": 1565 }, { "epoch": 0.28157871077946595, "grad_norm": 1.5963424444198608, "learning_rate": 9.981884505560156e-06, "loss": 0.9152, "step": 1566 }, { "epoch": 0.28175851838532767, "grad_norm": 1.1010018587112427, "learning_rate": 9.981834938115848e-06, "loss": 1.1715, "step": 1567 }, { "epoch": 0.28193832599118945, "grad_norm": 1.5005707740783691, "learning_rate": 9.981785303074577e-06, "loss": 0.9182, "step": 1568 }, { "epoch": 0.2821181335970512, "grad_norm": 1.589170217514038, "learning_rate": 9.98173560043702e-06, "loss": 0.8544, "step": 1569 }, { "epoch": 0.2822979412029129, "grad_norm": 1.5934251546859741, "learning_rate": 9.981685830203845e-06, "loss": 0.8741, "step": 1570 }, { "epoch": 0.28247774880877463, "grad_norm": 0.9963638186454773, "learning_rate": 9.981635992375729e-06, "loss": 1.1154, "step": 1571 }, { "epoch": 0.28265755641463636, "grad_norm": 1.4517700672149658, "learning_rate": 9.981586086953349e-06, "loss": 0.8187, "step": 1572 }, { "epoch": 0.2828373640204981, "grad_norm": 1.4943017959594727, "learning_rate": 9.981536113937384e-06, "loss": 0.8246, "step": 1573 }, { "epoch": 0.2830171716263598, "grad_norm": 1.4900875091552734, "learning_rate": 9.98148607332851e-06, "loss": 0.8759, "step": 1574 }, { "epoch": 0.28319697923222154, "grad_norm": 1.8721346855163574, "learning_rate": 9.981435965127405e-06, "loss": 0.8502, "step": 1575 }, { "epoch": 0.28337678683808326, "grad_norm": 1.606022596359253, "learning_rate": 9.981385789334753e-06, "loss": 0.8622, "step": 1576 }, { "epoch": 0.283556594443945, "grad_norm": 1.542027473449707, "learning_rate": 9.98133554595123e-06, "loss": 0.7771, "step": 1577 }, { "epoch": 0.2837364020498067, "grad_norm": 1.4632447957992554, "learning_rate": 9.981285234977522e-06, "loss": 0.8387, "step": 1578 }, { "epoch": 0.28391620965566844, "grad_norm": 1.2142610549926758, "learning_rate": 9.981234856414306e-06, "loss": 1.1018, "step": 1579 }, { "epoch": 0.28409601726153016, "grad_norm": 1.4931106567382812, "learning_rate": 9.981184410262273e-06, "loss": 0.7954, "step": 1580 }, { "epoch": 0.2842758248673919, "grad_norm": 1.6277728080749512, "learning_rate": 9.981133896522101e-06, "loss": 0.8068, "step": 1581 }, { "epoch": 0.2844556324732536, "grad_norm": 1.7352186441421509, "learning_rate": 9.981083315194477e-06, "loss": 0.9313, "step": 1582 }, { "epoch": 0.28463544007911534, "grad_norm": 1.7104939222335815, "learning_rate": 9.981032666280091e-06, "loss": 0.8599, "step": 1583 }, { "epoch": 0.28481524768497707, "grad_norm": 1.6016656160354614, "learning_rate": 9.980981949779627e-06, "loss": 0.8809, "step": 1584 }, { "epoch": 0.2849950552908388, "grad_norm": 1.0479145050048828, "learning_rate": 9.980931165693772e-06, "loss": 1.1253, "step": 1585 }, { "epoch": 0.2851748628967005, "grad_norm": 1.565377116203308, "learning_rate": 9.980880314023218e-06, "loss": 0.7612, "step": 1586 }, { "epoch": 0.28535467050256225, "grad_norm": 1.618428349494934, "learning_rate": 9.980829394768654e-06, "loss": 0.9292, "step": 1587 }, { "epoch": 0.285534478108424, "grad_norm": 1.5725921392440796, "learning_rate": 9.980778407930768e-06, "loss": 0.8215, "step": 1588 }, { "epoch": 0.2857142857142857, "grad_norm": 1.659741997718811, "learning_rate": 9.980727353510257e-06, "loss": 0.7955, "step": 1589 }, { "epoch": 0.2858940933201474, "grad_norm": 1.5735732316970825, "learning_rate": 9.980676231507811e-06, "loss": 0.8722, "step": 1590 }, { "epoch": 0.28607390092600915, "grad_norm": 1.588822364807129, "learning_rate": 9.980625041924125e-06, "loss": 0.8774, "step": 1591 }, { "epoch": 0.2862537085318709, "grad_norm": 1.5311057567596436, "learning_rate": 9.98057378475989e-06, "loss": 0.9038, "step": 1592 }, { "epoch": 0.2864335161377326, "grad_norm": 1.6332073211669922, "learning_rate": 9.980522460015805e-06, "loss": 0.8297, "step": 1593 }, { "epoch": 0.28661332374359433, "grad_norm": 1.5384687185287476, "learning_rate": 9.980471067692565e-06, "loss": 0.8546, "step": 1594 }, { "epoch": 0.2867931313494561, "grad_norm": 1.1855134963989258, "learning_rate": 9.980419607790869e-06, "loss": 1.1405, "step": 1595 }, { "epoch": 0.28697293895531784, "grad_norm": 1.4954456090927124, "learning_rate": 9.980368080311413e-06, "loss": 0.8576, "step": 1596 }, { "epoch": 0.28715274656117956, "grad_norm": 0.9989009499549866, "learning_rate": 9.980316485254898e-06, "loss": 1.1321, "step": 1597 }, { "epoch": 0.2873325541670413, "grad_norm": 1.6420968770980835, "learning_rate": 9.980264822622022e-06, "loss": 0.8453, "step": 1598 }, { "epoch": 0.287512361772903, "grad_norm": 1.0049136877059937, "learning_rate": 9.980213092413487e-06, "loss": 1.0976, "step": 1599 }, { "epoch": 0.28769216937876474, "grad_norm": 1.5873247385025024, "learning_rate": 9.980161294629995e-06, "loss": 0.8891, "step": 1600 }, { "epoch": 0.28787197698462647, "grad_norm": 1.4861847162246704, "learning_rate": 9.98010942927225e-06, "loss": 0.797, "step": 1601 }, { "epoch": 0.2880517845904882, "grad_norm": 1.554508090019226, "learning_rate": 9.980057496340953e-06, "loss": 0.8667, "step": 1602 }, { "epoch": 0.2882315921963499, "grad_norm": 1.0897148847579956, "learning_rate": 9.98000549583681e-06, "loss": 1.0803, "step": 1603 }, { "epoch": 0.28841139980221164, "grad_norm": 1.59756600856781, "learning_rate": 9.97995342776053e-06, "loss": 0.8636, "step": 1604 }, { "epoch": 0.28859120740807337, "grad_norm": 1.6238350868225098, "learning_rate": 9.979901292112812e-06, "loss": 0.8006, "step": 1605 }, { "epoch": 0.2887710150139351, "grad_norm": 1.5099754333496094, "learning_rate": 9.979849088894371e-06, "loss": 0.8467, "step": 1606 }, { "epoch": 0.2889508226197968, "grad_norm": 1.6640100479125977, "learning_rate": 9.979796818105911e-06, "loss": 0.9259, "step": 1607 }, { "epoch": 0.28913063022565855, "grad_norm": 1.1398874521255493, "learning_rate": 9.979744479748144e-06, "loss": 1.1107, "step": 1608 }, { "epoch": 0.2893104378315203, "grad_norm": 1.5570608377456665, "learning_rate": 9.979692073821776e-06, "loss": 0.8289, "step": 1609 }, { "epoch": 0.289490245437382, "grad_norm": 1.6049211025238037, "learning_rate": 9.979639600327522e-06, "loss": 0.9486, "step": 1610 }, { "epoch": 0.2896700530432437, "grad_norm": 1.5399210453033447, "learning_rate": 9.979587059266092e-06, "loss": 0.9077, "step": 1611 }, { "epoch": 0.28984986064910545, "grad_norm": 1.1531548500061035, "learning_rate": 9.9795344506382e-06, "loss": 1.0916, "step": 1612 }, { "epoch": 0.2900296682549672, "grad_norm": 1.546450138092041, "learning_rate": 9.97948177444456e-06, "loss": 0.8415, "step": 1613 }, { "epoch": 0.2902094758608289, "grad_norm": 1.6106752157211304, "learning_rate": 9.979429030685885e-06, "loss": 0.8324, "step": 1614 }, { "epoch": 0.29038928346669063, "grad_norm": 1.553743839263916, "learning_rate": 9.979376219362891e-06, "loss": 0.9002, "step": 1615 }, { "epoch": 0.29056909107255235, "grad_norm": 1.4807260036468506, "learning_rate": 9.979323340476297e-06, "loss": 0.8536, "step": 1616 }, { "epoch": 0.2907488986784141, "grad_norm": 1.104328989982605, "learning_rate": 9.979270394026817e-06, "loss": 1.088, "step": 1617 }, { "epoch": 0.2909287062842758, "grad_norm": 1.6425949335098267, "learning_rate": 9.979217380015173e-06, "loss": 0.8174, "step": 1618 }, { "epoch": 0.29110851389013753, "grad_norm": 1.131675124168396, "learning_rate": 9.979164298442082e-06, "loss": 1.1182, "step": 1619 }, { "epoch": 0.29128832149599926, "grad_norm": 1.637036919593811, "learning_rate": 9.979111149308265e-06, "loss": 0.8463, "step": 1620 }, { "epoch": 0.291468129101861, "grad_norm": 1.6973410844802856, "learning_rate": 9.979057932614442e-06, "loss": 0.8416, "step": 1621 }, { "epoch": 0.2916479367077227, "grad_norm": 2.57942795753479, "learning_rate": 9.979004648361337e-06, "loss": 0.7196, "step": 1622 }, { "epoch": 0.2918277443135845, "grad_norm": 1.5789377689361572, "learning_rate": 9.978951296549672e-06, "loss": 0.8198, "step": 1623 }, { "epoch": 0.2920075519194462, "grad_norm": 1.0314704179763794, "learning_rate": 9.978897877180172e-06, "loss": 1.1203, "step": 1624 }, { "epoch": 0.29218735952530794, "grad_norm": 1.7382044792175293, "learning_rate": 9.97884439025356e-06, "loss": 0.8784, "step": 1625 }, { "epoch": 0.29236716713116967, "grad_norm": 1.5737011432647705, "learning_rate": 9.978790835770561e-06, "loss": 0.8226, "step": 1626 }, { "epoch": 0.2925469747370314, "grad_norm": 1.5949962139129639, "learning_rate": 9.978737213731904e-06, "loss": 0.899, "step": 1627 }, { "epoch": 0.2927267823428931, "grad_norm": 1.0481866598129272, "learning_rate": 9.978683524138316e-06, "loss": 1.0895, "step": 1628 }, { "epoch": 0.29290658994875485, "grad_norm": 1.7675501108169556, "learning_rate": 9.978629766990527e-06, "loss": 0.8436, "step": 1629 }, { "epoch": 0.2930863975546166, "grad_norm": 1.5744868516921997, "learning_rate": 9.97857594228926e-06, "loss": 0.8995, "step": 1630 }, { "epoch": 0.2932662051604783, "grad_norm": 1.5553059577941895, "learning_rate": 9.978522050035256e-06, "loss": 0.8391, "step": 1631 }, { "epoch": 0.29344601276634, "grad_norm": 1.1277530193328857, "learning_rate": 9.978468090229236e-06, "loss": 1.1073, "step": 1632 }, { "epoch": 0.29362582037220175, "grad_norm": 1.6414903402328491, "learning_rate": 9.978414062871938e-06, "loss": 0.833, "step": 1633 }, { "epoch": 0.2938056279780635, "grad_norm": 1.7921141386032104, "learning_rate": 9.978359967964094e-06, "loss": 0.8843, "step": 1634 }, { "epoch": 0.2939854355839252, "grad_norm": 1.0136796236038208, "learning_rate": 9.978305805506436e-06, "loss": 1.0827, "step": 1635 }, { "epoch": 0.29416524318978693, "grad_norm": 1.5078881978988647, "learning_rate": 9.978251575499702e-06, "loss": 0.8237, "step": 1636 }, { "epoch": 0.29434505079564866, "grad_norm": 1.6656113862991333, "learning_rate": 9.978197277944626e-06, "loss": 0.9091, "step": 1637 }, { "epoch": 0.2945248584015104, "grad_norm": 1.5715144872665405, "learning_rate": 9.978142912841944e-06, "loss": 0.8307, "step": 1638 }, { "epoch": 0.2947046660073721, "grad_norm": 1.6467207670211792, "learning_rate": 9.978088480192396e-06, "loss": 0.904, "step": 1639 }, { "epoch": 0.29488447361323383, "grad_norm": 1.7426341772079468, "learning_rate": 9.978033979996719e-06, "loss": 0.8579, "step": 1640 }, { "epoch": 0.29506428121909556, "grad_norm": 1.6471514701843262, "learning_rate": 9.977979412255651e-06, "loss": 0.8784, "step": 1641 }, { "epoch": 0.2952440888249573, "grad_norm": 1.616461157798767, "learning_rate": 9.977924776969936e-06, "loss": 0.8721, "step": 1642 }, { "epoch": 0.295423896430819, "grad_norm": 1.5894291400909424, "learning_rate": 9.977870074140314e-06, "loss": 0.8447, "step": 1643 }, { "epoch": 0.29560370403668074, "grad_norm": 1.1640874147415161, "learning_rate": 9.977815303767525e-06, "loss": 1.1537, "step": 1644 }, { "epoch": 0.29578351164254246, "grad_norm": 1.0437068939208984, "learning_rate": 9.977760465852316e-06, "loss": 1.0932, "step": 1645 }, { "epoch": 0.2959633192484042, "grad_norm": 1.5446412563323975, "learning_rate": 9.977705560395427e-06, "loss": 0.8512, "step": 1646 }, { "epoch": 0.2961431268542659, "grad_norm": 1.607150673866272, "learning_rate": 9.977650587397606e-06, "loss": 0.8588, "step": 1647 }, { "epoch": 0.29632293446012764, "grad_norm": 1.529534101486206, "learning_rate": 9.977595546859596e-06, "loss": 0.8978, "step": 1648 }, { "epoch": 0.29650274206598937, "grad_norm": 1.54122793674469, "learning_rate": 9.977540438782147e-06, "loss": 0.8535, "step": 1649 }, { "epoch": 0.2966825496718511, "grad_norm": 1.5664646625518799, "learning_rate": 9.977485263166008e-06, "loss": 0.8378, "step": 1650 }, { "epoch": 0.2968623572777129, "grad_norm": 1.1174393892288208, "learning_rate": 9.977430020011922e-06, "loss": 1.1454, "step": 1651 }, { "epoch": 0.2970421648835746, "grad_norm": 1.1389999389648438, "learning_rate": 9.977374709320643e-06, "loss": 1.123, "step": 1652 }, { "epoch": 0.2972219724894363, "grad_norm": 1.4983912706375122, "learning_rate": 9.977319331092918e-06, "loss": 0.8306, "step": 1653 }, { "epoch": 0.29740178009529805, "grad_norm": 1.681870460510254, "learning_rate": 9.977263885329503e-06, "loss": 0.8711, "step": 1654 }, { "epoch": 0.2975815877011598, "grad_norm": 1.542197585105896, "learning_rate": 9.977208372031147e-06, "loss": 0.9007, "step": 1655 }, { "epoch": 0.2977613953070215, "grad_norm": 1.6800435781478882, "learning_rate": 9.977152791198604e-06, "loss": 0.8593, "step": 1656 }, { "epoch": 0.29794120291288323, "grad_norm": 1.6068346500396729, "learning_rate": 9.97709714283263e-06, "loss": 0.8379, "step": 1657 }, { "epoch": 0.29812101051874496, "grad_norm": 1.5241518020629883, "learning_rate": 9.977041426933975e-06, "loss": 0.8726, "step": 1658 }, { "epoch": 0.2983008181246067, "grad_norm": 1.6650941371917725, "learning_rate": 9.976985643503402e-06, "loss": 0.7882, "step": 1659 }, { "epoch": 0.2984806257304684, "grad_norm": 1.5754331350326538, "learning_rate": 9.976929792541663e-06, "loss": 0.8916, "step": 1660 }, { "epoch": 0.29866043333633013, "grad_norm": 1.5227032899856567, "learning_rate": 9.976873874049516e-06, "loss": 0.8783, "step": 1661 }, { "epoch": 0.29884024094219186, "grad_norm": 1.5176692008972168, "learning_rate": 9.976817888027723e-06, "loss": 0.8185, "step": 1662 }, { "epoch": 0.2990200485480536, "grad_norm": 1.5289859771728516, "learning_rate": 9.97676183447704e-06, "loss": 0.8466, "step": 1663 }, { "epoch": 0.2991998561539153, "grad_norm": 1.7702527046203613, "learning_rate": 9.976705713398229e-06, "loss": 0.8636, "step": 1664 }, { "epoch": 0.29937966375977704, "grad_norm": 1.6539654731750488, "learning_rate": 9.976649524792052e-06, "loss": 0.9267, "step": 1665 }, { "epoch": 0.29955947136563876, "grad_norm": 1.5533994436264038, "learning_rate": 9.976593268659272e-06, "loss": 0.8585, "step": 1666 }, { "epoch": 0.2997392789715005, "grad_norm": 1.5687283277511597, "learning_rate": 9.97653694500065e-06, "loss": 0.7587, "step": 1667 }, { "epoch": 0.2999190865773622, "grad_norm": 1.5858772993087769, "learning_rate": 9.976480553816952e-06, "loss": 0.9248, "step": 1668 }, { "epoch": 0.30009889418322394, "grad_norm": 1.5479596853256226, "learning_rate": 9.976424095108941e-06, "loss": 0.8615, "step": 1669 }, { "epoch": 0.30027870178908567, "grad_norm": 1.766879677772522, "learning_rate": 9.976367568877388e-06, "loss": 0.8671, "step": 1670 }, { "epoch": 0.3004585093949474, "grad_norm": 1.612399935722351, "learning_rate": 9.976310975123054e-06, "loss": 0.8601, "step": 1671 }, { "epoch": 0.3006383170008091, "grad_norm": 1.695294737815857, "learning_rate": 9.97625431384671e-06, "loss": 0.8552, "step": 1672 }, { "epoch": 0.30081812460667084, "grad_norm": 1.607796549797058, "learning_rate": 9.976197585049126e-06, "loss": 0.9273, "step": 1673 }, { "epoch": 0.30099793221253257, "grad_norm": 1.5130232572555542, "learning_rate": 9.97614078873107e-06, "loss": 0.8653, "step": 1674 }, { "epoch": 0.3011777398183943, "grad_norm": 1.506971836090088, "learning_rate": 9.976083924893311e-06, "loss": 0.8773, "step": 1675 }, { "epoch": 0.301357547424256, "grad_norm": 1.583051085472107, "learning_rate": 9.976026993536625e-06, "loss": 0.8615, "step": 1676 }, { "epoch": 0.30153735503011775, "grad_norm": 1.5970945358276367, "learning_rate": 9.97596999466178e-06, "loss": 0.8756, "step": 1677 }, { "epoch": 0.3017171626359795, "grad_norm": 1.5192290544509888, "learning_rate": 9.975912928269553e-06, "loss": 0.8525, "step": 1678 }, { "epoch": 0.30189697024184126, "grad_norm": 1.4558680057525635, "learning_rate": 9.975855794360716e-06, "loss": 0.8538, "step": 1679 }, { "epoch": 0.302076777847703, "grad_norm": 1.5620304346084595, "learning_rate": 9.975798592936043e-06, "loss": 0.944, "step": 1680 }, { "epoch": 0.3022565854535647, "grad_norm": 1.5532978773117065, "learning_rate": 9.975741323996313e-06, "loss": 0.7985, "step": 1681 }, { "epoch": 0.30243639305942643, "grad_norm": 1.540877103805542, "learning_rate": 9.975683987542303e-06, "loss": 0.8459, "step": 1682 }, { "epoch": 0.30261620066528816, "grad_norm": 1.5129897594451904, "learning_rate": 9.975626583574792e-06, "loss": 0.8285, "step": 1683 }, { "epoch": 0.3027960082711499, "grad_norm": 1.6796293258666992, "learning_rate": 9.975569112094555e-06, "loss": 0.921, "step": 1684 }, { "epoch": 0.3029758158770116, "grad_norm": 1.4317930936813354, "learning_rate": 9.975511573102372e-06, "loss": 1.1189, "step": 1685 }, { "epoch": 0.30315562348287334, "grad_norm": 1.635244369506836, "learning_rate": 9.975453966599026e-06, "loss": 0.8353, "step": 1686 }, { "epoch": 0.30333543108873506, "grad_norm": 1.1946797370910645, "learning_rate": 9.9753962925853e-06, "loss": 1.0865, "step": 1687 }, { "epoch": 0.3035152386945968, "grad_norm": 1.5695890188217163, "learning_rate": 9.975338551061973e-06, "loss": 0.8527, "step": 1688 }, { "epoch": 0.3036950463004585, "grad_norm": 1.5455530881881714, "learning_rate": 9.975280742029831e-06, "loss": 0.8783, "step": 1689 }, { "epoch": 0.30387485390632024, "grad_norm": 1.6566451787948608, "learning_rate": 9.975222865489657e-06, "loss": 0.8164, "step": 1690 }, { "epoch": 0.30405466151218197, "grad_norm": 1.4756081104278564, "learning_rate": 9.97516492144224e-06, "loss": 0.7745, "step": 1691 }, { "epoch": 0.3042344691180437, "grad_norm": 1.771692156791687, "learning_rate": 9.975106909888359e-06, "loss": 0.8378, "step": 1692 }, { "epoch": 0.3044142767239054, "grad_norm": 1.6826320886611938, "learning_rate": 9.975048830828807e-06, "loss": 0.8416, "step": 1693 }, { "epoch": 0.30459408432976715, "grad_norm": 1.4791301488876343, "learning_rate": 9.97499068426437e-06, "loss": 0.7895, "step": 1694 }, { "epoch": 0.30477389193562887, "grad_norm": 1.5885791778564453, "learning_rate": 9.974932470195837e-06, "loss": 0.8079, "step": 1695 }, { "epoch": 0.3049536995414906, "grad_norm": 1.5511046648025513, "learning_rate": 9.974874188623999e-06, "loss": 0.8244, "step": 1696 }, { "epoch": 0.3051335071473523, "grad_norm": 1.492921233177185, "learning_rate": 9.974815839549646e-06, "loss": 0.8599, "step": 1697 }, { "epoch": 0.30531331475321405, "grad_norm": 1.5162612199783325, "learning_rate": 9.974757422973568e-06, "loss": 0.8169, "step": 1698 }, { "epoch": 0.3054931223590758, "grad_norm": 1.6076687574386597, "learning_rate": 9.974698938896561e-06, "loss": 0.8415, "step": 1699 }, { "epoch": 0.3056729299649375, "grad_norm": 1.7363978624343872, "learning_rate": 9.974640387319417e-06, "loss": 0.8668, "step": 1700 }, { "epoch": 0.3058527375707992, "grad_norm": 1.54964280128479, "learning_rate": 9.974581768242931e-06, "loss": 0.8426, "step": 1701 }, { "epoch": 0.30603254517666095, "grad_norm": 1.4382115602493286, "learning_rate": 9.974523081667895e-06, "loss": 0.8918, "step": 1702 }, { "epoch": 0.3062123527825227, "grad_norm": 1.483417272567749, "learning_rate": 9.97446432759511e-06, "loss": 0.8402, "step": 1703 }, { "epoch": 0.3063921603883844, "grad_norm": 1.6067560911178589, "learning_rate": 9.974405506025371e-06, "loss": 0.8444, "step": 1704 }, { "epoch": 0.30657196799424613, "grad_norm": 1.6026532649993896, "learning_rate": 9.974346616959476e-06, "loss": 0.8621, "step": 1705 }, { "epoch": 0.3067517756001079, "grad_norm": 1.5898911952972412, "learning_rate": 9.974287660398226e-06, "loss": 0.8328, "step": 1706 }, { "epoch": 0.30693158320596964, "grad_norm": 1.6992371082305908, "learning_rate": 9.974228636342418e-06, "loss": 0.84, "step": 1707 }, { "epoch": 0.30711139081183136, "grad_norm": 1.6501500606536865, "learning_rate": 9.974169544792854e-06, "loss": 0.9162, "step": 1708 }, { "epoch": 0.3072911984176931, "grad_norm": 1.9763063192367554, "learning_rate": 9.974110385750336e-06, "loss": 1.129, "step": 1709 }, { "epoch": 0.3074710060235548, "grad_norm": 1.6965593099594116, "learning_rate": 9.974051159215668e-06, "loss": 1.1219, "step": 1710 }, { "epoch": 0.30765081362941654, "grad_norm": 1.554974913597107, "learning_rate": 9.97399186518965e-06, "loss": 0.8417, "step": 1711 }, { "epoch": 0.30783062123527827, "grad_norm": 1.4631580114364624, "learning_rate": 9.973932503673092e-06, "loss": 0.8728, "step": 1712 }, { "epoch": 0.30801042884114, "grad_norm": 1.5277106761932373, "learning_rate": 9.973873074666795e-06, "loss": 0.8428, "step": 1713 }, { "epoch": 0.3081902364470017, "grad_norm": 1.5568684339523315, "learning_rate": 9.973813578171566e-06, "loss": 0.8274, "step": 1714 }, { "epoch": 0.30837004405286345, "grad_norm": 1.5151287317276, "learning_rate": 9.973754014188214e-06, "loss": 0.8697, "step": 1715 }, { "epoch": 0.30854985165872517, "grad_norm": 1.539067029953003, "learning_rate": 9.973694382717545e-06, "loss": 0.8385, "step": 1716 }, { "epoch": 0.3087296592645869, "grad_norm": 1.8190032243728638, "learning_rate": 9.97363468376037e-06, "loss": 0.8604, "step": 1717 }, { "epoch": 0.3089094668704486, "grad_norm": 1.546557903289795, "learning_rate": 9.9735749173175e-06, "loss": 0.8706, "step": 1718 }, { "epoch": 0.30908927447631035, "grad_norm": 1.8814586400985718, "learning_rate": 9.973515083389743e-06, "loss": 1.1248, "step": 1719 }, { "epoch": 0.3092690820821721, "grad_norm": 1.7417705059051514, "learning_rate": 9.973455181977914e-06, "loss": 1.0905, "step": 1720 }, { "epoch": 0.3094488896880338, "grad_norm": 1.5876657962799072, "learning_rate": 9.973395213082822e-06, "loss": 0.8802, "step": 1721 }, { "epoch": 0.3096286972938955, "grad_norm": 1.6132954359054565, "learning_rate": 9.973335176705283e-06, "loss": 0.8087, "step": 1722 }, { "epoch": 0.30980850489975725, "grad_norm": 1.7001310586929321, "learning_rate": 9.97327507284611e-06, "loss": 0.8745, "step": 1723 }, { "epoch": 0.309988312505619, "grad_norm": 1.6726000308990479, "learning_rate": 9.973214901506124e-06, "loss": 0.8406, "step": 1724 }, { "epoch": 0.3101681201114807, "grad_norm": 1.6334235668182373, "learning_rate": 9.973154662686134e-06, "loss": 0.8378, "step": 1725 }, { "epoch": 0.31034792771734243, "grad_norm": 1.634869933128357, "learning_rate": 9.97309435638696e-06, "loss": 0.9012, "step": 1726 }, { "epoch": 0.31052773532320416, "grad_norm": 1.577277660369873, "learning_rate": 9.973033982609423e-06, "loss": 0.8441, "step": 1727 }, { "epoch": 0.3107075429290659, "grad_norm": 1.7439382076263428, "learning_rate": 9.97297354135434e-06, "loss": 0.8462, "step": 1728 }, { "epoch": 0.3108873505349276, "grad_norm": 1.672292947769165, "learning_rate": 9.972913032622532e-06, "loss": 0.9065, "step": 1729 }, { "epoch": 0.31106715814078933, "grad_norm": 1.5863062143325806, "learning_rate": 9.972852456414816e-06, "loss": 0.8821, "step": 1730 }, { "epoch": 0.31124696574665106, "grad_norm": 1.613913893699646, "learning_rate": 9.972791812732022e-06, "loss": 0.9034, "step": 1731 }, { "epoch": 0.3114267733525128, "grad_norm": 1.6186485290527344, "learning_rate": 9.972731101574965e-06, "loss": 0.8763, "step": 1732 }, { "epoch": 0.3116065809583745, "grad_norm": 1.3123530149459839, "learning_rate": 9.97267032294447e-06, "loss": 1.1254, "step": 1733 }, { "epoch": 0.3117863885642363, "grad_norm": 1.5708279609680176, "learning_rate": 9.972609476841368e-06, "loss": 0.8076, "step": 1734 }, { "epoch": 0.311966196170098, "grad_norm": 1.642563819885254, "learning_rate": 9.972548563266477e-06, "loss": 0.8394, "step": 1735 }, { "epoch": 0.31214600377595975, "grad_norm": 1.7059229612350464, "learning_rate": 9.972487582220628e-06, "loss": 0.921, "step": 1736 }, { "epoch": 0.31232581138182147, "grad_norm": 1.8965340852737427, "learning_rate": 9.972426533704646e-06, "loss": 0.8861, "step": 1737 }, { "epoch": 0.3125056189876832, "grad_norm": 1.5571885108947754, "learning_rate": 9.972365417719364e-06, "loss": 0.8306, "step": 1738 }, { "epoch": 0.3126854265935449, "grad_norm": 1.6898233890533447, "learning_rate": 9.972304234265603e-06, "loss": 0.9317, "step": 1739 }, { "epoch": 0.31286523419940665, "grad_norm": 1.3377647399902344, "learning_rate": 9.9722429833442e-06, "loss": 1.0957, "step": 1740 }, { "epoch": 0.3130450418052684, "grad_norm": 1.701106071472168, "learning_rate": 9.972181664955984e-06, "loss": 0.8952, "step": 1741 }, { "epoch": 0.3132248494111301, "grad_norm": 1.1004226207733154, "learning_rate": 9.972120279101786e-06, "loss": 1.1043, "step": 1742 }, { "epoch": 0.31340465701699183, "grad_norm": 1.5496290922164917, "learning_rate": 9.972058825782441e-06, "loss": 0.8517, "step": 1743 }, { "epoch": 0.31358446462285355, "grad_norm": 1.5562355518341064, "learning_rate": 9.971997304998782e-06, "loss": 0.8478, "step": 1744 }, { "epoch": 0.3137642722287153, "grad_norm": 1.4976489543914795, "learning_rate": 9.971935716751642e-06, "loss": 0.8181, "step": 1745 }, { "epoch": 0.313944079834577, "grad_norm": 1.5158238410949707, "learning_rate": 9.97187406104186e-06, "loss": 0.823, "step": 1746 }, { "epoch": 0.31412388744043873, "grad_norm": 1.5765734910964966, "learning_rate": 9.97181233787027e-06, "loss": 0.8938, "step": 1747 }, { "epoch": 0.31430369504630046, "grad_norm": 1.5952017307281494, "learning_rate": 9.971750547237709e-06, "loss": 0.8412, "step": 1748 }, { "epoch": 0.3144835026521622, "grad_norm": 1.628752589225769, "learning_rate": 9.971688689145019e-06, "loss": 0.8554, "step": 1749 }, { "epoch": 0.3146633102580239, "grad_norm": 1.6415146589279175, "learning_rate": 9.971626763593035e-06, "loss": 0.8695, "step": 1750 }, { "epoch": 0.31484311786388564, "grad_norm": 1.6266472339630127, "learning_rate": 9.9715647705826e-06, "loss": 0.8247, "step": 1751 }, { "epoch": 0.31502292546974736, "grad_norm": 1.5064839124679565, "learning_rate": 9.971502710114555e-06, "loss": 0.8564, "step": 1752 }, { "epoch": 0.3152027330756091, "grad_norm": 1.7120510339736938, "learning_rate": 9.971440582189741e-06, "loss": 0.8137, "step": 1753 }, { "epoch": 0.3153825406814708, "grad_norm": 1.839598298072815, "learning_rate": 9.971378386809002e-06, "loss": 0.8359, "step": 1754 }, { "epoch": 0.31556234828733254, "grad_norm": 1.535749912261963, "learning_rate": 9.97131612397318e-06, "loss": 0.8429, "step": 1755 }, { "epoch": 0.31574215589319427, "grad_norm": 1.822122573852539, "learning_rate": 9.971253793683123e-06, "loss": 0.9178, "step": 1756 }, { "epoch": 0.315921963499056, "grad_norm": 1.5264445543289185, "learning_rate": 9.971191395939675e-06, "loss": 0.8308, "step": 1757 }, { "epoch": 0.3161017711049177, "grad_norm": 1.4588216543197632, "learning_rate": 9.97112893074368e-06, "loss": 0.8188, "step": 1758 }, { "epoch": 0.31628157871077944, "grad_norm": 1.7785741090774536, "learning_rate": 9.971066398095992e-06, "loss": 0.8789, "step": 1759 }, { "epoch": 0.31646138631664117, "grad_norm": 1.1996477842330933, "learning_rate": 9.971003797997454e-06, "loss": 1.1391, "step": 1760 }, { "epoch": 0.3166411939225029, "grad_norm": 1.1976300477981567, "learning_rate": 9.970941130448917e-06, "loss": 1.1172, "step": 1761 }, { "epoch": 0.3168210015283647, "grad_norm": 1.7202491760253906, "learning_rate": 9.97087839545123e-06, "loss": 0.9077, "step": 1762 }, { "epoch": 0.3170008091342264, "grad_norm": 1.5022733211517334, "learning_rate": 9.970815593005248e-06, "loss": 0.8636, "step": 1763 }, { "epoch": 0.31718061674008813, "grad_norm": 1.7028264999389648, "learning_rate": 9.97075272311182e-06, "loss": 0.8976, "step": 1764 }, { "epoch": 0.31736042434594985, "grad_norm": 1.6055231094360352, "learning_rate": 9.970689785771798e-06, "loss": 0.8661, "step": 1765 }, { "epoch": 0.3175402319518116, "grad_norm": 1.514298439025879, "learning_rate": 9.97062678098604e-06, "loss": 0.8188, "step": 1766 }, { "epoch": 0.3177200395576733, "grad_norm": 1.4520511627197266, "learning_rate": 9.9705637087554e-06, "loss": 0.8202, "step": 1767 }, { "epoch": 0.31789984716353503, "grad_norm": 1.1771142482757568, "learning_rate": 9.97050056908073e-06, "loss": 1.0984, "step": 1768 }, { "epoch": 0.31807965476939676, "grad_norm": 1.5541900396347046, "learning_rate": 9.970437361962889e-06, "loss": 0.8102, "step": 1769 }, { "epoch": 0.3182594623752585, "grad_norm": 1.7512789964675903, "learning_rate": 9.970374087402737e-06, "loss": 0.7962, "step": 1770 }, { "epoch": 0.3184392699811202, "grad_norm": 1.5800710916519165, "learning_rate": 9.970310745401129e-06, "loss": 0.8534, "step": 1771 }, { "epoch": 0.31861907758698194, "grad_norm": 1.5439621210098267, "learning_rate": 9.970247335958925e-06, "loss": 0.8054, "step": 1772 }, { "epoch": 0.31879888519284366, "grad_norm": 1.026721477508545, "learning_rate": 9.970183859076987e-06, "loss": 1.1407, "step": 1773 }, { "epoch": 0.3189786927987054, "grad_norm": 1.5809403657913208, "learning_rate": 9.970120314756177e-06, "loss": 0.8581, "step": 1774 }, { "epoch": 0.3191585004045671, "grad_norm": 1.5973210334777832, "learning_rate": 9.970056702997355e-06, "loss": 0.8084, "step": 1775 }, { "epoch": 0.31933830801042884, "grad_norm": 1.1137486696243286, "learning_rate": 9.969993023801386e-06, "loss": 1.0816, "step": 1776 }, { "epoch": 0.31951811561629057, "grad_norm": 1.6257777214050293, "learning_rate": 9.96992927716913e-06, "loss": 0.8163, "step": 1777 }, { "epoch": 0.3196979232221523, "grad_norm": 1.5693477392196655, "learning_rate": 9.969865463101457e-06, "loss": 0.8015, "step": 1778 }, { "epoch": 0.319877730828014, "grad_norm": 1.4822450876235962, "learning_rate": 9.96980158159923e-06, "loss": 0.8601, "step": 1779 }, { "epoch": 0.32005753843387574, "grad_norm": 1.5954512357711792, "learning_rate": 9.969737632663318e-06, "loss": 0.8326, "step": 1780 }, { "epoch": 0.32023734603973747, "grad_norm": 1.0709717273712158, "learning_rate": 9.969673616294586e-06, "loss": 1.1422, "step": 1781 }, { "epoch": 0.3204171536455992, "grad_norm": 1.619102120399475, "learning_rate": 9.969609532493905e-06, "loss": 0.9219, "step": 1782 }, { "epoch": 0.3205969612514609, "grad_norm": 1.5826733112335205, "learning_rate": 9.969545381262142e-06, "loss": 0.7823, "step": 1783 }, { "epoch": 0.32077676885732265, "grad_norm": 1.5640844106674194, "learning_rate": 9.96948116260017e-06, "loss": 0.8705, "step": 1784 }, { "epoch": 0.3209565764631844, "grad_norm": 1.6874637603759766, "learning_rate": 9.969416876508859e-06, "loss": 0.8464, "step": 1785 }, { "epoch": 0.3211363840690461, "grad_norm": 1.3825693130493164, "learning_rate": 9.969352522989082e-06, "loss": 0.7781, "step": 1786 }, { "epoch": 0.3213161916749078, "grad_norm": 1.1224976778030396, "learning_rate": 9.96928810204171e-06, "loss": 1.13, "step": 1787 }, { "epoch": 0.32149599928076955, "grad_norm": 1.5350613594055176, "learning_rate": 9.96922361366762e-06, "loss": 0.8389, "step": 1788 }, { "epoch": 0.3216758068866313, "grad_norm": 1.0592683553695679, "learning_rate": 9.969159057867687e-06, "loss": 1.0685, "step": 1789 }, { "epoch": 0.32185561449249306, "grad_norm": 1.0412403345108032, "learning_rate": 9.969094434642784e-06, "loss": 1.0775, "step": 1790 }, { "epoch": 0.3220354220983548, "grad_norm": 1.581215262413025, "learning_rate": 9.969029743993791e-06, "loss": 0.8779, "step": 1791 }, { "epoch": 0.3222152297042165, "grad_norm": 1.432968020439148, "learning_rate": 9.968964985921584e-06, "loss": 0.824, "step": 1792 }, { "epoch": 0.32239503731007824, "grad_norm": 1.7216336727142334, "learning_rate": 9.968900160427041e-06, "loss": 0.8881, "step": 1793 }, { "epoch": 0.32257484491593996, "grad_norm": 1.765701413154602, "learning_rate": 9.968835267511044e-06, "loss": 0.7858, "step": 1794 }, { "epoch": 0.3227546525218017, "grad_norm": 1.5359985828399658, "learning_rate": 9.968770307174472e-06, "loss": 0.8665, "step": 1795 }, { "epoch": 0.3229344601276634, "grad_norm": 1.082518458366394, "learning_rate": 9.968705279418207e-06, "loss": 1.1351, "step": 1796 }, { "epoch": 0.32311426773352514, "grad_norm": 1.5495679378509521, "learning_rate": 9.96864018424313e-06, "loss": 0.9244, "step": 1797 }, { "epoch": 0.32329407533938687, "grad_norm": 1.4718693494796753, "learning_rate": 9.968575021650125e-06, "loss": 0.8445, "step": 1798 }, { "epoch": 0.3234738829452486, "grad_norm": 1.5853703022003174, "learning_rate": 9.968509791640078e-06, "loss": 0.8356, "step": 1799 }, { "epoch": 0.3236536905511103, "grad_norm": 1.4711313247680664, "learning_rate": 9.968444494213872e-06, "loss": 0.8929, "step": 1800 }, { "epoch": 0.32383349815697204, "grad_norm": 1.5763107538223267, "learning_rate": 9.968379129372392e-06, "loss": 0.9073, "step": 1801 }, { "epoch": 0.32401330576283377, "grad_norm": 1.409597635269165, "learning_rate": 9.968313697116528e-06, "loss": 0.853, "step": 1802 }, { "epoch": 0.3241931133686955, "grad_norm": 1.52982497215271, "learning_rate": 9.968248197447166e-06, "loss": 0.892, "step": 1803 }, { "epoch": 0.3243729209745572, "grad_norm": 1.508986234664917, "learning_rate": 9.968182630365194e-06, "loss": 0.8277, "step": 1804 }, { "epoch": 0.32455272858041895, "grad_norm": 1.548377275466919, "learning_rate": 9.968116995871504e-06, "loss": 0.8154, "step": 1805 }, { "epoch": 0.3247325361862807, "grad_norm": 1.539473533630371, "learning_rate": 9.968051293966984e-06, "loss": 0.8939, "step": 1806 }, { "epoch": 0.3249123437921424, "grad_norm": 1.5514447689056396, "learning_rate": 9.967985524652527e-06, "loss": 0.908, "step": 1807 }, { "epoch": 0.3250921513980041, "grad_norm": 1.4951096773147583, "learning_rate": 9.967919687929025e-06, "loss": 0.7904, "step": 1808 }, { "epoch": 0.32527195900386585, "grad_norm": 1.5983247756958008, "learning_rate": 9.967853783797372e-06, "loss": 0.8608, "step": 1809 }, { "epoch": 0.3254517666097276, "grad_norm": 1.6823383569717407, "learning_rate": 9.967787812258461e-06, "loss": 0.8663, "step": 1810 }, { "epoch": 0.3256315742155893, "grad_norm": 1.5097993612289429, "learning_rate": 9.967721773313188e-06, "loss": 0.8481, "step": 1811 }, { "epoch": 0.32581138182145103, "grad_norm": 1.7263426780700684, "learning_rate": 9.96765566696245e-06, "loss": 0.8726, "step": 1812 }, { "epoch": 0.32599118942731276, "grad_norm": 1.189790964126587, "learning_rate": 9.967589493207142e-06, "loss": 1.1518, "step": 1813 }, { "epoch": 0.3261709970331745, "grad_norm": 1.5105699300765991, "learning_rate": 9.967523252048162e-06, "loss": 0.8199, "step": 1814 }, { "epoch": 0.3263508046390362, "grad_norm": 1.553027629852295, "learning_rate": 9.96745694348641e-06, "loss": 0.8254, "step": 1815 }, { "epoch": 0.32653061224489793, "grad_norm": 1.585126519203186, "learning_rate": 9.967390567522786e-06, "loss": 0.8437, "step": 1816 }, { "epoch": 0.3267104198507597, "grad_norm": 1.4205716848373413, "learning_rate": 9.967324124158189e-06, "loss": 0.8272, "step": 1817 }, { "epoch": 0.32689022745662144, "grad_norm": 1.640176773071289, "learning_rate": 9.967257613393521e-06, "loss": 0.8686, "step": 1818 }, { "epoch": 0.32707003506248317, "grad_norm": 1.5453197956085205, "learning_rate": 9.967191035229686e-06, "loss": 0.8287, "step": 1819 }, { "epoch": 0.3272498426683449, "grad_norm": 1.5122348070144653, "learning_rate": 9.967124389667586e-06, "loss": 0.8489, "step": 1820 }, { "epoch": 0.3274296502742066, "grad_norm": 1.487722635269165, "learning_rate": 9.967057676708126e-06, "loss": 0.7801, "step": 1821 }, { "epoch": 0.32760945788006834, "grad_norm": 1.6049857139587402, "learning_rate": 9.96699089635221e-06, "loss": 0.8902, "step": 1822 }, { "epoch": 0.32778926548593007, "grad_norm": 1.1157350540161133, "learning_rate": 9.966924048600746e-06, "loss": 1.1023, "step": 1823 }, { "epoch": 0.3279690730917918, "grad_norm": 1.509761929512024, "learning_rate": 9.966857133454639e-06, "loss": 0.8601, "step": 1824 }, { "epoch": 0.3281488806976535, "grad_norm": 1.058154821395874, "learning_rate": 9.966790150914798e-06, "loss": 1.0936, "step": 1825 }, { "epoch": 0.32832868830351525, "grad_norm": 1.9203919172286987, "learning_rate": 9.966723100982131e-06, "loss": 0.8415, "step": 1826 }, { "epoch": 0.328508495909377, "grad_norm": 1.5314959287643433, "learning_rate": 9.96665598365755e-06, "loss": 0.8693, "step": 1827 }, { "epoch": 0.3286883035152387, "grad_norm": 1.4428666830062866, "learning_rate": 9.966588798941965e-06, "loss": 0.8007, "step": 1828 }, { "epoch": 0.3288681111211004, "grad_norm": 1.5769941806793213, "learning_rate": 9.966521546836286e-06, "loss": 0.7971, "step": 1829 }, { "epoch": 0.32904791872696215, "grad_norm": 1.569154977798462, "learning_rate": 9.966454227341425e-06, "loss": 0.8746, "step": 1830 }, { "epoch": 0.3292277263328239, "grad_norm": 1.5457146167755127, "learning_rate": 9.966386840458298e-06, "loss": 0.8593, "step": 1831 }, { "epoch": 0.3294075339386856, "grad_norm": 2.0538318157196045, "learning_rate": 9.966319386187816e-06, "loss": 0.8397, "step": 1832 }, { "epoch": 0.32958734154454733, "grad_norm": 1.5463035106658936, "learning_rate": 9.966251864530899e-06, "loss": 0.8104, "step": 1833 }, { "epoch": 0.32976714915040906, "grad_norm": 1.530861735343933, "learning_rate": 9.96618427548846e-06, "loss": 0.7937, "step": 1834 }, { "epoch": 0.3299469567562708, "grad_norm": 1.6721742153167725, "learning_rate": 9.966116619061417e-06, "loss": 0.9231, "step": 1835 }, { "epoch": 0.3301267643621325, "grad_norm": 1.6133123636245728, "learning_rate": 9.966048895250686e-06, "loss": 0.8864, "step": 1836 }, { "epoch": 0.33030657196799423, "grad_norm": 1.2537777423858643, "learning_rate": 9.96598110405719e-06, "loss": 1.1116, "step": 1837 }, { "epoch": 0.33048637957385596, "grad_norm": 1.619435429573059, "learning_rate": 9.965913245481843e-06, "loss": 0.8624, "step": 1838 }, { "epoch": 0.3306661871797177, "grad_norm": 1.520890235900879, "learning_rate": 9.965845319525573e-06, "loss": 0.8123, "step": 1839 }, { "epoch": 0.3308459947855794, "grad_norm": 1.5147597789764404, "learning_rate": 9.965777326189297e-06, "loss": 0.9444, "step": 1840 }, { "epoch": 0.33102580239144114, "grad_norm": 1.5033808946609497, "learning_rate": 9.965709265473937e-06, "loss": 0.8522, "step": 1841 }, { "epoch": 0.33120560999730286, "grad_norm": 1.5304560661315918, "learning_rate": 9.96564113738042e-06, "loss": 0.8749, "step": 1842 }, { "epoch": 0.3313854176031646, "grad_norm": 1.429155707359314, "learning_rate": 9.965572941909667e-06, "loss": 0.8238, "step": 1843 }, { "epoch": 0.3315652252090263, "grad_norm": 1.518715262413025, "learning_rate": 9.965504679062604e-06, "loss": 0.8277, "step": 1844 }, { "epoch": 0.3317450328148881, "grad_norm": 2.139725685119629, "learning_rate": 9.965436348840158e-06, "loss": 0.7848, "step": 1845 }, { "epoch": 0.3319248404207498, "grad_norm": 1.1263635158538818, "learning_rate": 9.965367951243258e-06, "loss": 1.0812, "step": 1846 }, { "epoch": 0.33210464802661155, "grad_norm": 1.616934895515442, "learning_rate": 9.965299486272828e-06, "loss": 0.7905, "step": 1847 }, { "epoch": 0.3322844556324733, "grad_norm": 1.633898138999939, "learning_rate": 9.9652309539298e-06, "loss": 0.819, "step": 1848 }, { "epoch": 0.332464263238335, "grad_norm": 1.0536693334579468, "learning_rate": 9.965162354215103e-06, "loss": 1.1513, "step": 1849 }, { "epoch": 0.3326440708441967, "grad_norm": 1.5421018600463867, "learning_rate": 9.965093687129669e-06, "loss": 0.8354, "step": 1850 }, { "epoch": 0.33282387845005845, "grad_norm": 1.497441291809082, "learning_rate": 9.965024952674426e-06, "loss": 0.8951, "step": 1851 }, { "epoch": 0.3330036860559202, "grad_norm": 1.6368069648742676, "learning_rate": 9.964956150850312e-06, "loss": 0.9102, "step": 1852 }, { "epoch": 0.3331834936617819, "grad_norm": 1.732130765914917, "learning_rate": 9.964887281658256e-06, "loss": 0.9049, "step": 1853 }, { "epoch": 0.33336330126764363, "grad_norm": 1.6776325702667236, "learning_rate": 9.964818345099196e-06, "loss": 0.8265, "step": 1854 }, { "epoch": 0.33354310887350536, "grad_norm": 1.528887391090393, "learning_rate": 9.964749341174063e-06, "loss": 0.8577, "step": 1855 }, { "epoch": 0.3337229164793671, "grad_norm": 1.5593796968460083, "learning_rate": 9.964680269883798e-06, "loss": 0.8497, "step": 1856 }, { "epoch": 0.3339027240852288, "grad_norm": 1.562131643295288, "learning_rate": 9.964611131229335e-06, "loss": 0.8074, "step": 1857 }, { "epoch": 0.33408253169109053, "grad_norm": 1.3532638549804688, "learning_rate": 9.964541925211613e-06, "loss": 1.1337, "step": 1858 }, { "epoch": 0.33426233929695226, "grad_norm": 1.6860439777374268, "learning_rate": 9.964472651831571e-06, "loss": 0.8834, "step": 1859 }, { "epoch": 0.334442146902814, "grad_norm": 1.5383342504501343, "learning_rate": 9.96440331109015e-06, "loss": 0.9006, "step": 1860 }, { "epoch": 0.3346219545086757, "grad_norm": 1.4428483247756958, "learning_rate": 9.96433390298829e-06, "loss": 0.8594, "step": 1861 }, { "epoch": 0.33480176211453744, "grad_norm": 1.4893358945846558, "learning_rate": 9.964264427526933e-06, "loss": 0.8729, "step": 1862 }, { "epoch": 0.33498156972039916, "grad_norm": 1.6187070608139038, "learning_rate": 9.964194884707022e-06, "loss": 0.9155, "step": 1863 }, { "epoch": 0.3351613773262609, "grad_norm": 1.573286533355713, "learning_rate": 9.964125274529497e-06, "loss": 0.8545, "step": 1864 }, { "epoch": 0.3353411849321226, "grad_norm": 1.624733805656433, "learning_rate": 9.96405559699531e-06, "loss": 0.7911, "step": 1865 }, { "epoch": 0.33552099253798434, "grad_norm": 1.545291543006897, "learning_rate": 9.9639858521054e-06, "loss": 0.8259, "step": 1866 }, { "epoch": 0.33570080014384607, "grad_norm": 1.4718923568725586, "learning_rate": 9.963916039860715e-06, "loss": 0.7938, "step": 1867 }, { "epoch": 0.3358806077497078, "grad_norm": 1.198808193206787, "learning_rate": 9.963846160262203e-06, "loss": 1.1243, "step": 1868 }, { "epoch": 0.3360604153555695, "grad_norm": 1.4910334348678589, "learning_rate": 9.963776213310811e-06, "loss": 0.8265, "step": 1869 }, { "epoch": 0.33624022296143125, "grad_norm": 1.6007823944091797, "learning_rate": 9.96370619900749e-06, "loss": 0.8424, "step": 1870 }, { "epoch": 0.33642003056729297, "grad_norm": 1.5586227178573608, "learning_rate": 9.963636117353188e-06, "loss": 0.9076, "step": 1871 }, { "epoch": 0.3365998381731547, "grad_norm": 1.0834547281265259, "learning_rate": 9.963565968348858e-06, "loss": 1.1179, "step": 1872 }, { "epoch": 0.3367796457790165, "grad_norm": 1.5412389039993286, "learning_rate": 9.96349575199545e-06, "loss": 0.8647, "step": 1873 }, { "epoch": 0.3369594533848782, "grad_norm": 1.0501450300216675, "learning_rate": 9.963425468293919e-06, "loss": 1.0685, "step": 1874 }, { "epoch": 0.33713926099073993, "grad_norm": 1.6383459568023682, "learning_rate": 9.963355117245215e-06, "loss": 0.8676, "step": 1875 }, { "epoch": 0.33731906859660166, "grad_norm": 1.053362488746643, "learning_rate": 9.963284698850296e-06, "loss": 1.0877, "step": 1876 }, { "epoch": 0.3374988762024634, "grad_norm": 1.029101848602295, "learning_rate": 9.963214213110115e-06, "loss": 1.1253, "step": 1877 }, { "epoch": 0.3376786838083251, "grad_norm": 1.6642684936523438, "learning_rate": 9.96314366002563e-06, "loss": 0.8287, "step": 1878 }, { "epoch": 0.33785849141418683, "grad_norm": 1.6126995086669922, "learning_rate": 9.963073039597798e-06, "loss": 0.8816, "step": 1879 }, { "epoch": 0.33803829902004856, "grad_norm": 1.6349600553512573, "learning_rate": 9.963002351827577e-06, "loss": 0.906, "step": 1880 }, { "epoch": 0.3382181066259103, "grad_norm": 1.5348116159439087, "learning_rate": 9.962931596715926e-06, "loss": 0.8749, "step": 1881 }, { "epoch": 0.338397914231772, "grad_norm": 1.662396788597107, "learning_rate": 9.962860774263806e-06, "loss": 0.8302, "step": 1882 }, { "epoch": 0.33857772183763374, "grad_norm": 1.6253689527511597, "learning_rate": 9.962789884472177e-06, "loss": 0.9059, "step": 1883 }, { "epoch": 0.33875752944349546, "grad_norm": 1.5330387353897095, "learning_rate": 9.962718927342e-06, "loss": 0.83, "step": 1884 }, { "epoch": 0.3389373370493572, "grad_norm": 1.4024304151535034, "learning_rate": 9.96264790287424e-06, "loss": 1.0734, "step": 1885 }, { "epoch": 0.3391171446552189, "grad_norm": 1.6393312215805054, "learning_rate": 9.96257681106986e-06, "loss": 0.7927, "step": 1886 }, { "epoch": 0.33929695226108064, "grad_norm": 1.4247355461120605, "learning_rate": 9.962505651929823e-06, "loss": 0.8156, "step": 1887 }, { "epoch": 0.33947675986694237, "grad_norm": 1.5194060802459717, "learning_rate": 9.9624344254551e-06, "loss": 0.8275, "step": 1888 }, { "epoch": 0.3396565674728041, "grad_norm": 1.5311968326568604, "learning_rate": 9.962363131646649e-06, "loss": 0.8465, "step": 1889 }, { "epoch": 0.3398363750786658, "grad_norm": 1.4745137691497803, "learning_rate": 9.962291770505441e-06, "loss": 0.8652, "step": 1890 }, { "epoch": 0.34001618268452755, "grad_norm": 1.6357954740524292, "learning_rate": 9.962220342032447e-06, "loss": 0.8424, "step": 1891 }, { "epoch": 0.34019599029038927, "grad_norm": 1.6167566776275635, "learning_rate": 9.962148846228632e-06, "loss": 0.9167, "step": 1892 }, { "epoch": 0.340375797896251, "grad_norm": 1.5913640260696411, "learning_rate": 9.962077283094972e-06, "loss": 0.8816, "step": 1893 }, { "epoch": 0.3405556055021127, "grad_norm": 1.4868258237838745, "learning_rate": 9.962005652632429e-06, "loss": 0.8626, "step": 1894 }, { "epoch": 0.34073541310797445, "grad_norm": 1.5829328298568726, "learning_rate": 9.961933954841983e-06, "loss": 0.8544, "step": 1895 }, { "epoch": 0.3409152207138362, "grad_norm": 1.4646506309509277, "learning_rate": 9.961862189724606e-06, "loss": 0.8639, "step": 1896 }, { "epoch": 0.3410950283196979, "grad_norm": 1.6044185161590576, "learning_rate": 9.961790357281266e-06, "loss": 0.8129, "step": 1897 }, { "epoch": 0.3412748359255596, "grad_norm": 1.5655683279037476, "learning_rate": 9.961718457512943e-06, "loss": 0.8446, "step": 1898 }, { "epoch": 0.34145464353142135, "grad_norm": 1.590071439743042, "learning_rate": 9.961646490420611e-06, "loss": 0.8718, "step": 1899 }, { "epoch": 0.3416344511372831, "grad_norm": 1.4333763122558594, "learning_rate": 9.961574456005246e-06, "loss": 0.8528, "step": 1900 }, { "epoch": 0.34181425874314486, "grad_norm": 1.8958356380462646, "learning_rate": 9.961502354267827e-06, "loss": 0.8035, "step": 1901 }, { "epoch": 0.3419940663490066, "grad_norm": 1.5958282947540283, "learning_rate": 9.96143018520933e-06, "loss": 0.8153, "step": 1902 }, { "epoch": 0.3421738739548683, "grad_norm": 1.5466463565826416, "learning_rate": 9.961357948830737e-06, "loss": 0.8271, "step": 1903 }, { "epoch": 0.34235368156073004, "grad_norm": 1.6118954420089722, "learning_rate": 9.961285645133025e-06, "loss": 0.8738, "step": 1904 }, { "epoch": 0.34253348916659176, "grad_norm": 1.4383368492126465, "learning_rate": 9.961213274117176e-06, "loss": 0.8416, "step": 1905 }, { "epoch": 0.3427132967724535, "grad_norm": 1.0806385278701782, "learning_rate": 9.961140835784175e-06, "loss": 1.1062, "step": 1906 }, { "epoch": 0.3428931043783152, "grad_norm": 1.4962834119796753, "learning_rate": 9.961068330135002e-06, "loss": 0.851, "step": 1907 }, { "epoch": 0.34307291198417694, "grad_norm": 1.5629451274871826, "learning_rate": 9.960995757170639e-06, "loss": 0.8307, "step": 1908 }, { "epoch": 0.34325271959003867, "grad_norm": 1.7075932025909424, "learning_rate": 9.960923116892076e-06, "loss": 0.8145, "step": 1909 }, { "epoch": 0.3434325271959004, "grad_norm": 1.491405725479126, "learning_rate": 9.960850409300296e-06, "loss": 0.8114, "step": 1910 }, { "epoch": 0.3436123348017621, "grad_norm": 1.545052409172058, "learning_rate": 9.960777634396283e-06, "loss": 0.8238, "step": 1911 }, { "epoch": 0.34379214240762385, "grad_norm": 1.5279557704925537, "learning_rate": 9.960704792181027e-06, "loss": 0.8255, "step": 1912 }, { "epoch": 0.3439719500134856, "grad_norm": 1.6072415113449097, "learning_rate": 9.960631882655516e-06, "loss": 0.7628, "step": 1913 }, { "epoch": 0.3441517576193473, "grad_norm": 1.5460147857666016, "learning_rate": 9.960558905820741e-06, "loss": 0.8113, "step": 1914 }, { "epoch": 0.344331565225209, "grad_norm": 1.0376935005187988, "learning_rate": 9.960485861677689e-06, "loss": 1.1115, "step": 1915 }, { "epoch": 0.34451137283107075, "grad_norm": 1.1432033777236938, "learning_rate": 9.960412750227354e-06, "loss": 1.1182, "step": 1916 }, { "epoch": 0.3446911804369325, "grad_norm": 1.5776773691177368, "learning_rate": 9.960339571470726e-06, "loss": 0.8209, "step": 1917 }, { "epoch": 0.3448709880427942, "grad_norm": 1.4266948699951172, "learning_rate": 9.960266325408798e-06, "loss": 0.8163, "step": 1918 }, { "epoch": 0.34505079564865593, "grad_norm": 1.4949357509613037, "learning_rate": 9.960193012042566e-06, "loss": 0.8593, "step": 1919 }, { "epoch": 0.34523060325451765, "grad_norm": 1.546820044517517, "learning_rate": 9.960119631373023e-06, "loss": 0.8318, "step": 1920 }, { "epoch": 0.3454104108603794, "grad_norm": 1.707139015197754, "learning_rate": 9.960046183401165e-06, "loss": 0.8142, "step": 1921 }, { "epoch": 0.3455902184662411, "grad_norm": 1.5945770740509033, "learning_rate": 9.959972668127987e-06, "loss": 0.933, "step": 1922 }, { "epoch": 0.34577002607210283, "grad_norm": 1.1915677785873413, "learning_rate": 9.95989908555449e-06, "loss": 1.1235, "step": 1923 }, { "epoch": 0.34594983367796456, "grad_norm": 1.596434235572815, "learning_rate": 9.95982543568167e-06, "loss": 0.8855, "step": 1924 }, { "epoch": 0.3461296412838263, "grad_norm": 1.7674444913864136, "learning_rate": 9.959751718510526e-06, "loss": 0.842, "step": 1925 }, { "epoch": 0.346309448889688, "grad_norm": 1.6054577827453613, "learning_rate": 9.959677934042058e-06, "loss": 0.8484, "step": 1926 }, { "epoch": 0.34648925649554974, "grad_norm": 1.692862868309021, "learning_rate": 9.95960408227727e-06, "loss": 0.8392, "step": 1927 }, { "epoch": 0.3466690641014115, "grad_norm": 1.608269214630127, "learning_rate": 9.959530163217161e-06, "loss": 0.8009, "step": 1928 }, { "epoch": 0.34684887170727324, "grad_norm": 1.5962727069854736, "learning_rate": 9.959456176862737e-06, "loss": 0.8519, "step": 1929 }, { "epoch": 0.34702867931313497, "grad_norm": 1.5782051086425781, "learning_rate": 9.959382123215e-06, "loss": 0.8386, "step": 1930 }, { "epoch": 0.3472084869189967, "grad_norm": 1.5978833436965942, "learning_rate": 9.959308002274954e-06, "loss": 0.7999, "step": 1931 }, { "epoch": 0.3473882945248584, "grad_norm": 1.5882831811904907, "learning_rate": 9.959233814043606e-06, "loss": 0.8705, "step": 1932 }, { "epoch": 0.34756810213072015, "grad_norm": 1.549277901649475, "learning_rate": 9.959159558521962e-06, "loss": 0.8265, "step": 1933 }, { "epoch": 0.3477479097365819, "grad_norm": 1.571807622909546, "learning_rate": 9.95908523571103e-06, "loss": 0.852, "step": 1934 }, { "epoch": 0.3479277173424436, "grad_norm": 1.2822281122207642, "learning_rate": 9.959010845611819e-06, "loss": 1.1326, "step": 1935 }, { "epoch": 0.3481075249483053, "grad_norm": 1.4354074001312256, "learning_rate": 9.958936388225338e-06, "loss": 0.7757, "step": 1936 }, { "epoch": 0.34828733255416705, "grad_norm": 1.5392545461654663, "learning_rate": 9.958861863552596e-06, "loss": 0.8191, "step": 1937 }, { "epoch": 0.3484671401600288, "grad_norm": 1.5748779773712158, "learning_rate": 9.958787271594606e-06, "loss": 0.7935, "step": 1938 }, { "epoch": 0.3486469477658905, "grad_norm": 1.5538076162338257, "learning_rate": 9.958712612352379e-06, "loss": 0.8651, "step": 1939 }, { "epoch": 0.34882675537175223, "grad_norm": 1.0754075050354004, "learning_rate": 9.958637885826927e-06, "loss": 1.0627, "step": 1940 }, { "epoch": 0.34900656297761395, "grad_norm": 1.5301320552825928, "learning_rate": 9.958563092019266e-06, "loss": 0.8292, "step": 1941 }, { "epoch": 0.3491863705834757, "grad_norm": 1.488305687904358, "learning_rate": 9.958488230930411e-06, "loss": 0.7877, "step": 1942 }, { "epoch": 0.3493661781893374, "grad_norm": 1.6579214334487915, "learning_rate": 9.958413302561377e-06, "loss": 0.849, "step": 1943 }, { "epoch": 0.34954598579519913, "grad_norm": 1.545333981513977, "learning_rate": 9.95833830691318e-06, "loss": 0.8775, "step": 1944 }, { "epoch": 0.34972579340106086, "grad_norm": 1.5441216230392456, "learning_rate": 9.958263243986839e-06, "loss": 0.841, "step": 1945 }, { "epoch": 0.3499056010069226, "grad_norm": 1.4603362083435059, "learning_rate": 9.95818811378337e-06, "loss": 0.864, "step": 1946 }, { "epoch": 0.3500854086127843, "grad_norm": 1.05051589012146, "learning_rate": 9.958112916303795e-06, "loss": 1.1174, "step": 1947 }, { "epoch": 0.35026521621864604, "grad_norm": 1.714518427848816, "learning_rate": 9.958037651549135e-06, "loss": 0.8512, "step": 1948 }, { "epoch": 0.35044502382450776, "grad_norm": 1.455753207206726, "learning_rate": 9.957962319520407e-06, "loss": 0.829, "step": 1949 }, { "epoch": 0.3506248314303695, "grad_norm": 1.6269257068634033, "learning_rate": 9.957886920218639e-06, "loss": 0.8591, "step": 1950 }, { "epoch": 0.3508046390362312, "grad_norm": 1.7434918880462646, "learning_rate": 9.957811453644848e-06, "loss": 0.8278, "step": 1951 }, { "epoch": 0.35098444664209294, "grad_norm": 1.654942512512207, "learning_rate": 9.957735919800062e-06, "loss": 0.8491, "step": 1952 }, { "epoch": 0.35116425424795467, "grad_norm": 1.5923858880996704, "learning_rate": 9.957660318685305e-06, "loss": 0.8929, "step": 1953 }, { "epoch": 0.3513440618538164, "grad_norm": 1.9494975805282593, "learning_rate": 9.957584650301602e-06, "loss": 0.8784, "step": 1954 }, { "epoch": 0.3515238694596781, "grad_norm": 1.4500317573547363, "learning_rate": 9.95750891464998e-06, "loss": 0.7744, "step": 1955 }, { "epoch": 0.3517036770655399, "grad_norm": 1.6152517795562744, "learning_rate": 9.957433111731468e-06, "loss": 0.8249, "step": 1956 }, { "epoch": 0.3518834846714016, "grad_norm": 1.2157578468322754, "learning_rate": 9.957357241547094e-06, "loss": 1.0737, "step": 1957 }, { "epoch": 0.35206329227726335, "grad_norm": 1.5282927751541138, "learning_rate": 9.957281304097886e-06, "loss": 0.7974, "step": 1958 }, { "epoch": 0.3522430998831251, "grad_norm": 1.5799062252044678, "learning_rate": 9.957205299384875e-06, "loss": 0.8361, "step": 1959 }, { "epoch": 0.3524229074889868, "grad_norm": 1.5504999160766602, "learning_rate": 9.957129227409093e-06, "loss": 0.886, "step": 1960 }, { "epoch": 0.35260271509484853, "grad_norm": 1.5920610427856445, "learning_rate": 9.957053088171572e-06, "loss": 0.8562, "step": 1961 }, { "epoch": 0.35278252270071025, "grad_norm": 1.6309843063354492, "learning_rate": 9.956976881673345e-06, "loss": 0.8203, "step": 1962 }, { "epoch": 0.352962330306572, "grad_norm": 1.5760855674743652, "learning_rate": 9.956900607915446e-06, "loss": 0.899, "step": 1963 }, { "epoch": 0.3531421379124337, "grad_norm": 1.5350751876831055, "learning_rate": 9.95682426689891e-06, "loss": 0.9035, "step": 1964 }, { "epoch": 0.35332194551829543, "grad_norm": 1.62566077709198, "learning_rate": 9.956747858624772e-06, "loss": 0.8371, "step": 1965 }, { "epoch": 0.35350175312415716, "grad_norm": 1.5852409601211548, "learning_rate": 9.95667138309407e-06, "loss": 0.8908, "step": 1966 }, { "epoch": 0.3536815607300189, "grad_norm": 1.6122584342956543, "learning_rate": 9.95659484030784e-06, "loss": 0.825, "step": 1967 }, { "epoch": 0.3538613683358806, "grad_norm": 1.559260606765747, "learning_rate": 9.956518230267123e-06, "loss": 0.8895, "step": 1968 }, { "epoch": 0.35404117594174234, "grad_norm": 1.1520968675613403, "learning_rate": 9.956441552972958e-06, "loss": 1.0715, "step": 1969 }, { "epoch": 0.35422098354760406, "grad_norm": 1.6529231071472168, "learning_rate": 9.956364808426383e-06, "loss": 0.8972, "step": 1970 }, { "epoch": 0.3544007911534658, "grad_norm": 1.5951528549194336, "learning_rate": 9.956287996628442e-06, "loss": 0.8397, "step": 1971 }, { "epoch": 0.3545805987593275, "grad_norm": 1.429672360420227, "learning_rate": 9.956211117580175e-06, "loss": 0.841, "step": 1972 }, { "epoch": 0.35476040636518924, "grad_norm": 1.6015816926956177, "learning_rate": 9.956134171282628e-06, "loss": 0.8312, "step": 1973 }, { "epoch": 0.35494021397105097, "grad_norm": 1.5028796195983887, "learning_rate": 9.956057157736842e-06, "loss": 0.7802, "step": 1974 }, { "epoch": 0.3551200215769127, "grad_norm": 1.0602349042892456, "learning_rate": 9.955980076943866e-06, "loss": 1.1263, "step": 1975 }, { "epoch": 0.3552998291827744, "grad_norm": 1.6071321964263916, "learning_rate": 9.955902928904739e-06, "loss": 0.8612, "step": 1976 }, { "epoch": 0.35547963678863614, "grad_norm": 1.0395020246505737, "learning_rate": 9.955825713620515e-06, "loss": 1.1189, "step": 1977 }, { "epoch": 0.35565944439449787, "grad_norm": 1.4506264925003052, "learning_rate": 9.955748431092238e-06, "loss": 0.8197, "step": 1978 }, { "epoch": 0.3558392520003596, "grad_norm": 1.495985746383667, "learning_rate": 9.955671081320958e-06, "loss": 0.8752, "step": 1979 }, { "epoch": 0.3560190596062213, "grad_norm": 1.6128417253494263, "learning_rate": 9.955593664307723e-06, "loss": 0.8797, "step": 1980 }, { "epoch": 0.35619886721208305, "grad_norm": 1.5959410667419434, "learning_rate": 9.955516180053585e-06, "loss": 0.8308, "step": 1981 }, { "epoch": 0.3563786748179448, "grad_norm": 1.5179420709609985, "learning_rate": 9.955438628559594e-06, "loss": 0.8438, "step": 1982 }, { "epoch": 0.3565584824238065, "grad_norm": 1.2951949834823608, "learning_rate": 9.955361009826805e-06, "loss": 1.1233, "step": 1983 }, { "epoch": 0.3567382900296683, "grad_norm": 1.6112958192825317, "learning_rate": 9.955283323856267e-06, "loss": 0.7828, "step": 1984 }, { "epoch": 0.35691809763553, "grad_norm": 1.4873008728027344, "learning_rate": 9.955205570649039e-06, "loss": 0.8159, "step": 1985 }, { "epoch": 0.35709790524139173, "grad_norm": 1.4862871170043945, "learning_rate": 9.955127750206171e-06, "loss": 0.8496, "step": 1986 }, { "epoch": 0.35727771284725346, "grad_norm": 1.6377148628234863, "learning_rate": 9.95504986252872e-06, "loss": 0.8501, "step": 1987 }, { "epoch": 0.3574575204531152, "grad_norm": 1.4960345029830933, "learning_rate": 9.954971907617747e-06, "loss": 0.8358, "step": 1988 }, { "epoch": 0.3576373280589769, "grad_norm": 1.0615839958190918, "learning_rate": 9.954893885474305e-06, "loss": 1.1055, "step": 1989 }, { "epoch": 0.35781713566483864, "grad_norm": 0.9943519234657288, "learning_rate": 9.954815796099454e-06, "loss": 1.0832, "step": 1990 }, { "epoch": 0.35799694327070036, "grad_norm": 1.4777911901474, "learning_rate": 9.954737639494257e-06, "loss": 0.8597, "step": 1991 }, { "epoch": 0.3581767508765621, "grad_norm": 1.6151466369628906, "learning_rate": 9.95465941565977e-06, "loss": 0.8712, "step": 1992 }, { "epoch": 0.3583565584824238, "grad_norm": 1.607988953590393, "learning_rate": 9.954581124597057e-06, "loss": 0.8376, "step": 1993 }, { "epoch": 0.35853636608828554, "grad_norm": 1.1628731489181519, "learning_rate": 9.954502766307175e-06, "loss": 1.1134, "step": 1994 }, { "epoch": 0.35871617369414727, "grad_norm": 1.0779715776443481, "learning_rate": 9.954424340791195e-06, "loss": 1.1018, "step": 1995 }, { "epoch": 0.358895981300009, "grad_norm": 1.8720892667770386, "learning_rate": 9.954345848050178e-06, "loss": 0.8302, "step": 1996 }, { "epoch": 0.3590757889058707, "grad_norm": 1.560768723487854, "learning_rate": 9.954267288085186e-06, "loss": 0.7844, "step": 1997 }, { "epoch": 0.35925559651173244, "grad_norm": 1.165230393409729, "learning_rate": 9.954188660897289e-06, "loss": 1.0402, "step": 1998 }, { "epoch": 0.35943540411759417, "grad_norm": 1.5735344886779785, "learning_rate": 9.954109966487552e-06, "loss": 0.8634, "step": 1999 }, { "epoch": 0.3596152117234559, "grad_norm": 1.0883795022964478, "learning_rate": 9.954031204857044e-06, "loss": 1.1151, "step": 2000 }, { "epoch": 0.3596152117234559, "eval_loss": 0.8737280368804932, "eval_runtime": 148.6255, "eval_samples_per_second": 96.767, "eval_steps_per_second": 1.514, "step": 2000 }, { "epoch": 0.3597950193293176, "grad_norm": 1.780922532081604, "learning_rate": 9.953952376006833e-06, "loss": 0.8555, "step": 2001 }, { "epoch": 0.35997482693517935, "grad_norm": 1.678255319595337, "learning_rate": 9.953873479937988e-06, "loss": 0.8462, "step": 2002 }, { "epoch": 0.3601546345410411, "grad_norm": 1.5691519975662231, "learning_rate": 9.95379451665158e-06, "loss": 0.8565, "step": 2003 }, { "epoch": 0.3603344421469028, "grad_norm": 1.4438846111297607, "learning_rate": 9.95371548614868e-06, "loss": 0.7756, "step": 2004 }, { "epoch": 0.3605142497527645, "grad_norm": 1.583592414855957, "learning_rate": 9.953636388430364e-06, "loss": 0.8351, "step": 2005 }, { "epoch": 0.36069405735862625, "grad_norm": 1.0423777103424072, "learning_rate": 9.953557223497698e-06, "loss": 1.0814, "step": 2006 }, { "epoch": 0.360873864964488, "grad_norm": 1.5397202968597412, "learning_rate": 9.953477991351762e-06, "loss": 0.8858, "step": 2007 }, { "epoch": 0.3610536725703497, "grad_norm": 1.7657012939453125, "learning_rate": 9.953398691993629e-06, "loss": 0.8816, "step": 2008 }, { "epoch": 0.36123348017621143, "grad_norm": 1.1417876482009888, "learning_rate": 9.953319325424375e-06, "loss": 1.0546, "step": 2009 }, { "epoch": 0.36141328778207316, "grad_norm": 1.582685947418213, "learning_rate": 9.953239891645078e-06, "loss": 0.873, "step": 2010 }, { "epoch": 0.3615930953879349, "grad_norm": 1.090479850769043, "learning_rate": 9.953160390656813e-06, "loss": 1.0825, "step": 2011 }, { "epoch": 0.36177290299379666, "grad_norm": 2.542325019836426, "learning_rate": 9.953080822460664e-06, "loss": 0.8943, "step": 2012 }, { "epoch": 0.3619527105996584, "grad_norm": 1.6300365924835205, "learning_rate": 9.953001187057705e-06, "loss": 0.8387, "step": 2013 }, { "epoch": 0.3621325182055201, "grad_norm": 1.5792402029037476, "learning_rate": 9.95292148444902e-06, "loss": 0.8487, "step": 2014 }, { "epoch": 0.36231232581138184, "grad_norm": 1.5371519327163696, "learning_rate": 9.952841714635688e-06, "loss": 0.8775, "step": 2015 }, { "epoch": 0.36249213341724357, "grad_norm": 1.558544397354126, "learning_rate": 9.952761877618794e-06, "loss": 0.8903, "step": 2016 }, { "epoch": 0.3626719410231053, "grad_norm": 1.6263394355773926, "learning_rate": 9.95268197339942e-06, "loss": 0.9073, "step": 2017 }, { "epoch": 0.362851748628967, "grad_norm": 1.4981852769851685, "learning_rate": 9.952602001978648e-06, "loss": 0.782, "step": 2018 }, { "epoch": 0.36303155623482875, "grad_norm": 1.624989628791809, "learning_rate": 9.952521963357568e-06, "loss": 0.9221, "step": 2019 }, { "epoch": 0.36321136384069047, "grad_norm": 1.5554983615875244, "learning_rate": 9.952441857537262e-06, "loss": 0.8425, "step": 2020 }, { "epoch": 0.3633911714465522, "grad_norm": 1.6879533529281616, "learning_rate": 9.95236168451882e-06, "loss": 0.8256, "step": 2021 }, { "epoch": 0.3635709790524139, "grad_norm": 1.2585091590881348, "learning_rate": 9.952281444303327e-06, "loss": 1.0927, "step": 2022 }, { "epoch": 0.36375078665827565, "grad_norm": 1.6282538175582886, "learning_rate": 9.952201136891873e-06, "loss": 0.8539, "step": 2023 }, { "epoch": 0.3639305942641374, "grad_norm": 1.6022804975509644, "learning_rate": 9.952120762285546e-06, "loss": 0.8852, "step": 2024 }, { "epoch": 0.3641104018699991, "grad_norm": 2.4350082874298096, "learning_rate": 9.952040320485439e-06, "loss": 0.8553, "step": 2025 }, { "epoch": 0.3642902094758608, "grad_norm": 1.5524661540985107, "learning_rate": 9.951959811492644e-06, "loss": 0.9205, "step": 2026 }, { "epoch": 0.36447001708172255, "grad_norm": 1.116573691368103, "learning_rate": 9.951879235308251e-06, "loss": 1.1026, "step": 2027 }, { "epoch": 0.3646498246875843, "grad_norm": 1.0155696868896484, "learning_rate": 9.951798591933356e-06, "loss": 1.1252, "step": 2028 }, { "epoch": 0.364829632293446, "grad_norm": 1.5050283670425415, "learning_rate": 9.951717881369047e-06, "loss": 0.8346, "step": 2029 }, { "epoch": 0.36500943989930773, "grad_norm": 0.9758833050727844, "learning_rate": 9.951637103616427e-06, "loss": 1.119, "step": 2030 }, { "epoch": 0.36518924750516946, "grad_norm": 1.7460007667541504, "learning_rate": 9.951556258676589e-06, "loss": 0.7844, "step": 2031 }, { "epoch": 0.3653690551110312, "grad_norm": 1.5213004350662231, "learning_rate": 9.951475346550628e-06, "loss": 0.8758, "step": 2032 }, { "epoch": 0.3655488627168929, "grad_norm": 1.5717092752456665, "learning_rate": 9.951394367239645e-06, "loss": 0.7672, "step": 2033 }, { "epoch": 0.36572867032275463, "grad_norm": 1.62686288356781, "learning_rate": 9.951313320744738e-06, "loss": 0.7733, "step": 2034 }, { "epoch": 0.36590847792861636, "grad_norm": 1.6647531986236572, "learning_rate": 9.951232207067004e-06, "loss": 0.7687, "step": 2035 }, { "epoch": 0.3660882855344781, "grad_norm": 1.570540428161621, "learning_rate": 9.951151026207546e-06, "loss": 0.8626, "step": 2036 }, { "epoch": 0.3662680931403398, "grad_norm": 1.5462818145751953, "learning_rate": 9.951069778167467e-06, "loss": 0.8681, "step": 2037 }, { "epoch": 0.36644790074620154, "grad_norm": 1.584170937538147, "learning_rate": 9.950988462947865e-06, "loss": 0.8059, "step": 2038 }, { "epoch": 0.3666277083520633, "grad_norm": 1.619003415107727, "learning_rate": 9.950907080549847e-06, "loss": 0.8409, "step": 2039 }, { "epoch": 0.36680751595792505, "grad_norm": 1.559859275817871, "learning_rate": 9.950825630974517e-06, "loss": 0.9355, "step": 2040 }, { "epoch": 0.36698732356378677, "grad_norm": 1.4977723360061646, "learning_rate": 9.950744114222979e-06, "loss": 0.8448, "step": 2041 }, { "epoch": 0.3671671311696485, "grad_norm": 1.59074866771698, "learning_rate": 9.95066253029634e-06, "loss": 0.8085, "step": 2042 }, { "epoch": 0.3673469387755102, "grad_norm": 1.6230549812316895, "learning_rate": 9.950580879195704e-06, "loss": 0.8812, "step": 2043 }, { "epoch": 0.36752674638137195, "grad_norm": 1.537979245185852, "learning_rate": 9.950499160922184e-06, "loss": 0.8807, "step": 2044 }, { "epoch": 0.3677065539872337, "grad_norm": 1.6341042518615723, "learning_rate": 9.950417375476883e-06, "loss": 0.8822, "step": 2045 }, { "epoch": 0.3678863615930954, "grad_norm": 1.4926822185516357, "learning_rate": 9.950335522860917e-06, "loss": 0.8417, "step": 2046 }, { "epoch": 0.3680661691989571, "grad_norm": 1.5950288772583008, "learning_rate": 9.950253603075393e-06, "loss": 0.8019, "step": 2047 }, { "epoch": 0.36824597680481885, "grad_norm": 1.487197756767273, "learning_rate": 9.950171616121423e-06, "loss": 0.8396, "step": 2048 }, { "epoch": 0.3684257844106806, "grad_norm": 1.6043163537979126, "learning_rate": 9.950089562000118e-06, "loss": 0.9577, "step": 2049 }, { "epoch": 0.3686055920165423, "grad_norm": 1.5562711954116821, "learning_rate": 9.950007440712593e-06, "loss": 0.7935, "step": 2050 }, { "epoch": 0.36878539962240403, "grad_norm": 1.6129634380340576, "learning_rate": 9.949925252259964e-06, "loss": 0.8147, "step": 2051 }, { "epoch": 0.36896520722826576, "grad_norm": 1.592318058013916, "learning_rate": 9.949842996643342e-06, "loss": 0.8987, "step": 2052 }, { "epoch": 0.3691450148341275, "grad_norm": 1.542807698249817, "learning_rate": 9.949760673863846e-06, "loss": 0.8027, "step": 2053 }, { "epoch": 0.3693248224399892, "grad_norm": 1.24369215965271, "learning_rate": 9.949678283922593e-06, "loss": 1.1057, "step": 2054 }, { "epoch": 0.36950463004585093, "grad_norm": 1.4628599882125854, "learning_rate": 9.9495958268207e-06, "loss": 0.7962, "step": 2055 }, { "epoch": 0.36968443765171266, "grad_norm": 1.6548447608947754, "learning_rate": 9.949513302559287e-06, "loss": 0.8855, "step": 2056 }, { "epoch": 0.3698642452575744, "grad_norm": 1.6694661378860474, "learning_rate": 9.949430711139471e-06, "loss": 0.8789, "step": 2057 }, { "epoch": 0.3700440528634361, "grad_norm": 1.5897654294967651, "learning_rate": 9.949348052562378e-06, "loss": 0.8983, "step": 2058 }, { "epoch": 0.37022386046929784, "grad_norm": 1.62803316116333, "learning_rate": 9.949265326829122e-06, "loss": 0.9071, "step": 2059 }, { "epoch": 0.37040366807515956, "grad_norm": 1.534269094467163, "learning_rate": 9.949182533940834e-06, "loss": 0.8492, "step": 2060 }, { "epoch": 0.3705834756810213, "grad_norm": 1.6501551866531372, "learning_rate": 9.94909967389863e-06, "loss": 0.8774, "step": 2061 }, { "epoch": 0.370763283286883, "grad_norm": 1.6894670724868774, "learning_rate": 9.949016746703637e-06, "loss": 0.8787, "step": 2062 }, { "epoch": 0.37094309089274474, "grad_norm": 1.4678151607513428, "learning_rate": 9.948933752356982e-06, "loss": 0.7667, "step": 2063 }, { "epoch": 0.37112289849860647, "grad_norm": 1.4553052186965942, "learning_rate": 9.94885069085979e-06, "loss": 0.8359, "step": 2064 }, { "epoch": 0.3713027061044682, "grad_norm": 1.5978178977966309, "learning_rate": 9.948767562213186e-06, "loss": 0.8243, "step": 2065 }, { "epoch": 0.3714825137103299, "grad_norm": 1.5381476879119873, "learning_rate": 9.9486843664183e-06, "loss": 0.8509, "step": 2066 }, { "epoch": 0.3716623213161917, "grad_norm": 1.4875235557556152, "learning_rate": 9.948601103476261e-06, "loss": 0.8635, "step": 2067 }, { "epoch": 0.3718421289220534, "grad_norm": 1.6053053140640259, "learning_rate": 9.948517773388199e-06, "loss": 0.7775, "step": 2068 }, { "epoch": 0.37202193652791515, "grad_norm": 1.4967793226242065, "learning_rate": 9.948434376155242e-06, "loss": 0.8231, "step": 2069 }, { "epoch": 0.3722017441337769, "grad_norm": 1.4570399522781372, "learning_rate": 9.948350911778526e-06, "loss": 0.8849, "step": 2070 }, { "epoch": 0.3723815517396386, "grad_norm": 1.6006746292114258, "learning_rate": 9.94826738025918e-06, "loss": 0.8741, "step": 2071 }, { "epoch": 0.37256135934550033, "grad_norm": 1.6586648225784302, "learning_rate": 9.948183781598337e-06, "loss": 0.7934, "step": 2072 }, { "epoch": 0.37274116695136206, "grad_norm": 1.5412650108337402, "learning_rate": 9.948100115797134e-06, "loss": 0.8714, "step": 2073 }, { "epoch": 0.3729209745572238, "grad_norm": 1.4242615699768066, "learning_rate": 9.948016382856706e-06, "loss": 0.7959, "step": 2074 }, { "epoch": 0.3731007821630855, "grad_norm": 1.2890387773513794, "learning_rate": 9.947932582778188e-06, "loss": 1.0695, "step": 2075 }, { "epoch": 0.37328058976894724, "grad_norm": 1.5797587633132935, "learning_rate": 9.947848715562715e-06, "loss": 0.8748, "step": 2076 }, { "epoch": 0.37346039737480896, "grad_norm": 1.9447070360183716, "learning_rate": 9.947764781211428e-06, "loss": 0.8569, "step": 2077 }, { "epoch": 0.3736402049806707, "grad_norm": 1.5877748727798462, "learning_rate": 9.947680779725466e-06, "loss": 0.84, "step": 2078 }, { "epoch": 0.3738200125865324, "grad_norm": 1.6266748905181885, "learning_rate": 9.947596711105969e-06, "loss": 0.8518, "step": 2079 }, { "epoch": 0.37399982019239414, "grad_norm": 1.556894063949585, "learning_rate": 9.947512575354075e-06, "loss": 0.9253, "step": 2080 }, { "epoch": 0.37417962779825586, "grad_norm": 1.093104362487793, "learning_rate": 9.947428372470926e-06, "loss": 1.1214, "step": 2081 }, { "epoch": 0.3743594354041176, "grad_norm": 1.583372950553894, "learning_rate": 9.947344102457669e-06, "loss": 0.7891, "step": 2082 }, { "epoch": 0.3745392430099793, "grad_norm": 1.5803518295288086, "learning_rate": 9.94725976531544e-06, "loss": 0.8413, "step": 2083 }, { "epoch": 0.37471905061584104, "grad_norm": 1.6542713642120361, "learning_rate": 9.94717536104539e-06, "loss": 0.8944, "step": 2084 }, { "epoch": 0.37489885822170277, "grad_norm": 1.5997506380081177, "learning_rate": 9.947090889648662e-06, "loss": 0.9099, "step": 2085 }, { "epoch": 0.3750786658275645, "grad_norm": 1.529154658317566, "learning_rate": 9.9470063511264e-06, "loss": 0.8577, "step": 2086 }, { "epoch": 0.3752584734334262, "grad_norm": 1.5127859115600586, "learning_rate": 9.946921745479755e-06, "loss": 0.8567, "step": 2087 }, { "epoch": 0.37543828103928795, "grad_norm": 1.5617406368255615, "learning_rate": 9.946837072709871e-06, "loss": 0.7698, "step": 2088 }, { "epoch": 0.3756180886451497, "grad_norm": 1.492639183998108, "learning_rate": 9.9467523328179e-06, "loss": 0.8077, "step": 2089 }, { "epoch": 0.3757978962510114, "grad_norm": 1.5762544870376587, "learning_rate": 9.946667525804991e-06, "loss": 0.8466, "step": 2090 }, { "epoch": 0.3759777038568731, "grad_norm": 1.5036773681640625, "learning_rate": 9.946582651672294e-06, "loss": 0.8587, "step": 2091 }, { "epoch": 0.37615751146273485, "grad_norm": 1.5018805265426636, "learning_rate": 9.946497710420962e-06, "loss": 0.9127, "step": 2092 }, { "epoch": 0.3763373190685966, "grad_norm": 1.5577253103256226, "learning_rate": 9.946412702052143e-06, "loss": 0.8368, "step": 2093 }, { "epoch": 0.3765171266744583, "grad_norm": 1.496537685394287, "learning_rate": 9.946327626566999e-06, "loss": 0.8702, "step": 2094 }, { "epoch": 0.3766969342803201, "grad_norm": 1.1382579803466797, "learning_rate": 9.946242483966675e-06, "loss": 1.121, "step": 2095 }, { "epoch": 0.3768767418861818, "grad_norm": 1.552796483039856, "learning_rate": 9.946157274252333e-06, "loss": 0.8947, "step": 2096 }, { "epoch": 0.37705654949204354, "grad_norm": 1.4756993055343628, "learning_rate": 9.946071997425126e-06, "loss": 0.8436, "step": 2097 }, { "epoch": 0.37723635709790526, "grad_norm": 1.5470733642578125, "learning_rate": 9.945986653486213e-06, "loss": 0.84, "step": 2098 }, { "epoch": 0.377416164703767, "grad_norm": 1.515239953994751, "learning_rate": 9.94590124243675e-06, "loss": 0.7235, "step": 2099 }, { "epoch": 0.3775959723096287, "grad_norm": 1.1274245977401733, "learning_rate": 9.945815764277898e-06, "loss": 1.1037, "step": 2100 }, { "epoch": 0.37777577991549044, "grad_norm": 1.5559329986572266, "learning_rate": 9.945730219010815e-06, "loss": 0.8402, "step": 2101 }, { "epoch": 0.37795558752135217, "grad_norm": 1.6516027450561523, "learning_rate": 9.94564460663666e-06, "loss": 0.8823, "step": 2102 }, { "epoch": 0.3781353951272139, "grad_norm": 1.4947891235351562, "learning_rate": 9.9455589271566e-06, "loss": 0.9161, "step": 2103 }, { "epoch": 0.3783152027330756, "grad_norm": 1.8008408546447754, "learning_rate": 9.945473180571794e-06, "loss": 0.8541, "step": 2104 }, { "epoch": 0.37849501033893734, "grad_norm": 1.609403133392334, "learning_rate": 9.945387366883406e-06, "loss": 0.8493, "step": 2105 }, { "epoch": 0.37867481794479907, "grad_norm": 1.4761962890625, "learning_rate": 9.9453014860926e-06, "loss": 0.8522, "step": 2106 }, { "epoch": 0.3788546255506608, "grad_norm": 1.7218515872955322, "learning_rate": 9.94521553820054e-06, "loss": 0.8749, "step": 2107 }, { "epoch": 0.3790344331565225, "grad_norm": 1.5720303058624268, "learning_rate": 9.945129523208396e-06, "loss": 0.7869, "step": 2108 }, { "epoch": 0.37921424076238425, "grad_norm": 1.545831322669983, "learning_rate": 9.945043441117335e-06, "loss": 0.8525, "step": 2109 }, { "epoch": 0.379394048368246, "grad_norm": 1.5199975967407227, "learning_rate": 9.94495729192852e-06, "loss": 0.7754, "step": 2110 }, { "epoch": 0.3795738559741077, "grad_norm": 1.4861736297607422, "learning_rate": 9.944871075643125e-06, "loss": 0.8258, "step": 2111 }, { "epoch": 0.3797536635799694, "grad_norm": 2.7864935398101807, "learning_rate": 9.944784792262316e-06, "loss": 0.8719, "step": 2112 }, { "epoch": 0.37993347118583115, "grad_norm": 1.5374789237976074, "learning_rate": 9.944698441787267e-06, "loss": 0.8874, "step": 2113 }, { "epoch": 0.3801132787916929, "grad_norm": 1.555168628692627, "learning_rate": 9.944612024219148e-06, "loss": 0.8608, "step": 2114 }, { "epoch": 0.3802930863975546, "grad_norm": 1.1232681274414062, "learning_rate": 9.944525539559131e-06, "loss": 1.1032, "step": 2115 }, { "epoch": 0.38047289400341633, "grad_norm": 1.6206635236740112, "learning_rate": 9.944438987808391e-06, "loss": 0.8161, "step": 2116 }, { "epoch": 0.38065270160927805, "grad_norm": 1.50304114818573, "learning_rate": 9.944352368968102e-06, "loss": 0.7874, "step": 2117 }, { "epoch": 0.3808325092151398, "grad_norm": 1.0648069381713867, "learning_rate": 9.944265683039439e-06, "loss": 1.0769, "step": 2118 }, { "epoch": 0.3810123168210015, "grad_norm": 1.4909359216690063, "learning_rate": 9.944178930023579e-06, "loss": 0.8178, "step": 2119 }, { "epoch": 0.38119212442686323, "grad_norm": 1.177664875984192, "learning_rate": 9.944092109921697e-06, "loss": 1.1619, "step": 2120 }, { "epoch": 0.38137193203272496, "grad_norm": 1.0117907524108887, "learning_rate": 9.944005222734971e-06, "loss": 1.073, "step": 2121 }, { "epoch": 0.38155173963858674, "grad_norm": 1.5709589719772339, "learning_rate": 9.943918268464583e-06, "loss": 0.8162, "step": 2122 }, { "epoch": 0.38173154724444847, "grad_norm": 1.4930270910263062, "learning_rate": 9.943831247111711e-06, "loss": 0.7695, "step": 2123 }, { "epoch": 0.3819113548503102, "grad_norm": 1.5026451349258423, "learning_rate": 9.943744158677538e-06, "loss": 0.8287, "step": 2124 }, { "epoch": 0.3820911624561719, "grad_norm": 1.7383097410202026, "learning_rate": 9.943657003163241e-06, "loss": 0.8829, "step": 2125 }, { "epoch": 0.38227097006203364, "grad_norm": 1.5084149837493896, "learning_rate": 9.943569780570007e-06, "loss": 0.8456, "step": 2126 }, { "epoch": 0.38245077766789537, "grad_norm": 1.2113597393035889, "learning_rate": 9.943482490899015e-06, "loss": 1.0828, "step": 2127 }, { "epoch": 0.3826305852737571, "grad_norm": 1.4438271522521973, "learning_rate": 9.943395134151455e-06, "loss": 0.7778, "step": 2128 }, { "epoch": 0.3828103928796188, "grad_norm": 1.6835744380950928, "learning_rate": 9.943307710328507e-06, "loss": 0.8941, "step": 2129 }, { "epoch": 0.38299020048548055, "grad_norm": 1.55042564868927, "learning_rate": 9.943220219431362e-06, "loss": 0.8442, "step": 2130 }, { "epoch": 0.3831700080913423, "grad_norm": 1.46316397190094, "learning_rate": 9.943132661461204e-06, "loss": 0.8324, "step": 2131 }, { "epoch": 0.383349815697204, "grad_norm": 1.580332636833191, "learning_rate": 9.943045036419221e-06, "loss": 0.8351, "step": 2132 }, { "epoch": 0.3835296233030657, "grad_norm": 1.6825112104415894, "learning_rate": 9.942957344306603e-06, "loss": 0.8166, "step": 2133 }, { "epoch": 0.38370943090892745, "grad_norm": 1.5184723138809204, "learning_rate": 9.942869585124539e-06, "loss": 0.8635, "step": 2134 }, { "epoch": 0.3838892385147892, "grad_norm": 2.0074877738952637, "learning_rate": 9.942781758874223e-06, "loss": 0.8113, "step": 2135 }, { "epoch": 0.3840690461206509, "grad_norm": 1.63437819480896, "learning_rate": 9.942693865556843e-06, "loss": 0.9059, "step": 2136 }, { "epoch": 0.38424885372651263, "grad_norm": 1.4110231399536133, "learning_rate": 9.942605905173593e-06, "loss": 0.782, "step": 2137 }, { "epoch": 0.38442866133237436, "grad_norm": 1.6423869132995605, "learning_rate": 9.942517877725664e-06, "loss": 0.7666, "step": 2138 }, { "epoch": 0.3846084689382361, "grad_norm": 1.5281344652175903, "learning_rate": 9.942429783214255e-06, "loss": 0.9304, "step": 2139 }, { "epoch": 0.3847882765440978, "grad_norm": 1.4955940246582031, "learning_rate": 9.942341621640558e-06, "loss": 0.9077, "step": 2140 }, { "epoch": 0.38496808414995953, "grad_norm": 1.5392431020736694, "learning_rate": 9.94225339300577e-06, "loss": 0.8027, "step": 2141 }, { "epoch": 0.38514789175582126, "grad_norm": 1.5047482252120972, "learning_rate": 9.942165097311089e-06, "loss": 0.8996, "step": 2142 }, { "epoch": 0.385327699361683, "grad_norm": 1.463209867477417, "learning_rate": 9.942076734557712e-06, "loss": 0.8652, "step": 2143 }, { "epoch": 0.3855075069675447, "grad_norm": 1.633591890335083, "learning_rate": 9.94198830474684e-06, "loss": 0.8218, "step": 2144 }, { "epoch": 0.38568731457340644, "grad_norm": 1.5935592651367188, "learning_rate": 9.941899807879669e-06, "loss": 0.7932, "step": 2145 }, { "epoch": 0.38586712217926816, "grad_norm": 1.6220169067382812, "learning_rate": 9.941811243957404e-06, "loss": 0.8667, "step": 2146 }, { "epoch": 0.3860469297851299, "grad_norm": 1.5375585556030273, "learning_rate": 9.941722612981242e-06, "loss": 0.7846, "step": 2147 }, { "epoch": 0.3862267373909916, "grad_norm": 1.109684705734253, "learning_rate": 9.941633914952391e-06, "loss": 1.079, "step": 2148 }, { "epoch": 0.38640654499685334, "grad_norm": 1.598067045211792, "learning_rate": 9.941545149872052e-06, "loss": 0.8644, "step": 2149 }, { "epoch": 0.3865863526027151, "grad_norm": 1.6142845153808594, "learning_rate": 9.941456317741428e-06, "loss": 0.8777, "step": 2150 }, { "epoch": 0.38676616020857685, "grad_norm": 1.5571753978729248, "learning_rate": 9.941367418561725e-06, "loss": 0.834, "step": 2151 }, { "epoch": 0.3869459678144386, "grad_norm": 1.6399874687194824, "learning_rate": 9.941278452334151e-06, "loss": 0.8084, "step": 2152 }, { "epoch": 0.3871257754203003, "grad_norm": 1.531429409980774, "learning_rate": 9.941189419059912e-06, "loss": 0.8195, "step": 2153 }, { "epoch": 0.387305583026162, "grad_norm": 1.4881808757781982, "learning_rate": 9.941100318740216e-06, "loss": 0.7964, "step": 2154 }, { "epoch": 0.38748539063202375, "grad_norm": 1.522531270980835, "learning_rate": 9.941011151376272e-06, "loss": 0.8208, "step": 2155 }, { "epoch": 0.3876651982378855, "grad_norm": 1.7456330060958862, "learning_rate": 9.940921916969289e-06, "loss": 0.9036, "step": 2156 }, { "epoch": 0.3878450058437472, "grad_norm": 1.487278938293457, "learning_rate": 9.94083261552048e-06, "loss": 0.8507, "step": 2157 }, { "epoch": 0.38802481344960893, "grad_norm": 1.5421242713928223, "learning_rate": 9.940743247031054e-06, "loss": 0.917, "step": 2158 }, { "epoch": 0.38820462105547066, "grad_norm": 1.5634161233901978, "learning_rate": 9.940653811502229e-06, "loss": 0.8776, "step": 2159 }, { "epoch": 0.3883844286613324, "grad_norm": 1.4566210508346558, "learning_rate": 9.94056430893521e-06, "loss": 0.8055, "step": 2160 }, { "epoch": 0.3885642362671941, "grad_norm": 1.2039788961410522, "learning_rate": 9.940474739331219e-06, "loss": 1.1453, "step": 2161 }, { "epoch": 0.38874404387305583, "grad_norm": 1.5578759908676147, "learning_rate": 9.940385102691467e-06, "loss": 0.8873, "step": 2162 }, { "epoch": 0.38892385147891756, "grad_norm": 1.4862266778945923, "learning_rate": 9.94029539901717e-06, "loss": 0.8167, "step": 2163 }, { "epoch": 0.3891036590847793, "grad_norm": 1.7700427770614624, "learning_rate": 9.940205628309549e-06, "loss": 0.8909, "step": 2164 }, { "epoch": 0.389283466690641, "grad_norm": 1.1383737325668335, "learning_rate": 9.94011579056982e-06, "loss": 1.0941, "step": 2165 }, { "epoch": 0.38946327429650274, "grad_norm": 1.0666733980178833, "learning_rate": 9.940025885799202e-06, "loss": 1.1355, "step": 2166 }, { "epoch": 0.38964308190236446, "grad_norm": 1.4669477939605713, "learning_rate": 9.939935913998913e-06, "loss": 0.7395, "step": 2167 }, { "epoch": 0.3898228895082262, "grad_norm": 1.5855697393417358, "learning_rate": 9.939845875170178e-06, "loss": 0.9353, "step": 2168 }, { "epoch": 0.3900026971140879, "grad_norm": 1.0437335968017578, "learning_rate": 9.939755769314215e-06, "loss": 1.1243, "step": 2169 }, { "epoch": 0.39018250471994964, "grad_norm": 1.8066303730010986, "learning_rate": 9.939665596432246e-06, "loss": 0.8252, "step": 2170 }, { "epoch": 0.39036231232581137, "grad_norm": 1.6130337715148926, "learning_rate": 9.939575356525499e-06, "loss": 0.8944, "step": 2171 }, { "epoch": 0.3905421199316731, "grad_norm": 1.491932988166809, "learning_rate": 9.939485049595195e-06, "loss": 0.8842, "step": 2172 }, { "epoch": 0.3907219275375348, "grad_norm": 1.1576576232910156, "learning_rate": 9.93939467564256e-06, "loss": 1.0697, "step": 2173 }, { "epoch": 0.39090173514339654, "grad_norm": 1.71902334690094, "learning_rate": 9.93930423466882e-06, "loss": 0.8009, "step": 2174 }, { "epoch": 0.39108154274925827, "grad_norm": 1.6416043043136597, "learning_rate": 9.939213726675204e-06, "loss": 0.8188, "step": 2175 }, { "epoch": 0.39126135035512, "grad_norm": 1.6064527034759521, "learning_rate": 9.939123151662935e-06, "loss": 0.8757, "step": 2176 }, { "epoch": 0.3914411579609817, "grad_norm": 1.51374351978302, "learning_rate": 9.939032509633248e-06, "loss": 0.8338, "step": 2177 }, { "epoch": 0.3916209655668435, "grad_norm": 1.0713926553726196, "learning_rate": 9.938941800587372e-06, "loss": 1.1043, "step": 2178 }, { "epoch": 0.39180077317270523, "grad_norm": 1.4182714223861694, "learning_rate": 9.938851024526535e-06, "loss": 0.8374, "step": 2179 }, { "epoch": 0.39198058077856696, "grad_norm": 1.7102664709091187, "learning_rate": 9.93876018145197e-06, "loss": 0.8422, "step": 2180 }, { "epoch": 0.3921603883844287, "grad_norm": 1.4037948846817017, "learning_rate": 9.93866927136491e-06, "loss": 0.7832, "step": 2181 }, { "epoch": 0.3923401959902904, "grad_norm": 1.061100721359253, "learning_rate": 9.938578294266588e-06, "loss": 1.0765, "step": 2182 }, { "epoch": 0.39252000359615213, "grad_norm": 1.5677928924560547, "learning_rate": 9.93848725015824e-06, "loss": 0.863, "step": 2183 }, { "epoch": 0.39269981120201386, "grad_norm": 1.5152058601379395, "learning_rate": 9.938396139041097e-06, "loss": 0.8796, "step": 2184 }, { "epoch": 0.3928796188078756, "grad_norm": 1.502644658088684, "learning_rate": 9.9383049609164e-06, "loss": 0.808, "step": 2185 }, { "epoch": 0.3930594264137373, "grad_norm": 1.4999289512634277, "learning_rate": 9.938213715785385e-06, "loss": 0.8777, "step": 2186 }, { "epoch": 0.39323923401959904, "grad_norm": 1.6211262941360474, "learning_rate": 9.938122403649288e-06, "loss": 0.8353, "step": 2187 }, { "epoch": 0.39341904162546076, "grad_norm": 1.5238052606582642, "learning_rate": 9.938031024509349e-06, "loss": 0.858, "step": 2188 }, { "epoch": 0.3935988492313225, "grad_norm": 1.5138319730758667, "learning_rate": 9.93793957836681e-06, "loss": 0.8775, "step": 2189 }, { "epoch": 0.3937786568371842, "grad_norm": 1.6130050420761108, "learning_rate": 9.93784806522291e-06, "loss": 0.8874, "step": 2190 }, { "epoch": 0.39395846444304594, "grad_norm": 1.633756160736084, "learning_rate": 9.93775648507889e-06, "loss": 0.8459, "step": 2191 }, { "epoch": 0.39413827204890767, "grad_norm": 1.4950379133224487, "learning_rate": 9.937664837935996e-06, "loss": 0.8339, "step": 2192 }, { "epoch": 0.3943180796547694, "grad_norm": 1.5838327407836914, "learning_rate": 9.937573123795467e-06, "loss": 0.8279, "step": 2193 }, { "epoch": 0.3944978872606311, "grad_norm": 1.6434606313705444, "learning_rate": 9.937481342658548e-06, "loss": 0.8108, "step": 2194 }, { "epoch": 0.39467769486649285, "grad_norm": 1.4524385929107666, "learning_rate": 9.937389494526489e-06, "loss": 0.7839, "step": 2195 }, { "epoch": 0.39485750247235457, "grad_norm": 1.6163129806518555, "learning_rate": 9.937297579400532e-06, "loss": 0.8667, "step": 2196 }, { "epoch": 0.3950373100782163, "grad_norm": 1.578473687171936, "learning_rate": 9.937205597281924e-06, "loss": 0.8103, "step": 2197 }, { "epoch": 0.395217117684078, "grad_norm": 1.5353448390960693, "learning_rate": 9.937113548171914e-06, "loss": 0.8048, "step": 2198 }, { "epoch": 0.39539692528993975, "grad_norm": 1.5131598711013794, "learning_rate": 9.937021432071754e-06, "loss": 0.7758, "step": 2199 }, { "epoch": 0.3955767328958015, "grad_norm": 1.3443036079406738, "learning_rate": 9.93692924898269e-06, "loss": 1.0823, "step": 2200 }, { "epoch": 0.3957565405016632, "grad_norm": 1.3873341083526611, "learning_rate": 9.936836998905971e-06, "loss": 0.7968, "step": 2201 }, { "epoch": 0.3959363481075249, "grad_norm": 1.7983591556549072, "learning_rate": 9.936744681842855e-06, "loss": 0.7931, "step": 2202 }, { "epoch": 0.39611615571338665, "grad_norm": 1.5064513683319092, "learning_rate": 9.936652297794592e-06, "loss": 0.8349, "step": 2203 }, { "epoch": 0.3962959633192484, "grad_norm": 1.8519337177276611, "learning_rate": 9.936559846762434e-06, "loss": 0.8924, "step": 2204 }, { "epoch": 0.3964757709251101, "grad_norm": 1.5667091608047485, "learning_rate": 9.936467328747636e-06, "loss": 0.842, "step": 2205 }, { "epoch": 0.3966555785309719, "grad_norm": 1.1396687030792236, "learning_rate": 9.936374743751453e-06, "loss": 1.1082, "step": 2206 }, { "epoch": 0.3968353861368336, "grad_norm": 1.5464400053024292, "learning_rate": 9.936282091775143e-06, "loss": 0.8391, "step": 2207 }, { "epoch": 0.39701519374269534, "grad_norm": 1.1743454933166504, "learning_rate": 9.936189372819962e-06, "loss": 1.0808, "step": 2208 }, { "epoch": 0.39719500134855706, "grad_norm": 1.5534696578979492, "learning_rate": 9.936096586887168e-06, "loss": 0.8813, "step": 2209 }, { "epoch": 0.3973748089544188, "grad_norm": 1.0235427618026733, "learning_rate": 9.936003733978019e-06, "loss": 1.1285, "step": 2210 }, { "epoch": 0.3975546165602805, "grad_norm": 0.9776834845542908, "learning_rate": 9.935910814093777e-06, "loss": 1.1138, "step": 2211 }, { "epoch": 0.39773442416614224, "grad_norm": 1.5950813293457031, "learning_rate": 9.935817827235702e-06, "loss": 0.9217, "step": 2212 }, { "epoch": 0.39791423177200397, "grad_norm": 1.5449674129486084, "learning_rate": 9.935724773405055e-06, "loss": 0.8673, "step": 2213 }, { "epoch": 0.3980940393778657, "grad_norm": 1.643388271331787, "learning_rate": 9.9356316526031e-06, "loss": 0.8726, "step": 2214 }, { "epoch": 0.3982738469837274, "grad_norm": 1.5651741027832031, "learning_rate": 9.935538464831101e-06, "loss": 0.8669, "step": 2215 }, { "epoch": 0.39845365458958915, "grad_norm": 1.6013493537902832, "learning_rate": 9.935445210090318e-06, "loss": 0.8249, "step": 2216 }, { "epoch": 0.39863346219545087, "grad_norm": 1.4315968751907349, "learning_rate": 9.935351888382022e-06, "loss": 0.7831, "step": 2217 }, { "epoch": 0.3988132698013126, "grad_norm": 1.6285966634750366, "learning_rate": 9.935258499707475e-06, "loss": 0.8761, "step": 2218 }, { "epoch": 0.3989930774071743, "grad_norm": 1.6541240215301514, "learning_rate": 9.935165044067946e-06, "loss": 0.8831, "step": 2219 }, { "epoch": 0.39917288501303605, "grad_norm": 1.562021255493164, "learning_rate": 9.935071521464704e-06, "loss": 0.7974, "step": 2220 }, { "epoch": 0.3993526926188978, "grad_norm": 1.487135410308838, "learning_rate": 9.934977931899016e-06, "loss": 0.8549, "step": 2221 }, { "epoch": 0.3995325002247595, "grad_norm": 1.4710863828659058, "learning_rate": 9.934884275372153e-06, "loss": 0.7732, "step": 2222 }, { "epoch": 0.3997123078306212, "grad_norm": 1.5119538307189941, "learning_rate": 9.934790551885385e-06, "loss": 0.8477, "step": 2223 }, { "epoch": 0.39989211543648295, "grad_norm": 1.4288055896759033, "learning_rate": 9.934696761439986e-06, "loss": 0.8278, "step": 2224 }, { "epoch": 0.4000719230423447, "grad_norm": 1.4791392087936401, "learning_rate": 9.934602904037226e-06, "loss": 0.8446, "step": 2225 }, { "epoch": 0.4002517306482064, "grad_norm": 1.5313385725021362, "learning_rate": 9.93450897967838e-06, "loss": 0.8103, "step": 2226 }, { "epoch": 0.40043153825406813, "grad_norm": 1.555843710899353, "learning_rate": 9.934414988364722e-06, "loss": 0.782, "step": 2227 }, { "epoch": 0.40061134585992986, "grad_norm": 1.618667483329773, "learning_rate": 9.934320930097527e-06, "loss": 0.8592, "step": 2228 }, { "epoch": 0.4007911534657916, "grad_norm": 1.4850162267684937, "learning_rate": 9.93422680487807e-06, "loss": 0.7704, "step": 2229 }, { "epoch": 0.4009709610716533, "grad_norm": 1.4843984842300415, "learning_rate": 9.934132612707631e-06, "loss": 0.7905, "step": 2230 }, { "epoch": 0.40115076867751503, "grad_norm": 1.6733379364013672, "learning_rate": 9.934038353587487e-06, "loss": 0.7862, "step": 2231 }, { "epoch": 0.40133057628337676, "grad_norm": 1.5466852188110352, "learning_rate": 9.933944027518917e-06, "loss": 0.8019, "step": 2232 }, { "epoch": 0.40151038388923854, "grad_norm": 1.4722044467926025, "learning_rate": 9.9338496345032e-06, "loss": 0.8723, "step": 2233 }, { "epoch": 0.40169019149510027, "grad_norm": 2.401890277862549, "learning_rate": 9.933755174541616e-06, "loss": 0.8223, "step": 2234 }, { "epoch": 0.401869999100962, "grad_norm": 1.6133091449737549, "learning_rate": 9.93366064763545e-06, "loss": 0.855, "step": 2235 }, { "epoch": 0.4020498067068237, "grad_norm": 1.595605731010437, "learning_rate": 9.933566053785982e-06, "loss": 0.8423, "step": 2236 }, { "epoch": 0.40222961431268545, "grad_norm": 1.5710434913635254, "learning_rate": 9.933471392994497e-06, "loss": 0.8688, "step": 2237 }, { "epoch": 0.40240942191854717, "grad_norm": 1.652132511138916, "learning_rate": 9.933376665262275e-06, "loss": 0.8899, "step": 2238 }, { "epoch": 0.4025892295244089, "grad_norm": 1.4768177270889282, "learning_rate": 9.933281870590609e-06, "loss": 0.7698, "step": 2239 }, { "epoch": 0.4027690371302706, "grad_norm": 1.6388800144195557, "learning_rate": 9.93318700898078e-06, "loss": 0.8878, "step": 2240 }, { "epoch": 0.40294884473613235, "grad_norm": 1.4648034572601318, "learning_rate": 9.933092080434075e-06, "loss": 0.8383, "step": 2241 }, { "epoch": 0.4031286523419941, "grad_norm": 1.5074117183685303, "learning_rate": 9.932997084951785e-06, "loss": 0.8261, "step": 2242 }, { "epoch": 0.4033084599478558, "grad_norm": 1.2257177829742432, "learning_rate": 9.932902022535196e-06, "loss": 1.0244, "step": 2243 }, { "epoch": 0.40348826755371753, "grad_norm": 1.4467891454696655, "learning_rate": 9.9328068931856e-06, "loss": 0.901, "step": 2244 }, { "epoch": 0.40366807515957925, "grad_norm": 1.223552942276001, "learning_rate": 9.932711696904286e-06, "loss": 1.1076, "step": 2245 }, { "epoch": 0.403847882765441, "grad_norm": 1.522331714630127, "learning_rate": 9.932616433692549e-06, "loss": 0.805, "step": 2246 }, { "epoch": 0.4040276903713027, "grad_norm": 1.1258546113967896, "learning_rate": 9.932521103551676e-06, "loss": 1.079, "step": 2247 }, { "epoch": 0.40420749797716443, "grad_norm": 1.4987846612930298, "learning_rate": 9.932425706482966e-06, "loss": 0.9084, "step": 2248 }, { "epoch": 0.40438730558302616, "grad_norm": 1.7786221504211426, "learning_rate": 9.932330242487711e-06, "loss": 0.8357, "step": 2249 }, { "epoch": 0.4045671131888879, "grad_norm": 1.6125704050064087, "learning_rate": 9.932234711567206e-06, "loss": 0.9084, "step": 2250 }, { "epoch": 0.4047469207947496, "grad_norm": 1.740449070930481, "learning_rate": 9.932139113722748e-06, "loss": 0.8514, "step": 2251 }, { "epoch": 0.40492672840061134, "grad_norm": 1.4948573112487793, "learning_rate": 9.932043448955634e-06, "loss": 0.8313, "step": 2252 }, { "epoch": 0.40510653600647306, "grad_norm": 1.571162462234497, "learning_rate": 9.93194771726716e-06, "loss": 0.891, "step": 2253 }, { "epoch": 0.4052863436123348, "grad_norm": 1.5573312044143677, "learning_rate": 9.93185191865863e-06, "loss": 0.8605, "step": 2254 }, { "epoch": 0.4054661512181965, "grad_norm": 1.533764123916626, "learning_rate": 9.93175605313134e-06, "loss": 0.8772, "step": 2255 }, { "epoch": 0.40564595882405824, "grad_norm": 1.6668049097061157, "learning_rate": 9.93166012068659e-06, "loss": 0.8951, "step": 2256 }, { "epoch": 0.40582576642991997, "grad_norm": 1.6752601861953735, "learning_rate": 9.931564121325684e-06, "loss": 0.8226, "step": 2257 }, { "epoch": 0.4060055740357817, "grad_norm": 1.6200512647628784, "learning_rate": 9.931468055049924e-06, "loss": 0.7699, "step": 2258 }, { "epoch": 0.4061853816416434, "grad_norm": 1.4934728145599365, "learning_rate": 9.931371921860614e-06, "loss": 0.8796, "step": 2259 }, { "epoch": 0.40636518924750514, "grad_norm": 1.4645984172821045, "learning_rate": 9.931275721759055e-06, "loss": 0.8222, "step": 2260 }, { "epoch": 0.4065449968533669, "grad_norm": 1.5430140495300293, "learning_rate": 9.931179454746556e-06, "loss": 0.7464, "step": 2261 }, { "epoch": 0.40672480445922865, "grad_norm": 1.2299302816390991, "learning_rate": 9.931083120824423e-06, "loss": 1.1215, "step": 2262 }, { "epoch": 0.4069046120650904, "grad_norm": 1.4847543239593506, "learning_rate": 9.930986719993962e-06, "loss": 0.8381, "step": 2263 }, { "epoch": 0.4070844196709521, "grad_norm": 1.0969451665878296, "learning_rate": 9.930890252256482e-06, "loss": 1.0922, "step": 2264 }, { "epoch": 0.40726422727681383, "grad_norm": 1.4499033689498901, "learning_rate": 9.930793717613291e-06, "loss": 0.7933, "step": 2265 }, { "epoch": 0.40744403488267555, "grad_norm": 1.8093976974487305, "learning_rate": 9.930697116065699e-06, "loss": 0.8682, "step": 2266 }, { "epoch": 0.4076238424885373, "grad_norm": 1.639225959777832, "learning_rate": 9.930600447615016e-06, "loss": 0.8564, "step": 2267 }, { "epoch": 0.407803650094399, "grad_norm": 1.5524381399154663, "learning_rate": 9.930503712262556e-06, "loss": 0.8203, "step": 2268 }, { "epoch": 0.40798345770026073, "grad_norm": 1.0694783926010132, "learning_rate": 9.930406910009629e-06, "loss": 1.1285, "step": 2269 }, { "epoch": 0.40816326530612246, "grad_norm": 1.5401698350906372, "learning_rate": 9.930310040857548e-06, "loss": 0.7928, "step": 2270 }, { "epoch": 0.4083430729119842, "grad_norm": 1.5488026142120361, "learning_rate": 9.930213104807633e-06, "loss": 0.8469, "step": 2271 }, { "epoch": 0.4085228805178459, "grad_norm": 1.7189080715179443, "learning_rate": 9.930116101861194e-06, "loss": 0.7887, "step": 2272 }, { "epoch": 0.40870268812370764, "grad_norm": 1.1203655004501343, "learning_rate": 9.930019032019546e-06, "loss": 1.1196, "step": 2273 }, { "epoch": 0.40888249572956936, "grad_norm": 1.5545257329940796, "learning_rate": 9.929921895284012e-06, "loss": 0.8642, "step": 2274 }, { "epoch": 0.4090623033354311, "grad_norm": 1.5077409744262695, "learning_rate": 9.929824691655903e-06, "loss": 0.8775, "step": 2275 }, { "epoch": 0.4092421109412928, "grad_norm": 1.4691096544265747, "learning_rate": 9.929727421136544e-06, "loss": 0.8353, "step": 2276 }, { "epoch": 0.40942191854715454, "grad_norm": 1.523252248764038, "learning_rate": 9.929630083727253e-06, "loss": 0.832, "step": 2277 }, { "epoch": 0.40960172615301627, "grad_norm": 1.411112666130066, "learning_rate": 9.929532679429348e-06, "loss": 0.8086, "step": 2278 }, { "epoch": 0.409781533758878, "grad_norm": 1.603169322013855, "learning_rate": 9.929435208244154e-06, "loss": 0.8514, "step": 2279 }, { "epoch": 0.4099613413647397, "grad_norm": 1.1224960088729858, "learning_rate": 9.92933767017299e-06, "loss": 1.0861, "step": 2280 }, { "epoch": 0.41014114897060144, "grad_norm": 1.594210147857666, "learning_rate": 9.929240065217186e-06, "loss": 0.8845, "step": 2281 }, { "epoch": 0.41032095657646317, "grad_norm": 1.6312865018844604, "learning_rate": 9.92914239337806e-06, "loss": 0.858, "step": 2282 }, { "epoch": 0.4105007641823249, "grad_norm": 1.5571554899215698, "learning_rate": 9.929044654656938e-06, "loss": 0.8342, "step": 2283 }, { "epoch": 0.4106805717881866, "grad_norm": 1.557047963142395, "learning_rate": 9.92894684905515e-06, "loss": 0.85, "step": 2284 }, { "epoch": 0.41086037939404835, "grad_norm": 1.5525059700012207, "learning_rate": 9.92884897657402e-06, "loss": 0.7782, "step": 2285 }, { "epoch": 0.4110401869999101, "grad_norm": 1.6405423879623413, "learning_rate": 9.928751037214877e-06, "loss": 0.9097, "step": 2286 }, { "epoch": 0.4112199946057718, "grad_norm": 1.764968991279602, "learning_rate": 9.928653030979048e-06, "loss": 0.9208, "step": 2287 }, { "epoch": 0.4113998022116335, "grad_norm": 1.6586703062057495, "learning_rate": 9.928554957867865e-06, "loss": 0.8633, "step": 2288 }, { "epoch": 0.4115796098174953, "grad_norm": 1.536964774131775, "learning_rate": 9.928456817882659e-06, "loss": 0.8666, "step": 2289 }, { "epoch": 0.41175941742335703, "grad_norm": 1.5315396785736084, "learning_rate": 9.92835861102476e-06, "loss": 0.8144, "step": 2290 }, { "epoch": 0.41193922502921876, "grad_norm": 1.5583208799362183, "learning_rate": 9.928260337295503e-06, "loss": 0.8586, "step": 2291 }, { "epoch": 0.4121190326350805, "grad_norm": 1.5119744539260864, "learning_rate": 9.928161996696218e-06, "loss": 0.902, "step": 2292 }, { "epoch": 0.4122988402409422, "grad_norm": 1.5109939575195312, "learning_rate": 9.92806358922824e-06, "loss": 0.8279, "step": 2293 }, { "epoch": 0.41247864784680394, "grad_norm": 1.5570265054702759, "learning_rate": 9.927965114892907e-06, "loss": 0.8617, "step": 2294 }, { "epoch": 0.41265845545266566, "grad_norm": 1.5764092206954956, "learning_rate": 9.927866573691555e-06, "loss": 0.8206, "step": 2295 }, { "epoch": 0.4128382630585274, "grad_norm": 1.5340701341629028, "learning_rate": 9.927767965625518e-06, "loss": 0.8554, "step": 2296 }, { "epoch": 0.4130180706643891, "grad_norm": 1.533673882484436, "learning_rate": 9.927669290696136e-06, "loss": 0.8815, "step": 2297 }, { "epoch": 0.41319787827025084, "grad_norm": 1.5027426481246948, "learning_rate": 9.927570548904749e-06, "loss": 0.8064, "step": 2298 }, { "epoch": 0.41337768587611257, "grad_norm": 1.6161518096923828, "learning_rate": 9.927471740252693e-06, "loss": 0.8909, "step": 2299 }, { "epoch": 0.4135574934819743, "grad_norm": 1.5026532411575317, "learning_rate": 9.92737286474131e-06, "loss": 0.8857, "step": 2300 }, { "epoch": 0.413737301087836, "grad_norm": 1.5695058107376099, "learning_rate": 9.927273922371946e-06, "loss": 0.9, "step": 2301 }, { "epoch": 0.41391710869369774, "grad_norm": 1.6268095970153809, "learning_rate": 9.927174913145937e-06, "loss": 0.8725, "step": 2302 }, { "epoch": 0.41409691629955947, "grad_norm": 1.5062994956970215, "learning_rate": 9.92707583706463e-06, "loss": 0.7994, "step": 2303 }, { "epoch": 0.4142767239054212, "grad_norm": 2.9973368644714355, "learning_rate": 9.926976694129371e-06, "loss": 0.7794, "step": 2304 }, { "epoch": 0.4144565315112829, "grad_norm": 1.1526200771331787, "learning_rate": 9.926877484341501e-06, "loss": 1.1329, "step": 2305 }, { "epoch": 0.41463633911714465, "grad_norm": 1.0988061428070068, "learning_rate": 9.92677820770237e-06, "loss": 1.096, "step": 2306 }, { "epoch": 0.4148161467230064, "grad_norm": 1.5460275411605835, "learning_rate": 9.926678864213322e-06, "loss": 0.9054, "step": 2307 }, { "epoch": 0.4149959543288681, "grad_norm": 1.5759869813919067, "learning_rate": 9.926579453875707e-06, "loss": 0.7903, "step": 2308 }, { "epoch": 0.4151757619347298, "grad_norm": 1.6414827108383179, "learning_rate": 9.926479976690872e-06, "loss": 0.8046, "step": 2309 }, { "epoch": 0.41535556954059155, "grad_norm": 1.1085355281829834, "learning_rate": 9.92638043266017e-06, "loss": 1.0262, "step": 2310 }, { "epoch": 0.4155353771464533, "grad_norm": 1.4575541019439697, "learning_rate": 9.926280821784949e-06, "loss": 0.8811, "step": 2311 }, { "epoch": 0.415715184752315, "grad_norm": 1.1459965705871582, "learning_rate": 9.92618114406656e-06, "loss": 1.0684, "step": 2312 }, { "epoch": 0.41589499235817673, "grad_norm": 1.5187686681747437, "learning_rate": 9.926081399506357e-06, "loss": 0.7657, "step": 2313 }, { "epoch": 0.41607479996403846, "grad_norm": 1.582604169845581, "learning_rate": 9.925981588105695e-06, "loss": 0.8588, "step": 2314 }, { "epoch": 0.4162546075699002, "grad_norm": 1.4889878034591675, "learning_rate": 9.925881709865925e-06, "loss": 0.8209, "step": 2315 }, { "epoch": 0.4164344151757619, "grad_norm": 1.0877041816711426, "learning_rate": 9.925781764788403e-06, "loss": 1.0704, "step": 2316 }, { "epoch": 0.4166142227816237, "grad_norm": 1.078351616859436, "learning_rate": 9.925681752874485e-06, "loss": 1.0908, "step": 2317 }, { "epoch": 0.4167940303874854, "grad_norm": 1.5661910772323608, "learning_rate": 9.92558167412553e-06, "loss": 0.8267, "step": 2318 }, { "epoch": 0.41697383799334714, "grad_norm": 1.550657033920288, "learning_rate": 9.925481528542896e-06, "loss": 0.8972, "step": 2319 }, { "epoch": 0.41715364559920887, "grad_norm": 1.5290125608444214, "learning_rate": 9.92538131612794e-06, "loss": 0.7855, "step": 2320 }, { "epoch": 0.4173334532050706, "grad_norm": 1.5387858152389526, "learning_rate": 9.925281036882021e-06, "loss": 0.9045, "step": 2321 }, { "epoch": 0.4175132608109323, "grad_norm": 1.649853229522705, "learning_rate": 9.925180690806502e-06, "loss": 0.8071, "step": 2322 }, { "epoch": 0.41769306841679404, "grad_norm": 1.0239498615264893, "learning_rate": 9.925080277902743e-06, "loss": 1.1193, "step": 2323 }, { "epoch": 0.41787287602265577, "grad_norm": 1.5653413534164429, "learning_rate": 9.924979798172107e-06, "loss": 0.8953, "step": 2324 }, { "epoch": 0.4180526836285175, "grad_norm": 1.044548749923706, "learning_rate": 9.924879251615958e-06, "loss": 1.0444, "step": 2325 }, { "epoch": 0.4182324912343792, "grad_norm": 1.4505752325057983, "learning_rate": 9.92477863823566e-06, "loss": 0.8636, "step": 2326 }, { "epoch": 0.41841229884024095, "grad_norm": 1.4396181106567383, "learning_rate": 9.924677958032575e-06, "loss": 0.8018, "step": 2327 }, { "epoch": 0.4185921064461027, "grad_norm": 1.4475315809249878, "learning_rate": 9.924577211008076e-06, "loss": 0.8551, "step": 2328 }, { "epoch": 0.4187719140519644, "grad_norm": 1.512417197227478, "learning_rate": 9.924476397163523e-06, "loss": 0.8335, "step": 2329 }, { "epoch": 0.4189517216578261, "grad_norm": 1.4682302474975586, "learning_rate": 9.924375516500289e-06, "loss": 0.8041, "step": 2330 }, { "epoch": 0.41913152926368785, "grad_norm": 1.5155363082885742, "learning_rate": 9.924274569019739e-06, "loss": 0.8773, "step": 2331 }, { "epoch": 0.4193113368695496, "grad_norm": 1.2619487047195435, "learning_rate": 9.924173554723244e-06, "loss": 1.0902, "step": 2332 }, { "epoch": 0.4194911444754113, "grad_norm": 1.5555179119110107, "learning_rate": 9.924072473612176e-06, "loss": 0.8124, "step": 2333 }, { "epoch": 0.41967095208127303, "grad_norm": 1.1937893629074097, "learning_rate": 9.923971325687906e-06, "loss": 1.1079, "step": 2334 }, { "epoch": 0.41985075968713476, "grad_norm": 1.6484133005142212, "learning_rate": 9.923870110951805e-06, "loss": 0.9335, "step": 2335 }, { "epoch": 0.4200305672929965, "grad_norm": 1.4525606632232666, "learning_rate": 9.923768829405249e-06, "loss": 0.8527, "step": 2336 }, { "epoch": 0.4202103748988582, "grad_norm": 1.528429627418518, "learning_rate": 9.92366748104961e-06, "loss": 0.7982, "step": 2337 }, { "epoch": 0.42039018250471993, "grad_norm": 1.0595214366912842, "learning_rate": 9.923566065886263e-06, "loss": 1.0424, "step": 2338 }, { "epoch": 0.42056999011058166, "grad_norm": 1.608428716659546, "learning_rate": 9.923464583916586e-06, "loss": 0.8979, "step": 2339 }, { "epoch": 0.4207497977164434, "grad_norm": 1.0833481550216675, "learning_rate": 9.923363035141953e-06, "loss": 1.0699, "step": 2340 }, { "epoch": 0.4209296053223051, "grad_norm": 1.589219570159912, "learning_rate": 9.923261419563746e-06, "loss": 0.8476, "step": 2341 }, { "epoch": 0.42110941292816684, "grad_norm": 1.5953439474105835, "learning_rate": 9.923159737183341e-06, "loss": 0.8114, "step": 2342 }, { "epoch": 0.42128922053402856, "grad_norm": 1.0695112943649292, "learning_rate": 9.923057988002117e-06, "loss": 1.1063, "step": 2343 }, { "epoch": 0.42146902813989034, "grad_norm": 1.4897994995117188, "learning_rate": 9.922956172021456e-06, "loss": 0.8283, "step": 2344 }, { "epoch": 0.42164883574575207, "grad_norm": 1.0627015829086304, "learning_rate": 9.922854289242741e-06, "loss": 1.0504, "step": 2345 }, { "epoch": 0.4218286433516138, "grad_norm": 1.5226960182189941, "learning_rate": 9.92275233966735e-06, "loss": 0.8628, "step": 2346 }, { "epoch": 0.4220084509574755, "grad_norm": 1.515489101409912, "learning_rate": 9.922650323296673e-06, "loss": 0.8945, "step": 2347 }, { "epoch": 0.42218825856333725, "grad_norm": 1.649574637413025, "learning_rate": 9.922548240132085e-06, "loss": 0.8641, "step": 2348 }, { "epoch": 0.422368066169199, "grad_norm": 1.4393917322158813, "learning_rate": 9.922446090174983e-06, "loss": 0.8364, "step": 2349 }, { "epoch": 0.4225478737750607, "grad_norm": 1.6105282306671143, "learning_rate": 9.92234387342674e-06, "loss": 0.8727, "step": 2350 }, { "epoch": 0.4227276813809224, "grad_norm": 1.0998605489730835, "learning_rate": 9.922241589888754e-06, "loss": 1.062, "step": 2351 }, { "epoch": 0.42290748898678415, "grad_norm": 1.7371755838394165, "learning_rate": 9.922139239562406e-06, "loss": 0.8377, "step": 2352 }, { "epoch": 0.4230872965926459, "grad_norm": 1.430688500404358, "learning_rate": 9.922036822449088e-06, "loss": 0.8504, "step": 2353 }, { "epoch": 0.4232671041985076, "grad_norm": 1.6810100078582764, "learning_rate": 9.921934338550187e-06, "loss": 0.8193, "step": 2354 }, { "epoch": 0.42344691180436933, "grad_norm": 1.5068222284317017, "learning_rate": 9.921831787867098e-06, "loss": 0.8988, "step": 2355 }, { "epoch": 0.42362671941023106, "grad_norm": 1.5643459558486938, "learning_rate": 9.921729170401209e-06, "loss": 0.8963, "step": 2356 }, { "epoch": 0.4238065270160928, "grad_norm": 1.1693477630615234, "learning_rate": 9.921626486153912e-06, "loss": 1.0701, "step": 2357 }, { "epoch": 0.4239863346219545, "grad_norm": 1.42444908618927, "learning_rate": 9.921523735126601e-06, "loss": 0.8102, "step": 2358 }, { "epoch": 0.42416614222781623, "grad_norm": 1.5701311826705933, "learning_rate": 9.921420917320672e-06, "loss": 0.8647, "step": 2359 }, { "epoch": 0.42434594983367796, "grad_norm": 1.457534909248352, "learning_rate": 9.921318032737519e-06, "loss": 0.7945, "step": 2360 }, { "epoch": 0.4245257574395397, "grad_norm": 1.454820156097412, "learning_rate": 9.921215081378536e-06, "loss": 0.813, "step": 2361 }, { "epoch": 0.4247055650454014, "grad_norm": 1.608148217201233, "learning_rate": 9.921112063245125e-06, "loss": 0.8435, "step": 2362 }, { "epoch": 0.42488537265126314, "grad_norm": 1.4875766038894653, "learning_rate": 9.921008978338677e-06, "loss": 0.847, "step": 2363 }, { "epoch": 0.42506518025712486, "grad_norm": 1.5478471517562866, "learning_rate": 9.920905826660596e-06, "loss": 0.844, "step": 2364 }, { "epoch": 0.4252449878629866, "grad_norm": 1.6568055152893066, "learning_rate": 9.92080260821228e-06, "loss": 0.8221, "step": 2365 }, { "epoch": 0.4254247954688483, "grad_norm": 1.163989543914795, "learning_rate": 9.920699322995127e-06, "loss": 1.096, "step": 2366 }, { "epoch": 0.42560460307471004, "grad_norm": 1.628710150718689, "learning_rate": 9.920595971010543e-06, "loss": 0.806, "step": 2367 }, { "epoch": 0.42578441068057177, "grad_norm": 1.038352608680725, "learning_rate": 9.920492552259928e-06, "loss": 1.0484, "step": 2368 }, { "epoch": 0.4259642182864335, "grad_norm": 2.1744678020477295, "learning_rate": 9.920389066744684e-06, "loss": 0.7987, "step": 2369 }, { "epoch": 0.4261440258922952, "grad_norm": 1.6715054512023926, "learning_rate": 9.920285514466217e-06, "loss": 0.8682, "step": 2370 }, { "epoch": 0.42632383349815695, "grad_norm": 1.5618643760681152, "learning_rate": 9.920181895425933e-06, "loss": 0.845, "step": 2371 }, { "epoch": 0.4265036411040187, "grad_norm": 1.5175185203552246, "learning_rate": 9.920078209625235e-06, "loss": 0.8981, "step": 2372 }, { "epoch": 0.42668344870988045, "grad_norm": 1.5245648622512817, "learning_rate": 9.919974457065533e-06, "loss": 0.7823, "step": 2373 }, { "epoch": 0.4268632563157422, "grad_norm": 1.521789789199829, "learning_rate": 9.919870637748232e-06, "loss": 0.8729, "step": 2374 }, { "epoch": 0.4270430639216039, "grad_norm": 1.469714641571045, "learning_rate": 9.919766751674744e-06, "loss": 0.7672, "step": 2375 }, { "epoch": 0.42722287152746563, "grad_norm": 1.5223188400268555, "learning_rate": 9.919662798846475e-06, "loss": 0.8281, "step": 2376 }, { "epoch": 0.42740267913332736, "grad_norm": 1.5437047481536865, "learning_rate": 9.919558779264837e-06, "loss": 0.8362, "step": 2377 }, { "epoch": 0.4275824867391891, "grad_norm": 1.5174371004104614, "learning_rate": 9.919454692931243e-06, "loss": 0.8368, "step": 2378 }, { "epoch": 0.4277622943450508, "grad_norm": 1.6762409210205078, "learning_rate": 9.919350539847101e-06, "loss": 0.8563, "step": 2379 }, { "epoch": 0.42794210195091253, "grad_norm": 1.4334219694137573, "learning_rate": 9.919246320013829e-06, "loss": 0.8394, "step": 2380 }, { "epoch": 0.42812190955677426, "grad_norm": 1.5514843463897705, "learning_rate": 9.919142033432839e-06, "loss": 0.8822, "step": 2381 }, { "epoch": 0.428301717162636, "grad_norm": 1.1513235569000244, "learning_rate": 9.919037680105546e-06, "loss": 1.0755, "step": 2382 }, { "epoch": 0.4284815247684977, "grad_norm": 1.13933527469635, "learning_rate": 9.918933260033366e-06, "loss": 1.0756, "step": 2383 }, { "epoch": 0.42866133237435944, "grad_norm": 1.7083842754364014, "learning_rate": 9.918828773217716e-06, "loss": 0.8324, "step": 2384 }, { "epoch": 0.42884113998022116, "grad_norm": 1.481421947479248, "learning_rate": 9.918724219660013e-06, "loss": 0.8009, "step": 2385 }, { "epoch": 0.4290209475860829, "grad_norm": 1.6778074502944946, "learning_rate": 9.918619599361678e-06, "loss": 0.8418, "step": 2386 }, { "epoch": 0.4292007551919446, "grad_norm": 1.069798469543457, "learning_rate": 9.918514912324129e-06, "loss": 1.071, "step": 2387 }, { "epoch": 0.42938056279780634, "grad_norm": 1.0260963439941406, "learning_rate": 9.918410158548786e-06, "loss": 1.1128, "step": 2388 }, { "epoch": 0.42956037040366807, "grad_norm": 1.4734383821487427, "learning_rate": 9.918305338037071e-06, "loss": 0.8195, "step": 2389 }, { "epoch": 0.4297401780095298, "grad_norm": 1.5069580078125, "learning_rate": 9.918200450790405e-06, "loss": 0.899, "step": 2390 }, { "epoch": 0.4299199856153915, "grad_norm": 1.4929250478744507, "learning_rate": 9.918095496810211e-06, "loss": 0.7854, "step": 2391 }, { "epoch": 0.43009979322125325, "grad_norm": 1.5509469509124756, "learning_rate": 9.917990476097917e-06, "loss": 0.8276, "step": 2392 }, { "epoch": 0.43027960082711497, "grad_norm": 1.4966496229171753, "learning_rate": 9.917885388654945e-06, "loss": 0.8498, "step": 2393 }, { "epoch": 0.4304594084329767, "grad_norm": 1.6335757970809937, "learning_rate": 9.91778023448272e-06, "loss": 0.8041, "step": 2394 }, { "epoch": 0.4306392160388384, "grad_norm": 1.442854642868042, "learning_rate": 9.917675013582671e-06, "loss": 0.8698, "step": 2395 }, { "epoch": 0.43081902364470015, "grad_norm": 1.5593807697296143, "learning_rate": 9.917569725956225e-06, "loss": 0.8302, "step": 2396 }, { "epoch": 0.4309988312505619, "grad_norm": 1.5142141580581665, "learning_rate": 9.917464371604809e-06, "loss": 0.8114, "step": 2397 }, { "epoch": 0.4311786388564236, "grad_norm": 1.557876706123352, "learning_rate": 9.917358950529854e-06, "loss": 0.8577, "step": 2398 }, { "epoch": 0.4313584464622853, "grad_norm": 1.5947502851486206, "learning_rate": 9.91725346273279e-06, "loss": 0.8347, "step": 2399 }, { "epoch": 0.4315382540681471, "grad_norm": 1.616645336151123, "learning_rate": 9.91714790821505e-06, "loss": 0.7903, "step": 2400 }, { "epoch": 0.43171806167400884, "grad_norm": 1.575147032737732, "learning_rate": 9.917042286978064e-06, "loss": 0.8283, "step": 2401 }, { "epoch": 0.43189786927987056, "grad_norm": 1.677069902420044, "learning_rate": 9.916936599023266e-06, "loss": 0.8253, "step": 2402 }, { "epoch": 0.4320776768857323, "grad_norm": 1.6707895994186401, "learning_rate": 9.91683084435209e-06, "loss": 0.8832, "step": 2403 }, { "epoch": 0.432257484491594, "grad_norm": 1.583690881729126, "learning_rate": 9.916725022965971e-06, "loss": 0.8628, "step": 2404 }, { "epoch": 0.43243729209745574, "grad_norm": 1.466468334197998, "learning_rate": 9.916619134866346e-06, "loss": 0.7689, "step": 2405 }, { "epoch": 0.43261709970331746, "grad_norm": 1.5791860818862915, "learning_rate": 9.91651318005465e-06, "loss": 0.8418, "step": 2406 }, { "epoch": 0.4327969073091792, "grad_norm": 1.5048342943191528, "learning_rate": 9.91640715853232e-06, "loss": 0.8577, "step": 2407 }, { "epoch": 0.4329767149150409, "grad_norm": 1.494005799293518, "learning_rate": 9.916301070300798e-06, "loss": 0.8245, "step": 2408 }, { "epoch": 0.43315652252090264, "grad_norm": 1.358862042427063, "learning_rate": 9.916194915361518e-06, "loss": 1.0721, "step": 2409 }, { "epoch": 0.43333633012676437, "grad_norm": 1.5218268632888794, "learning_rate": 9.916088693715927e-06, "loss": 0.8509, "step": 2410 }, { "epoch": 0.4335161377326261, "grad_norm": 1.5785698890686035, "learning_rate": 9.915982405365463e-06, "loss": 0.8049, "step": 2411 }, { "epoch": 0.4336959453384878, "grad_norm": 1.4197174310684204, "learning_rate": 9.915876050311565e-06, "loss": 0.875, "step": 2412 }, { "epoch": 0.43387575294434955, "grad_norm": 1.0686644315719604, "learning_rate": 9.915769628555682e-06, "loss": 1.094, "step": 2413 }, { "epoch": 0.4340555605502113, "grad_norm": 1.4375571012496948, "learning_rate": 9.915663140099256e-06, "loss": 0.7868, "step": 2414 }, { "epoch": 0.434235368156073, "grad_norm": 1.5514402389526367, "learning_rate": 9.91555658494373e-06, "loss": 0.8949, "step": 2415 }, { "epoch": 0.4344151757619347, "grad_norm": 1.5505059957504272, "learning_rate": 9.915449963090551e-06, "loss": 0.8605, "step": 2416 }, { "epoch": 0.43459498336779645, "grad_norm": 1.4317145347595215, "learning_rate": 9.915343274541165e-06, "loss": 0.7919, "step": 2417 }, { "epoch": 0.4347747909736582, "grad_norm": 1.1678762435913086, "learning_rate": 9.915236519297021e-06, "loss": 1.0931, "step": 2418 }, { "epoch": 0.4349545985795199, "grad_norm": 1.0904592275619507, "learning_rate": 9.915129697359566e-06, "loss": 1.088, "step": 2419 }, { "epoch": 0.43513440618538163, "grad_norm": 1.6123511791229248, "learning_rate": 9.915022808730252e-06, "loss": 0.8714, "step": 2420 }, { "epoch": 0.43531421379124335, "grad_norm": 1.4673606157302856, "learning_rate": 9.914915853410528e-06, "loss": 0.8085, "step": 2421 }, { "epoch": 0.4354940213971051, "grad_norm": 1.6150932312011719, "learning_rate": 9.914808831401842e-06, "loss": 0.7829, "step": 2422 }, { "epoch": 0.4356738290029668, "grad_norm": 1.573913812637329, "learning_rate": 9.914701742705652e-06, "loss": 0.8096, "step": 2423 }, { "epoch": 0.43585363660882853, "grad_norm": 1.564139723777771, "learning_rate": 9.914594587323408e-06, "loss": 0.8054, "step": 2424 }, { "epoch": 0.43603344421469026, "grad_norm": 1.527878761291504, "learning_rate": 9.914487365256562e-06, "loss": 0.8689, "step": 2425 }, { "epoch": 0.436213251820552, "grad_norm": 1.7436563968658447, "learning_rate": 9.914380076506572e-06, "loss": 0.8683, "step": 2426 }, { "epoch": 0.4363930594264137, "grad_norm": 1.4985802173614502, "learning_rate": 9.914272721074894e-06, "loss": 0.8613, "step": 2427 }, { "epoch": 0.4365728670322755, "grad_norm": 1.5824799537658691, "learning_rate": 9.91416529896298e-06, "loss": 0.8332, "step": 2428 }, { "epoch": 0.4367526746381372, "grad_norm": 1.4053213596343994, "learning_rate": 9.914057810172296e-06, "loss": 0.7545, "step": 2429 }, { "epoch": 0.43693248224399894, "grad_norm": 2.364774465560913, "learning_rate": 9.913950254704291e-06, "loss": 0.8515, "step": 2430 }, { "epoch": 0.43711228984986067, "grad_norm": 1.530521273612976, "learning_rate": 9.91384263256043e-06, "loss": 0.8505, "step": 2431 }, { "epoch": 0.4372920974557224, "grad_norm": 1.6768652200698853, "learning_rate": 9.913734943742173e-06, "loss": 1.1295, "step": 2432 }, { "epoch": 0.4374719050615841, "grad_norm": 1.4930325746536255, "learning_rate": 9.913627188250979e-06, "loss": 0.8147, "step": 2433 }, { "epoch": 0.43765171266744585, "grad_norm": 1.5284229516983032, "learning_rate": 9.913519366088312e-06, "loss": 0.827, "step": 2434 }, { "epoch": 0.4378315202733076, "grad_norm": 1.5437084436416626, "learning_rate": 9.913411477255634e-06, "loss": 0.8354, "step": 2435 }, { "epoch": 0.4380113278791693, "grad_norm": 3.009801149368286, "learning_rate": 9.91330352175441e-06, "loss": 0.8314, "step": 2436 }, { "epoch": 0.438191135485031, "grad_norm": 1.4444376230239868, "learning_rate": 9.913195499586105e-06, "loss": 0.8069, "step": 2437 }, { "epoch": 0.43837094309089275, "grad_norm": 1.564110517501831, "learning_rate": 9.913087410752183e-06, "loss": 0.8017, "step": 2438 }, { "epoch": 0.4385507506967545, "grad_norm": 1.6118375062942505, "learning_rate": 9.912979255254111e-06, "loss": 0.8413, "step": 2439 }, { "epoch": 0.4387305583026162, "grad_norm": 1.5061959028244019, "learning_rate": 9.912871033093356e-06, "loss": 0.8151, "step": 2440 }, { "epoch": 0.43891036590847793, "grad_norm": 1.440015435218811, "learning_rate": 9.91276274427139e-06, "loss": 0.824, "step": 2441 }, { "epoch": 0.43909017351433965, "grad_norm": 1.5082778930664062, "learning_rate": 9.912654388789678e-06, "loss": 0.82, "step": 2442 }, { "epoch": 0.4392699811202014, "grad_norm": 1.5205658674240112, "learning_rate": 9.912545966649693e-06, "loss": 0.784, "step": 2443 }, { "epoch": 0.4394497887260631, "grad_norm": 1.4534865617752075, "learning_rate": 9.912437477852905e-06, "loss": 0.804, "step": 2444 }, { "epoch": 0.43962959633192483, "grad_norm": 1.4785397052764893, "learning_rate": 9.912328922400785e-06, "loss": 0.8475, "step": 2445 }, { "epoch": 0.43980940393778656, "grad_norm": 1.0366032123565674, "learning_rate": 9.912220300294807e-06, "loss": 1.0778, "step": 2446 }, { "epoch": 0.4399892115436483, "grad_norm": 1.0261709690093994, "learning_rate": 9.912111611536447e-06, "loss": 1.0679, "step": 2447 }, { "epoch": 0.44016901914951, "grad_norm": 1.587607979774475, "learning_rate": 9.912002856127177e-06, "loss": 0.8286, "step": 2448 }, { "epoch": 0.44034882675537174, "grad_norm": 1.49055016040802, "learning_rate": 9.911894034068474e-06, "loss": 0.8314, "step": 2449 }, { "epoch": 0.44052863436123346, "grad_norm": 1.0660908222198486, "learning_rate": 9.911785145361814e-06, "loss": 1.0609, "step": 2450 }, { "epoch": 0.4407084419670952, "grad_norm": 1.5199803113937378, "learning_rate": 9.911676190008673e-06, "loss": 0.8023, "step": 2451 }, { "epoch": 0.4408882495729569, "grad_norm": 1.0748035907745361, "learning_rate": 9.911567168010532e-06, "loss": 1.1046, "step": 2452 }, { "epoch": 0.44106805717881864, "grad_norm": 1.5253825187683105, "learning_rate": 9.91145807936887e-06, "loss": 0.791, "step": 2453 }, { "epoch": 0.44124786478468037, "grad_norm": 1.0811787843704224, "learning_rate": 9.911348924085165e-06, "loss": 1.0784, "step": 2454 }, { "epoch": 0.44142767239054215, "grad_norm": 1.6659926176071167, "learning_rate": 9.9112397021609e-06, "loss": 0.9533, "step": 2455 }, { "epoch": 0.4416074799964039, "grad_norm": 1.591433048248291, "learning_rate": 9.911130413597556e-06, "loss": 0.8406, "step": 2456 }, { "epoch": 0.4417872876022656, "grad_norm": 1.5269899368286133, "learning_rate": 9.911021058396618e-06, "loss": 0.8453, "step": 2457 }, { "epoch": 0.4419670952081273, "grad_norm": 1.0827020406723022, "learning_rate": 9.910911636559567e-06, "loss": 1.0629, "step": 2458 }, { "epoch": 0.44214690281398905, "grad_norm": 2.814195156097412, "learning_rate": 9.910802148087887e-06, "loss": 0.8737, "step": 2459 }, { "epoch": 0.4423267104198508, "grad_norm": 1.4682694673538208, "learning_rate": 9.910692592983066e-06, "loss": 0.8574, "step": 2460 }, { "epoch": 0.4425065180257125, "grad_norm": 1.5051543712615967, "learning_rate": 9.910582971246592e-06, "loss": 0.8539, "step": 2461 }, { "epoch": 0.44268632563157423, "grad_norm": 1.553985357284546, "learning_rate": 9.91047328287995e-06, "loss": 0.8661, "step": 2462 }, { "epoch": 0.44286613323743595, "grad_norm": 1.495041847229004, "learning_rate": 9.910363527884627e-06, "loss": 0.7971, "step": 2463 }, { "epoch": 0.4430459408432977, "grad_norm": 1.649571180343628, "learning_rate": 9.910253706262116e-06, "loss": 0.8373, "step": 2464 }, { "epoch": 0.4432257484491594, "grad_norm": 1.5189071893692017, "learning_rate": 9.910143818013905e-06, "loss": 0.8725, "step": 2465 }, { "epoch": 0.44340555605502113, "grad_norm": 1.5505541563034058, "learning_rate": 9.910033863141485e-06, "loss": 0.8303, "step": 2466 }, { "epoch": 0.44358536366088286, "grad_norm": 1.5025222301483154, "learning_rate": 9.909923841646347e-06, "loss": 0.8513, "step": 2467 }, { "epoch": 0.4437651712667446, "grad_norm": 1.446141242980957, "learning_rate": 9.909813753529987e-06, "loss": 0.8072, "step": 2468 }, { "epoch": 0.4439449788726063, "grad_norm": 1.5343796014785767, "learning_rate": 9.909703598793895e-06, "loss": 0.8689, "step": 2469 }, { "epoch": 0.44412478647846804, "grad_norm": 1.4698457717895508, "learning_rate": 9.909593377439569e-06, "loss": 0.8255, "step": 2470 }, { "epoch": 0.44430459408432976, "grad_norm": 1.1336724758148193, "learning_rate": 9.9094830894685e-06, "loss": 1.122, "step": 2471 }, { "epoch": 0.4444844016901915, "grad_norm": 1.553816318511963, "learning_rate": 9.90937273488219e-06, "loss": 0.8152, "step": 2472 }, { "epoch": 0.4446642092960532, "grad_norm": 1.5249749422073364, "learning_rate": 9.909262313682133e-06, "loss": 0.8307, "step": 2473 }, { "epoch": 0.44484401690191494, "grad_norm": 1.4319777488708496, "learning_rate": 9.909151825869827e-06, "loss": 0.838, "step": 2474 }, { "epoch": 0.44502382450777667, "grad_norm": 1.589898943901062, "learning_rate": 9.909041271446773e-06, "loss": 0.8443, "step": 2475 }, { "epoch": 0.4452036321136384, "grad_norm": 1.5242555141448975, "learning_rate": 9.90893065041447e-06, "loss": 0.8099, "step": 2476 }, { "epoch": 0.4453834397195001, "grad_norm": 2.2500481605529785, "learning_rate": 9.90881996277442e-06, "loss": 1.0825, "step": 2477 }, { "epoch": 0.44556324732536184, "grad_norm": 1.8440536260604858, "learning_rate": 9.908709208528124e-06, "loss": 0.8168, "step": 2478 }, { "epoch": 0.44574305493122357, "grad_norm": 1.5924036502838135, "learning_rate": 9.908598387677085e-06, "loss": 0.8653, "step": 2479 }, { "epoch": 0.4459228625370853, "grad_norm": 1.3821762800216675, "learning_rate": 9.908487500222806e-06, "loss": 0.7155, "step": 2480 }, { "epoch": 0.446102670142947, "grad_norm": 1.5962104797363281, "learning_rate": 9.908376546166793e-06, "loss": 0.819, "step": 2481 }, { "epoch": 0.44628247774880875, "grad_norm": 1.5541999340057373, "learning_rate": 9.908265525510549e-06, "loss": 0.795, "step": 2482 }, { "epoch": 0.44646228535467053, "grad_norm": 1.528351068496704, "learning_rate": 9.908154438255586e-06, "loss": 0.8686, "step": 2483 }, { "epoch": 0.44664209296053226, "grad_norm": 1.5273449420928955, "learning_rate": 9.908043284403404e-06, "loss": 0.8575, "step": 2484 }, { "epoch": 0.446821900566394, "grad_norm": 1.4422616958618164, "learning_rate": 9.907932063955515e-06, "loss": 0.8315, "step": 2485 }, { "epoch": 0.4470017081722557, "grad_norm": 1.7080589532852173, "learning_rate": 9.907820776913429e-06, "loss": 0.8144, "step": 2486 }, { "epoch": 0.44718151577811743, "grad_norm": 1.2078909873962402, "learning_rate": 9.907709423278654e-06, "loss": 1.0982, "step": 2487 }, { "epoch": 0.44736132338397916, "grad_norm": 1.572887897491455, "learning_rate": 9.907598003052701e-06, "loss": 0.8622, "step": 2488 }, { "epoch": 0.4475411309898409, "grad_norm": 1.6511040925979614, "learning_rate": 9.907486516237084e-06, "loss": 0.8153, "step": 2489 }, { "epoch": 0.4477209385957026, "grad_norm": 1.5290846824645996, "learning_rate": 9.907374962833313e-06, "loss": 0.8393, "step": 2490 }, { "epoch": 0.44790074620156434, "grad_norm": 1.6253187656402588, "learning_rate": 9.907263342842904e-06, "loss": 0.7748, "step": 2491 }, { "epoch": 0.44808055380742606, "grad_norm": 1.4786094427108765, "learning_rate": 9.907151656267372e-06, "loss": 0.8286, "step": 2492 }, { "epoch": 0.4482603614132878, "grad_norm": 1.5785024166107178, "learning_rate": 9.907039903108226e-06, "loss": 0.8483, "step": 2493 }, { "epoch": 0.4484401690191495, "grad_norm": 1.588182806968689, "learning_rate": 9.906928083366992e-06, "loss": 0.8149, "step": 2494 }, { "epoch": 0.44861997662501124, "grad_norm": 1.5544952154159546, "learning_rate": 9.90681619704518e-06, "loss": 0.7985, "step": 2495 }, { "epoch": 0.44879978423087297, "grad_norm": 1.5586316585540771, "learning_rate": 9.90670424414431e-06, "loss": 0.808, "step": 2496 }, { "epoch": 0.4489795918367347, "grad_norm": 1.5877716541290283, "learning_rate": 9.906592224665903e-06, "loss": 0.8326, "step": 2497 }, { "epoch": 0.4491593994425964, "grad_norm": 1.1127163171768188, "learning_rate": 9.906480138611478e-06, "loss": 1.0731, "step": 2498 }, { "epoch": 0.44933920704845814, "grad_norm": 1.4861953258514404, "learning_rate": 9.906367985982555e-06, "loss": 0.811, "step": 2499 }, { "epoch": 0.44951901465431987, "grad_norm": 1.5699818134307861, "learning_rate": 9.906255766780657e-06, "loss": 0.7919, "step": 2500 }, { "epoch": 0.44951901465431987, "eval_loss": 0.8614675402641296, "eval_runtime": 148.7071, "eval_samples_per_second": 96.714, "eval_steps_per_second": 1.513, "step": 2500 }, { "epoch": 0.4496988222601816, "grad_norm": 1.553491473197937, "learning_rate": 9.906143481007304e-06, "loss": 0.8454, "step": 2501 }, { "epoch": 0.4498786298660433, "grad_norm": 1.5977733135223389, "learning_rate": 9.906031128664023e-06, "loss": 0.7927, "step": 2502 }, { "epoch": 0.45005843747190505, "grad_norm": 1.7219133377075195, "learning_rate": 9.905918709752338e-06, "loss": 0.8639, "step": 2503 }, { "epoch": 0.4502382450777668, "grad_norm": 1.470960021018982, "learning_rate": 9.905806224273771e-06, "loss": 0.8442, "step": 2504 }, { "epoch": 0.4504180526836285, "grad_norm": 1.4708337783813477, "learning_rate": 9.905693672229851e-06, "loss": 0.823, "step": 2505 }, { "epoch": 0.4505978602894902, "grad_norm": 1.578317642211914, "learning_rate": 9.905581053622105e-06, "loss": 0.8471, "step": 2506 }, { "epoch": 0.45077766789535195, "grad_norm": 1.6586967706680298, "learning_rate": 9.905468368452062e-06, "loss": 0.8616, "step": 2507 }, { "epoch": 0.4509574755012137, "grad_norm": 1.5738680362701416, "learning_rate": 9.905355616721249e-06, "loss": 0.7892, "step": 2508 }, { "epoch": 0.4511372831070754, "grad_norm": 1.5817112922668457, "learning_rate": 9.905242798431196e-06, "loss": 0.8079, "step": 2509 }, { "epoch": 0.45131709071293713, "grad_norm": 1.6052733659744263, "learning_rate": 9.905129913583435e-06, "loss": 0.8335, "step": 2510 }, { "epoch": 0.4514968983187989, "grad_norm": 1.7553304433822632, "learning_rate": 9.905016962179499e-06, "loss": 0.8534, "step": 2511 }, { "epoch": 0.45167670592466064, "grad_norm": 2.6041359901428223, "learning_rate": 9.904903944220919e-06, "loss": 0.7954, "step": 2512 }, { "epoch": 0.45185651353052236, "grad_norm": 1.2038242816925049, "learning_rate": 9.904790859709225e-06, "loss": 1.0932, "step": 2513 }, { "epoch": 0.4520363211363841, "grad_norm": 1.4894423484802246, "learning_rate": 9.904677708645959e-06, "loss": 0.8154, "step": 2514 }, { "epoch": 0.4522161287422458, "grad_norm": 1.5056085586547852, "learning_rate": 9.904564491032648e-06, "loss": 0.7926, "step": 2515 }, { "epoch": 0.45239593634810754, "grad_norm": 1.4649238586425781, "learning_rate": 9.904451206870835e-06, "loss": 0.8518, "step": 2516 }, { "epoch": 0.45257574395396927, "grad_norm": 1.6778141260147095, "learning_rate": 9.904337856162054e-06, "loss": 0.8485, "step": 2517 }, { "epoch": 0.452755551559831, "grad_norm": 1.703490138053894, "learning_rate": 9.904224438907843e-06, "loss": 0.7953, "step": 2518 }, { "epoch": 0.4529353591656927, "grad_norm": 1.483712911605835, "learning_rate": 9.90411095510974e-06, "loss": 0.7652, "step": 2519 }, { "epoch": 0.45311516677155445, "grad_norm": 1.580459475517273, "learning_rate": 9.903997404769289e-06, "loss": 0.7541, "step": 2520 }, { "epoch": 0.45329497437741617, "grad_norm": 1.8843134641647339, "learning_rate": 9.903883787888027e-06, "loss": 0.7581, "step": 2521 }, { "epoch": 0.4534747819832779, "grad_norm": 1.6162362098693848, "learning_rate": 9.903770104467497e-06, "loss": 0.8947, "step": 2522 }, { "epoch": 0.4536545895891396, "grad_norm": 1.5380065441131592, "learning_rate": 9.90365635450924e-06, "loss": 0.8035, "step": 2523 }, { "epoch": 0.45383439719500135, "grad_norm": 1.5379712581634521, "learning_rate": 9.9035425380148e-06, "loss": 0.8047, "step": 2524 }, { "epoch": 0.4540142048008631, "grad_norm": 1.490455985069275, "learning_rate": 9.903428654985723e-06, "loss": 0.8577, "step": 2525 }, { "epoch": 0.4541940124067248, "grad_norm": 1.676100492477417, "learning_rate": 9.903314705423552e-06, "loss": 0.9108, "step": 2526 }, { "epoch": 0.4543738200125865, "grad_norm": 1.5574097633361816, "learning_rate": 9.903200689329834e-06, "loss": 0.7899, "step": 2527 }, { "epoch": 0.45455362761844825, "grad_norm": 1.451900839805603, "learning_rate": 9.903086606706119e-06, "loss": 0.8579, "step": 2528 }, { "epoch": 0.45473343522431, "grad_norm": 1.7584826946258545, "learning_rate": 9.90297245755395e-06, "loss": 0.8866, "step": 2529 }, { "epoch": 0.4549132428301717, "grad_norm": 1.4688513278961182, "learning_rate": 9.90285824187488e-06, "loss": 0.8144, "step": 2530 }, { "epoch": 0.45509305043603343, "grad_norm": 1.5480796098709106, "learning_rate": 9.902743959670455e-06, "loss": 0.8809, "step": 2531 }, { "epoch": 0.45527285804189516, "grad_norm": 1.550115704536438, "learning_rate": 9.902629610942229e-06, "loss": 0.809, "step": 2532 }, { "epoch": 0.4554526656477569, "grad_norm": 1.4843964576721191, "learning_rate": 9.902515195691751e-06, "loss": 0.8151, "step": 2533 }, { "epoch": 0.4556324732536186, "grad_norm": 1.4643597602844238, "learning_rate": 9.902400713920575e-06, "loss": 0.8489, "step": 2534 }, { "epoch": 0.45581228085948033, "grad_norm": 1.5376555919647217, "learning_rate": 9.902286165630252e-06, "loss": 0.7622, "step": 2535 }, { "epoch": 0.45599208846534206, "grad_norm": 1.4310137033462524, "learning_rate": 9.902171550822341e-06, "loss": 0.848, "step": 2536 }, { "epoch": 0.4561718960712038, "grad_norm": 1.5296666622161865, "learning_rate": 9.902056869498393e-06, "loss": 0.7958, "step": 2537 }, { "epoch": 0.4563517036770655, "grad_norm": 1.6200224161148071, "learning_rate": 9.901942121659966e-06, "loss": 0.7672, "step": 2538 }, { "epoch": 0.4565315112829273, "grad_norm": 1.4994958639144897, "learning_rate": 9.901827307308616e-06, "loss": 0.7215, "step": 2539 }, { "epoch": 0.456711318888789, "grad_norm": 1.5981345176696777, "learning_rate": 9.901712426445901e-06, "loss": 0.7996, "step": 2540 }, { "epoch": 0.45689112649465075, "grad_norm": 1.4199411869049072, "learning_rate": 9.901597479073382e-06, "loss": 0.8436, "step": 2541 }, { "epoch": 0.45707093410051247, "grad_norm": 1.1455599069595337, "learning_rate": 9.901482465192616e-06, "loss": 1.0846, "step": 2542 }, { "epoch": 0.4572507417063742, "grad_norm": 1.3447349071502686, "learning_rate": 9.901367384805163e-06, "loss": 0.7742, "step": 2543 }, { "epoch": 0.4574305493122359, "grad_norm": 1.034902811050415, "learning_rate": 9.901252237912586e-06, "loss": 1.0871, "step": 2544 }, { "epoch": 0.45761035691809765, "grad_norm": 1.5692569017410278, "learning_rate": 9.901137024516449e-06, "loss": 0.8192, "step": 2545 }, { "epoch": 0.4577901645239594, "grad_norm": 1.6994982957839966, "learning_rate": 9.90102174461831e-06, "loss": 0.8322, "step": 2546 }, { "epoch": 0.4579699721298211, "grad_norm": 1.491075873374939, "learning_rate": 9.90090639821974e-06, "loss": 0.7361, "step": 2547 }, { "epoch": 0.4581497797356828, "grad_norm": 1.5694986581802368, "learning_rate": 9.900790985322302e-06, "loss": 0.7938, "step": 2548 }, { "epoch": 0.45832958734154455, "grad_norm": 1.5170072317123413, "learning_rate": 9.900675505927556e-06, "loss": 0.8531, "step": 2549 }, { "epoch": 0.4585093949474063, "grad_norm": 1.5430388450622559, "learning_rate": 9.900559960037079e-06, "loss": 0.7811, "step": 2550 }, { "epoch": 0.458689202553268, "grad_norm": 1.5756486654281616, "learning_rate": 9.90044434765243e-06, "loss": 0.8376, "step": 2551 }, { "epoch": 0.45886901015912973, "grad_norm": 1.545575737953186, "learning_rate": 9.900328668775183e-06, "loss": 0.8011, "step": 2552 }, { "epoch": 0.45904881776499146, "grad_norm": 1.592370629310608, "learning_rate": 9.900212923406905e-06, "loss": 0.8363, "step": 2553 }, { "epoch": 0.4592286253708532, "grad_norm": 1.5503813028335571, "learning_rate": 9.900097111549168e-06, "loss": 0.8248, "step": 2554 }, { "epoch": 0.4594084329767149, "grad_norm": 1.5230860710144043, "learning_rate": 9.899981233203542e-06, "loss": 0.8255, "step": 2555 }, { "epoch": 0.45958824058257663, "grad_norm": 1.6106969118118286, "learning_rate": 9.8998652883716e-06, "loss": 0.8528, "step": 2556 }, { "epoch": 0.45976804818843836, "grad_norm": 1.5124422311782837, "learning_rate": 9.899749277054916e-06, "loss": 0.9282, "step": 2557 }, { "epoch": 0.4599478557943001, "grad_norm": 1.5151700973510742, "learning_rate": 9.899633199255063e-06, "loss": 0.8326, "step": 2558 }, { "epoch": 0.4601276634001618, "grad_norm": 1.5342978239059448, "learning_rate": 9.899517054973618e-06, "loss": 0.8206, "step": 2559 }, { "epoch": 0.46030747100602354, "grad_norm": 1.6239745616912842, "learning_rate": 9.899400844212154e-06, "loss": 0.847, "step": 2560 }, { "epoch": 0.46048727861188526, "grad_norm": 1.5401986837387085, "learning_rate": 9.899284566972249e-06, "loss": 1.068, "step": 2561 }, { "epoch": 0.460667086217747, "grad_norm": 1.4969291687011719, "learning_rate": 9.899168223255482e-06, "loss": 0.8039, "step": 2562 }, { "epoch": 0.4608468938236087, "grad_norm": 1.502083659172058, "learning_rate": 9.899051813063429e-06, "loss": 0.9005, "step": 2563 }, { "epoch": 0.46102670142947044, "grad_norm": 1.511492371559143, "learning_rate": 9.898935336397673e-06, "loss": 0.8144, "step": 2564 }, { "epoch": 0.46120650903533217, "grad_norm": 1.6495897769927979, "learning_rate": 9.89881879325979e-06, "loss": 0.8686, "step": 2565 }, { "epoch": 0.46138631664119395, "grad_norm": 1.449591040611267, "learning_rate": 9.898702183651366e-06, "loss": 0.8668, "step": 2566 }, { "epoch": 0.4615661242470557, "grad_norm": 1.7070987224578857, "learning_rate": 9.898585507573981e-06, "loss": 0.7801, "step": 2567 }, { "epoch": 0.4617459318529174, "grad_norm": 1.5939399003982544, "learning_rate": 9.898468765029217e-06, "loss": 0.819, "step": 2568 }, { "epoch": 0.4619257394587791, "grad_norm": 1.644212245941162, "learning_rate": 9.898351956018662e-06, "loss": 0.8631, "step": 2569 }, { "epoch": 0.46210554706464085, "grad_norm": 1.5065555572509766, "learning_rate": 9.898235080543896e-06, "loss": 0.8656, "step": 2570 }, { "epoch": 0.4622853546705026, "grad_norm": 1.47316575050354, "learning_rate": 9.898118138606507e-06, "loss": 0.8359, "step": 2571 }, { "epoch": 0.4624651622763643, "grad_norm": 1.59735107421875, "learning_rate": 9.898001130208082e-06, "loss": 0.8612, "step": 2572 }, { "epoch": 0.46264496988222603, "grad_norm": 1.1476689577102661, "learning_rate": 9.89788405535021e-06, "loss": 1.0736, "step": 2573 }, { "epoch": 0.46282477748808776, "grad_norm": 1.6912773847579956, "learning_rate": 9.897766914034477e-06, "loss": 0.9737, "step": 2574 }, { "epoch": 0.4630045850939495, "grad_norm": 1.504783034324646, "learning_rate": 9.897649706262474e-06, "loss": 0.8665, "step": 2575 }, { "epoch": 0.4631843926998112, "grad_norm": 1.9352943897247314, "learning_rate": 9.897532432035791e-06, "loss": 0.9189, "step": 2576 }, { "epoch": 0.46336420030567294, "grad_norm": 1.8337575197219849, "learning_rate": 9.897415091356017e-06, "loss": 0.7657, "step": 2577 }, { "epoch": 0.46354400791153466, "grad_norm": 1.6136903762817383, "learning_rate": 9.897297684224749e-06, "loss": 0.7877, "step": 2578 }, { "epoch": 0.4637238155173964, "grad_norm": 1.643237829208374, "learning_rate": 9.897180210643575e-06, "loss": 0.8068, "step": 2579 }, { "epoch": 0.4639036231232581, "grad_norm": 1.0806094408035278, "learning_rate": 9.897062670614092e-06, "loss": 1.0749, "step": 2580 }, { "epoch": 0.46408343072911984, "grad_norm": 1.8718681335449219, "learning_rate": 9.896945064137895e-06, "loss": 0.8771, "step": 2581 }, { "epoch": 0.46426323833498157, "grad_norm": 1.0069388151168823, "learning_rate": 9.896827391216578e-06, "loss": 1.1024, "step": 2582 }, { "epoch": 0.4644430459408433, "grad_norm": 1.456615686416626, "learning_rate": 9.89670965185174e-06, "loss": 0.7604, "step": 2583 }, { "epoch": 0.464622853546705, "grad_norm": 1.0392764806747437, "learning_rate": 9.896591846044976e-06, "loss": 1.0478, "step": 2584 }, { "epoch": 0.46480266115256674, "grad_norm": 1.637243628501892, "learning_rate": 9.896473973797886e-06, "loss": 0.8407, "step": 2585 }, { "epoch": 0.46498246875842847, "grad_norm": 1.6806052923202515, "learning_rate": 9.896356035112069e-06, "loss": 0.7434, "step": 2586 }, { "epoch": 0.4651622763642902, "grad_norm": 1.467620611190796, "learning_rate": 9.896238029989128e-06, "loss": 0.7586, "step": 2587 }, { "epoch": 0.4653420839701519, "grad_norm": 1.1218675374984741, "learning_rate": 9.896119958430657e-06, "loss": 1.0677, "step": 2588 }, { "epoch": 0.46552189157601365, "grad_norm": 1.4799679517745972, "learning_rate": 9.896001820438265e-06, "loss": 0.8206, "step": 2589 }, { "epoch": 0.4657016991818754, "grad_norm": 1.561996579170227, "learning_rate": 9.89588361601355e-06, "loss": 0.872, "step": 2590 }, { "epoch": 0.4658815067877371, "grad_norm": 1.438478946685791, "learning_rate": 9.895765345158122e-06, "loss": 0.8808, "step": 2591 }, { "epoch": 0.4660613143935988, "grad_norm": 1.5822250843048096, "learning_rate": 9.89564700787358e-06, "loss": 0.8398, "step": 2592 }, { "epoch": 0.46624112199946055, "grad_norm": 1.5234055519104004, "learning_rate": 9.895528604161532e-06, "loss": 0.8303, "step": 2593 }, { "epoch": 0.46642092960532233, "grad_norm": 1.1035220623016357, "learning_rate": 9.895410134023585e-06, "loss": 1.0855, "step": 2594 }, { "epoch": 0.46660073721118406, "grad_norm": 1.7757598161697388, "learning_rate": 9.895291597461346e-06, "loss": 0.8605, "step": 2595 }, { "epoch": 0.4667805448170458, "grad_norm": 1.0627930164337158, "learning_rate": 9.895172994476423e-06, "loss": 1.0964, "step": 2596 }, { "epoch": 0.4669603524229075, "grad_norm": 1.5356359481811523, "learning_rate": 9.895054325070425e-06, "loss": 0.8907, "step": 2597 }, { "epoch": 0.46714016002876924, "grad_norm": 1.0164508819580078, "learning_rate": 9.894935589244965e-06, "loss": 1.0787, "step": 2598 }, { "epoch": 0.46731996763463096, "grad_norm": 2.0380136966705322, "learning_rate": 9.894816787001648e-06, "loss": 0.8454, "step": 2599 }, { "epoch": 0.4674997752404927, "grad_norm": 1.5695935487747192, "learning_rate": 9.894697918342093e-06, "loss": 0.8687, "step": 2600 }, { "epoch": 0.4676795828463544, "grad_norm": 1.0621960163116455, "learning_rate": 9.894578983267909e-06, "loss": 1.0747, "step": 2601 }, { "epoch": 0.46785939045221614, "grad_norm": 1.5326999425888062, "learning_rate": 9.894459981780711e-06, "loss": 0.7986, "step": 2602 }, { "epoch": 0.46803919805807787, "grad_norm": 1.4959661960601807, "learning_rate": 9.894340913882113e-06, "loss": 0.7713, "step": 2603 }, { "epoch": 0.4682190056639396, "grad_norm": 1.5932610034942627, "learning_rate": 9.894221779573729e-06, "loss": 0.8968, "step": 2604 }, { "epoch": 0.4683988132698013, "grad_norm": 1.5693800449371338, "learning_rate": 9.89410257885718e-06, "loss": 0.8079, "step": 2605 }, { "epoch": 0.46857862087566304, "grad_norm": 1.6857457160949707, "learning_rate": 9.893983311734078e-06, "loss": 0.8367, "step": 2606 }, { "epoch": 0.46875842848152477, "grad_norm": 2.1376028060913086, "learning_rate": 9.893863978206046e-06, "loss": 0.8204, "step": 2607 }, { "epoch": 0.4689382360873865, "grad_norm": 1.5082740783691406, "learning_rate": 9.893744578274702e-06, "loss": 0.8389, "step": 2608 }, { "epoch": 0.4691180436932482, "grad_norm": 1.6410473585128784, "learning_rate": 9.893625111941663e-06, "loss": 0.7777, "step": 2609 }, { "epoch": 0.46929785129910995, "grad_norm": 1.4708731174468994, "learning_rate": 9.893505579208554e-06, "loss": 0.7995, "step": 2610 }, { "epoch": 0.4694776589049717, "grad_norm": 1.508799433708191, "learning_rate": 9.893385980076995e-06, "loss": 0.7756, "step": 2611 }, { "epoch": 0.4696574665108334, "grad_norm": 1.5745434761047363, "learning_rate": 9.893266314548608e-06, "loss": 0.8885, "step": 2612 }, { "epoch": 0.4698372741166951, "grad_norm": 1.751867413520813, "learning_rate": 9.893146582625019e-06, "loss": 0.8171, "step": 2613 }, { "epoch": 0.47001708172255685, "grad_norm": 1.527207851409912, "learning_rate": 9.893026784307851e-06, "loss": 0.86, "step": 2614 }, { "epoch": 0.4701968893284186, "grad_norm": 1.5076864957809448, "learning_rate": 9.89290691959873e-06, "loss": 0.9324, "step": 2615 }, { "epoch": 0.4703766969342803, "grad_norm": 1.7824742794036865, "learning_rate": 9.892786988499284e-06, "loss": 0.8321, "step": 2616 }, { "epoch": 0.47055650454014203, "grad_norm": 1.5276470184326172, "learning_rate": 9.892666991011135e-06, "loss": 0.8506, "step": 2617 }, { "epoch": 0.47073631214600375, "grad_norm": 1.5718748569488525, "learning_rate": 9.892546927135916e-06, "loss": 0.7956, "step": 2618 }, { "epoch": 0.4709161197518655, "grad_norm": 1.623161792755127, "learning_rate": 9.892426796875256e-06, "loss": 0.8669, "step": 2619 }, { "epoch": 0.4710959273577272, "grad_norm": 1.4622313976287842, "learning_rate": 9.892306600230784e-06, "loss": 0.8076, "step": 2620 }, { "epoch": 0.47127573496358893, "grad_norm": 1.7852836847305298, "learning_rate": 9.892186337204128e-06, "loss": 0.8414, "step": 2621 }, { "epoch": 0.4714555425694507, "grad_norm": 1.5572574138641357, "learning_rate": 9.892066007796925e-06, "loss": 0.853, "step": 2622 }, { "epoch": 0.47163535017531244, "grad_norm": 1.4990078210830688, "learning_rate": 9.891945612010806e-06, "loss": 0.8392, "step": 2623 }, { "epoch": 0.47181515778117417, "grad_norm": 1.0901527404785156, "learning_rate": 9.891825149847403e-06, "loss": 1.1118, "step": 2624 }, { "epoch": 0.4719949653870359, "grad_norm": 1.5297986268997192, "learning_rate": 9.891704621308352e-06, "loss": 0.8599, "step": 2625 }, { "epoch": 0.4721747729928976, "grad_norm": 1.5217336416244507, "learning_rate": 9.891584026395286e-06, "loss": 0.8471, "step": 2626 }, { "epoch": 0.47235458059875934, "grad_norm": 1.4804110527038574, "learning_rate": 9.891463365109844e-06, "loss": 0.8039, "step": 2627 }, { "epoch": 0.47253438820462107, "grad_norm": 1.5455600023269653, "learning_rate": 9.891342637453663e-06, "loss": 0.7812, "step": 2628 }, { "epoch": 0.4727141958104828, "grad_norm": 1.653765082359314, "learning_rate": 9.89122184342838e-06, "loss": 0.8522, "step": 2629 }, { "epoch": 0.4728940034163445, "grad_norm": 1.5558215379714966, "learning_rate": 9.891100983035635e-06, "loss": 0.8034, "step": 2630 }, { "epoch": 0.47307381102220625, "grad_norm": 1.7370260953903198, "learning_rate": 9.890980056277068e-06, "loss": 0.8912, "step": 2631 }, { "epoch": 0.473253618628068, "grad_norm": 1.4413740634918213, "learning_rate": 9.890859063154319e-06, "loss": 0.7875, "step": 2632 }, { "epoch": 0.4734334262339297, "grad_norm": 1.0616861581802368, "learning_rate": 9.890738003669029e-06, "loss": 1.0768, "step": 2633 }, { "epoch": 0.4736132338397914, "grad_norm": 1.488640546798706, "learning_rate": 9.890616877822842e-06, "loss": 0.7908, "step": 2634 }, { "epoch": 0.47379304144565315, "grad_norm": 1.3972223997116089, "learning_rate": 9.890495685617401e-06, "loss": 0.7055, "step": 2635 }, { "epoch": 0.4739728490515149, "grad_norm": 1.5581254959106445, "learning_rate": 9.89037442705435e-06, "loss": 0.8335, "step": 2636 }, { "epoch": 0.4741526566573766, "grad_norm": 1.63548743724823, "learning_rate": 9.890253102135337e-06, "loss": 0.8258, "step": 2637 }, { "epoch": 0.47433246426323833, "grad_norm": 1.1398749351501465, "learning_rate": 9.890131710862005e-06, "loss": 1.0868, "step": 2638 }, { "epoch": 0.47451227186910006, "grad_norm": 2.692257881164551, "learning_rate": 9.890010253236003e-06, "loss": 0.9008, "step": 2639 }, { "epoch": 0.4746920794749618, "grad_norm": 1.6194720268249512, "learning_rate": 9.889888729258976e-06, "loss": 0.83, "step": 2640 }, { "epoch": 0.4748718870808235, "grad_norm": 1.56401526927948, "learning_rate": 9.889767138932576e-06, "loss": 0.8653, "step": 2641 }, { "epoch": 0.47505169468668523, "grad_norm": 1.4461244344711304, "learning_rate": 9.889645482258453e-06, "loss": 0.7801, "step": 2642 }, { "epoch": 0.47523150229254696, "grad_norm": 1.513426661491394, "learning_rate": 9.889523759238255e-06, "loss": 0.8825, "step": 2643 }, { "epoch": 0.4754113098984087, "grad_norm": 1.1280999183654785, "learning_rate": 9.889401969873638e-06, "loss": 1.036, "step": 2644 }, { "epoch": 0.4755911175042704, "grad_norm": 1.464083194732666, "learning_rate": 9.889280114166249e-06, "loss": 0.7786, "step": 2645 }, { "epoch": 0.47577092511013214, "grad_norm": 1.543268084526062, "learning_rate": 9.889158192117745e-06, "loss": 0.8003, "step": 2646 }, { "epoch": 0.47595073271599386, "grad_norm": 1.7254375219345093, "learning_rate": 9.88903620372978e-06, "loss": 0.8803, "step": 2647 }, { "epoch": 0.4761305403218556, "grad_norm": 1.6360265016555786, "learning_rate": 9.88891414900401e-06, "loss": 0.8842, "step": 2648 }, { "epoch": 0.47631034792771737, "grad_norm": 1.5125691890716553, "learning_rate": 9.88879202794209e-06, "loss": 0.7904, "step": 2649 }, { "epoch": 0.4764901555335791, "grad_norm": 1.551940679550171, "learning_rate": 9.888669840545675e-06, "loss": 0.8546, "step": 2650 }, { "epoch": 0.4766699631394408, "grad_norm": 1.6023621559143066, "learning_rate": 9.888547586816424e-06, "loss": 0.8288, "step": 2651 }, { "epoch": 0.47684977074530255, "grad_norm": 1.6231268644332886, "learning_rate": 9.888425266755998e-06, "loss": 0.8813, "step": 2652 }, { "epoch": 0.4770295783511643, "grad_norm": 2.2432503700256348, "learning_rate": 9.888302880366056e-06, "loss": 0.8183, "step": 2653 }, { "epoch": 0.477209385957026, "grad_norm": 1.7438609600067139, "learning_rate": 9.888180427648258e-06, "loss": 0.8484, "step": 2654 }, { "epoch": 0.4773891935628877, "grad_norm": 1.61137056350708, "learning_rate": 9.888057908604265e-06, "loss": 0.9127, "step": 2655 }, { "epoch": 0.47756900116874945, "grad_norm": 1.466927170753479, "learning_rate": 9.88793532323574e-06, "loss": 0.852, "step": 2656 }, { "epoch": 0.4777488087746112, "grad_norm": 1.7160851955413818, "learning_rate": 9.887812671544348e-06, "loss": 0.8589, "step": 2657 }, { "epoch": 0.4779286163804729, "grad_norm": 1.5081639289855957, "learning_rate": 9.88768995353175e-06, "loss": 0.8211, "step": 2658 }, { "epoch": 0.47810842398633463, "grad_norm": 1.4571871757507324, "learning_rate": 9.887567169199612e-06, "loss": 0.7699, "step": 2659 }, { "epoch": 0.47828823159219636, "grad_norm": 1.6019238233566284, "learning_rate": 9.887444318549601e-06, "loss": 0.8068, "step": 2660 }, { "epoch": 0.4784680391980581, "grad_norm": 1.7857565879821777, "learning_rate": 9.887321401583384e-06, "loss": 0.802, "step": 2661 }, { "epoch": 0.4786478468039198, "grad_norm": 1.5226688385009766, "learning_rate": 9.887198418302629e-06, "loss": 0.8219, "step": 2662 }, { "epoch": 0.47882765440978153, "grad_norm": 1.4473907947540283, "learning_rate": 9.887075368709002e-06, "loss": 0.7927, "step": 2663 }, { "epoch": 0.47900746201564326, "grad_norm": 1.6714158058166504, "learning_rate": 9.886952252804177e-06, "loss": 0.8266, "step": 2664 }, { "epoch": 0.479187269621505, "grad_norm": 1.4423799514770508, "learning_rate": 9.886829070589821e-06, "loss": 0.7842, "step": 2665 }, { "epoch": 0.4793670772273667, "grad_norm": 1.5578676462173462, "learning_rate": 9.886705822067608e-06, "loss": 0.8317, "step": 2666 }, { "epoch": 0.47954688483322844, "grad_norm": 1.6396561861038208, "learning_rate": 9.886582507239208e-06, "loss": 0.7976, "step": 2667 }, { "epoch": 0.47972669243909016, "grad_norm": 1.52335524559021, "learning_rate": 9.886459126106296e-06, "loss": 0.8197, "step": 2668 }, { "epoch": 0.4799065000449519, "grad_norm": 1.1716266870498657, "learning_rate": 9.886335678670544e-06, "loss": 1.0384, "step": 2669 }, { "epoch": 0.4800863076508136, "grad_norm": 1.720466136932373, "learning_rate": 9.88621216493363e-06, "loss": 0.8549, "step": 2670 }, { "epoch": 0.48026611525667534, "grad_norm": 1.5848422050476074, "learning_rate": 9.886088584897227e-06, "loss": 0.8905, "step": 2671 }, { "epoch": 0.48044592286253707, "grad_norm": 2.021686553955078, "learning_rate": 9.885964938563014e-06, "loss": 0.8053, "step": 2672 }, { "epoch": 0.4806257304683988, "grad_norm": 1.526981234550476, "learning_rate": 9.885841225932667e-06, "loss": 0.8599, "step": 2673 }, { "epoch": 0.4808055380742605, "grad_norm": 1.5014543533325195, "learning_rate": 9.885717447007866e-06, "loss": 0.8966, "step": 2674 }, { "epoch": 0.48098534568012224, "grad_norm": 1.5986942052841187, "learning_rate": 9.885593601790292e-06, "loss": 0.8272, "step": 2675 }, { "epoch": 0.48116515328598397, "grad_norm": 1.5495703220367432, "learning_rate": 9.88546969028162e-06, "loss": 0.9018, "step": 2676 }, { "epoch": 0.48134496089184575, "grad_norm": 1.6618950366973877, "learning_rate": 9.885345712483535e-06, "loss": 0.8405, "step": 2677 }, { "epoch": 0.4815247684977075, "grad_norm": 1.5490273237228394, "learning_rate": 9.88522166839772e-06, "loss": 0.7992, "step": 2678 }, { "epoch": 0.4817045761035692, "grad_norm": 1.433476209640503, "learning_rate": 9.885097558025858e-06, "loss": 0.8124, "step": 2679 }, { "epoch": 0.48188438370943093, "grad_norm": 1.5674537420272827, "learning_rate": 9.884973381369631e-06, "loss": 0.7715, "step": 2680 }, { "epoch": 0.48206419131529266, "grad_norm": 1.5420477390289307, "learning_rate": 9.884849138430725e-06, "loss": 0.7458, "step": 2681 }, { "epoch": 0.4822439989211544, "grad_norm": 1.7203078269958496, "learning_rate": 9.884724829210826e-06, "loss": 0.8548, "step": 2682 }, { "epoch": 0.4824238065270161, "grad_norm": 1.6063923835754395, "learning_rate": 9.88460045371162e-06, "loss": 0.825, "step": 2683 }, { "epoch": 0.48260361413287783, "grad_norm": 1.5032329559326172, "learning_rate": 9.884476011934795e-06, "loss": 0.8436, "step": 2684 }, { "epoch": 0.48278342173873956, "grad_norm": 1.4637402296066284, "learning_rate": 9.884351503882039e-06, "loss": 0.7866, "step": 2685 }, { "epoch": 0.4829632293446013, "grad_norm": 1.4989657402038574, "learning_rate": 9.884226929555045e-06, "loss": 0.8512, "step": 2686 }, { "epoch": 0.483143036950463, "grad_norm": 1.504847526550293, "learning_rate": 9.884102288955498e-06, "loss": 0.8013, "step": 2687 }, { "epoch": 0.48332284455632474, "grad_norm": 1.4925456047058105, "learning_rate": 9.883977582085091e-06, "loss": 0.8372, "step": 2688 }, { "epoch": 0.48350265216218646, "grad_norm": 1.589509129524231, "learning_rate": 9.883852808945517e-06, "loss": 0.8098, "step": 2689 }, { "epoch": 0.4836824597680482, "grad_norm": 1.5726357698440552, "learning_rate": 9.88372796953847e-06, "loss": 0.7954, "step": 2690 }, { "epoch": 0.4838622673739099, "grad_norm": 1.5816409587860107, "learning_rate": 9.883603063865642e-06, "loss": 0.881, "step": 2691 }, { "epoch": 0.48404207497977164, "grad_norm": 1.5810505151748657, "learning_rate": 9.883478091928727e-06, "loss": 0.7943, "step": 2692 }, { "epoch": 0.48422188258563337, "grad_norm": 1.5800104141235352, "learning_rate": 9.883353053729425e-06, "loss": 0.8036, "step": 2693 }, { "epoch": 0.4844016901914951, "grad_norm": 1.5626944303512573, "learning_rate": 9.883227949269427e-06, "loss": 0.8646, "step": 2694 }, { "epoch": 0.4845814977973568, "grad_norm": 1.310742735862732, "learning_rate": 9.883102778550434e-06, "loss": 1.0502, "step": 2695 }, { "epoch": 0.48476130540321855, "grad_norm": 1.6059093475341797, "learning_rate": 9.882977541574144e-06, "loss": 0.8643, "step": 2696 }, { "epoch": 0.48494111300908027, "grad_norm": 1.526673674583435, "learning_rate": 9.882852238342256e-06, "loss": 0.8663, "step": 2697 }, { "epoch": 0.485120920614942, "grad_norm": 1.426153302192688, "learning_rate": 9.882726868856469e-06, "loss": 0.8025, "step": 2698 }, { "epoch": 0.4853007282208037, "grad_norm": 1.4647432565689087, "learning_rate": 9.882601433118487e-06, "loss": 0.8124, "step": 2699 }, { "epoch": 0.48548053582666545, "grad_norm": 1.3956128358840942, "learning_rate": 9.88247593113001e-06, "loss": 0.72, "step": 2700 }, { "epoch": 0.4856603434325272, "grad_norm": 1.4736442565917969, "learning_rate": 9.882350362892739e-06, "loss": 0.8018, "step": 2701 }, { "epoch": 0.4858401510383889, "grad_norm": 1.5319072008132935, "learning_rate": 9.88222472840838e-06, "loss": 0.8294, "step": 2702 }, { "epoch": 0.4860199586442506, "grad_norm": 1.4821677207946777, "learning_rate": 9.88209902767864e-06, "loss": 0.7742, "step": 2703 }, { "epoch": 0.48619976625011235, "grad_norm": 1.4794350862503052, "learning_rate": 9.88197326070522e-06, "loss": 0.8188, "step": 2704 }, { "epoch": 0.48637957385597413, "grad_norm": 1.4514191150665283, "learning_rate": 9.88184742748983e-06, "loss": 0.7973, "step": 2705 }, { "epoch": 0.48655938146183586, "grad_norm": 1.5145338773727417, "learning_rate": 9.881721528034174e-06, "loss": 0.8429, "step": 2706 }, { "epoch": 0.4867391890676976, "grad_norm": 1.4515248537063599, "learning_rate": 9.881595562339964e-06, "loss": 0.8314, "step": 2707 }, { "epoch": 0.4869189966735593, "grad_norm": 1.6116957664489746, "learning_rate": 9.88146953040891e-06, "loss": 0.8238, "step": 2708 }, { "epoch": 0.48709880427942104, "grad_norm": 1.140925407409668, "learning_rate": 9.881343432242716e-06, "loss": 1.0659, "step": 2709 }, { "epoch": 0.48727861188528276, "grad_norm": 1.5731008052825928, "learning_rate": 9.881217267843098e-06, "loss": 0.8426, "step": 2710 }, { "epoch": 0.4874584194911445, "grad_norm": 1.5284005403518677, "learning_rate": 9.881091037211765e-06, "loss": 0.8356, "step": 2711 }, { "epoch": 0.4876382270970062, "grad_norm": 1.1548473834991455, "learning_rate": 9.880964740350432e-06, "loss": 1.0831, "step": 2712 }, { "epoch": 0.48781803470286794, "grad_norm": 1.5759364366531372, "learning_rate": 9.880838377260813e-06, "loss": 0.8589, "step": 2713 }, { "epoch": 0.48799784230872967, "grad_norm": 1.435999870300293, "learning_rate": 9.88071194794462e-06, "loss": 0.7671, "step": 2714 }, { "epoch": 0.4881776499145914, "grad_norm": 1.5518922805786133, "learning_rate": 9.880585452403572e-06, "loss": 0.8434, "step": 2715 }, { "epoch": 0.4883574575204531, "grad_norm": 1.582343578338623, "learning_rate": 9.880458890639382e-06, "loss": 0.8168, "step": 2716 }, { "epoch": 0.48853726512631485, "grad_norm": 1.6809476613998413, "learning_rate": 9.880332262653768e-06, "loss": 0.8262, "step": 2717 }, { "epoch": 0.48871707273217657, "grad_norm": 1.2350448369979858, "learning_rate": 9.88020556844845e-06, "loss": 1.0688, "step": 2718 }, { "epoch": 0.4888968803380383, "grad_norm": 1.4917434453964233, "learning_rate": 9.880078808025147e-06, "loss": 0.8031, "step": 2719 }, { "epoch": 0.4890766879439, "grad_norm": 1.5269992351531982, "learning_rate": 9.879951981385577e-06, "loss": 0.7826, "step": 2720 }, { "epoch": 0.48925649554976175, "grad_norm": 1.5460293292999268, "learning_rate": 9.879825088531463e-06, "loss": 0.7717, "step": 2721 }, { "epoch": 0.4894363031556235, "grad_norm": 1.705568552017212, "learning_rate": 9.879698129464523e-06, "loss": 0.8081, "step": 2722 }, { "epoch": 0.4896161107614852, "grad_norm": 1.6612125635147095, "learning_rate": 9.879571104186482e-06, "loss": 0.8799, "step": 2723 }, { "epoch": 0.4897959183673469, "grad_norm": 1.625460147857666, "learning_rate": 9.879444012699066e-06, "loss": 0.8474, "step": 2724 }, { "epoch": 0.48997572597320865, "grad_norm": 1.4683929681777954, "learning_rate": 9.879316855003997e-06, "loss": 0.785, "step": 2725 }, { "epoch": 0.4901555335790704, "grad_norm": 1.556555986404419, "learning_rate": 9.879189631103e-06, "loss": 0.8119, "step": 2726 }, { "epoch": 0.4903353411849321, "grad_norm": 1.5060425996780396, "learning_rate": 9.879062340997802e-06, "loss": 0.7958, "step": 2727 }, { "epoch": 0.49051514879079383, "grad_norm": 1.4450324773788452, "learning_rate": 9.878934984690129e-06, "loss": 0.7667, "step": 2728 }, { "epoch": 0.49069495639665556, "grad_norm": 1.630951166152954, "learning_rate": 9.878807562181712e-06, "loss": 0.8544, "step": 2729 }, { "epoch": 0.4908747640025173, "grad_norm": 1.6447012424468994, "learning_rate": 9.878680073474277e-06, "loss": 0.8032, "step": 2730 }, { "epoch": 0.491054571608379, "grad_norm": 1.625891923904419, "learning_rate": 9.878552518569555e-06, "loss": 0.8416, "step": 2731 }, { "epoch": 0.49123437921424074, "grad_norm": 1.4577361345291138, "learning_rate": 9.878424897469276e-06, "loss": 0.8052, "step": 2732 }, { "epoch": 0.4914141868201025, "grad_norm": 1.5171576738357544, "learning_rate": 9.878297210175173e-06, "loss": 0.8144, "step": 2733 }, { "epoch": 0.49159399442596424, "grad_norm": 1.3226771354675293, "learning_rate": 9.878169456688977e-06, "loss": 1.0821, "step": 2734 }, { "epoch": 0.49177380203182597, "grad_norm": 1.7085977792739868, "learning_rate": 9.878041637012424e-06, "loss": 0.8179, "step": 2735 }, { "epoch": 0.4919536096376877, "grad_norm": 1.5238264799118042, "learning_rate": 9.877913751147245e-06, "loss": 0.8783, "step": 2736 }, { "epoch": 0.4921334172435494, "grad_norm": 1.5561704635620117, "learning_rate": 9.877785799095178e-06, "loss": 0.8227, "step": 2737 }, { "epoch": 0.49231322484941115, "grad_norm": 1.5390068292617798, "learning_rate": 9.877657780857957e-06, "loss": 0.8214, "step": 2738 }, { "epoch": 0.49249303245527287, "grad_norm": 1.449344515800476, "learning_rate": 9.87752969643732e-06, "loss": 0.8152, "step": 2739 }, { "epoch": 0.4926728400611346, "grad_norm": 1.556854248046875, "learning_rate": 9.877401545835006e-06, "loss": 0.7905, "step": 2740 }, { "epoch": 0.4928526476669963, "grad_norm": 1.2458415031433105, "learning_rate": 9.877273329052753e-06, "loss": 1.1098, "step": 2741 }, { "epoch": 0.49303245527285805, "grad_norm": 1.4506947994232178, "learning_rate": 9.8771450460923e-06, "loss": 0.8264, "step": 2742 }, { "epoch": 0.4932122628787198, "grad_norm": 1.155693531036377, "learning_rate": 9.877016696955388e-06, "loss": 1.0187, "step": 2743 }, { "epoch": 0.4933920704845815, "grad_norm": 1.4002312421798706, "learning_rate": 9.87688828164376e-06, "loss": 0.7965, "step": 2744 }, { "epoch": 0.49357187809044323, "grad_norm": 1.5144938230514526, "learning_rate": 9.876759800159155e-06, "loss": 0.8544, "step": 2745 }, { "epoch": 0.49375168569630495, "grad_norm": 1.4909722805023193, "learning_rate": 9.87663125250332e-06, "loss": 0.8292, "step": 2746 }, { "epoch": 0.4939314933021667, "grad_norm": 1.7874755859375, "learning_rate": 9.876502638677997e-06, "loss": 0.7711, "step": 2747 }, { "epoch": 0.4941113009080284, "grad_norm": 1.4681938886642456, "learning_rate": 9.876373958684933e-06, "loss": 0.7809, "step": 2748 }, { "epoch": 0.49429110851389013, "grad_norm": 1.433597207069397, "learning_rate": 9.87624521252587e-06, "loss": 0.8456, "step": 2749 }, { "epoch": 0.49447091611975186, "grad_norm": 1.652563452720642, "learning_rate": 9.876116400202562e-06, "loss": 0.8082, "step": 2750 }, { "epoch": 0.4946507237256136, "grad_norm": 1.7612197399139404, "learning_rate": 9.87598752171675e-06, "loss": 0.83, "step": 2751 }, { "epoch": 0.4948305313314753, "grad_norm": 1.662824273109436, "learning_rate": 9.875858577070186e-06, "loss": 0.879, "step": 2752 }, { "epoch": 0.49501033893733704, "grad_norm": 1.3979262113571167, "learning_rate": 9.875729566264617e-06, "loss": 1.0905, "step": 2753 }, { "epoch": 0.49519014654319876, "grad_norm": 1.4718728065490723, "learning_rate": 9.875600489301798e-06, "loss": 0.8788, "step": 2754 }, { "epoch": 0.4953699541490605, "grad_norm": 1.6625784635543823, "learning_rate": 9.875471346183476e-06, "loss": 0.8731, "step": 2755 }, { "epoch": 0.4955497617549222, "grad_norm": 1.0133012533187866, "learning_rate": 9.875342136911405e-06, "loss": 1.0916, "step": 2756 }, { "epoch": 0.49572956936078394, "grad_norm": 1.559998631477356, "learning_rate": 9.87521286148734e-06, "loss": 0.9193, "step": 2757 }, { "epoch": 0.49590937696664567, "grad_norm": 1.5084973573684692, "learning_rate": 9.875083519913034e-06, "loss": 0.8332, "step": 2758 }, { "epoch": 0.4960891845725074, "grad_norm": 1.6086689233779907, "learning_rate": 9.874954112190238e-06, "loss": 0.7898, "step": 2759 }, { "epoch": 0.4962689921783692, "grad_norm": 1.5646920204162598, "learning_rate": 9.874824638320715e-06, "loss": 0.9862, "step": 2760 }, { "epoch": 0.4964487997842309, "grad_norm": 1.5955164432525635, "learning_rate": 9.874695098306215e-06, "loss": 0.8299, "step": 2761 }, { "epoch": 0.4966286073900926, "grad_norm": 1.4513943195343018, "learning_rate": 9.8745654921485e-06, "loss": 0.8118, "step": 2762 }, { "epoch": 0.49680841499595435, "grad_norm": 1.450607419013977, "learning_rate": 9.874435819849328e-06, "loss": 0.8048, "step": 2763 }, { "epoch": 0.4969882226018161, "grad_norm": 1.4487707614898682, "learning_rate": 9.874306081410459e-06, "loss": 0.8524, "step": 2764 }, { "epoch": 0.4971680302076778, "grad_norm": 1.460255742073059, "learning_rate": 9.87417627683365e-06, "loss": 0.8271, "step": 2765 }, { "epoch": 0.49734783781353953, "grad_norm": 1.5924937725067139, "learning_rate": 9.874046406120665e-06, "loss": 0.7361, "step": 2766 }, { "epoch": 0.49752764541940125, "grad_norm": 1.6148821115493774, "learning_rate": 9.873916469273265e-06, "loss": 0.8437, "step": 2767 }, { "epoch": 0.497707453025263, "grad_norm": 1.5710171461105347, "learning_rate": 9.873786466293215e-06, "loss": 0.8211, "step": 2768 }, { "epoch": 0.4978872606311247, "grad_norm": 1.5399343967437744, "learning_rate": 9.873656397182278e-06, "loss": 0.7426, "step": 2769 }, { "epoch": 0.49806706823698643, "grad_norm": 1.4640452861785889, "learning_rate": 9.873526261942217e-06, "loss": 1.1088, "step": 2770 }, { "epoch": 0.49824687584284816, "grad_norm": 1.601380467414856, "learning_rate": 9.8733960605748e-06, "loss": 0.7977, "step": 2771 }, { "epoch": 0.4984266834487099, "grad_norm": 1.1038665771484375, "learning_rate": 9.873265793081794e-06, "loss": 1.0558, "step": 2772 }, { "epoch": 0.4986064910545716, "grad_norm": 1.4351387023925781, "learning_rate": 9.873135459464965e-06, "loss": 0.7455, "step": 2773 }, { "epoch": 0.49878629866043334, "grad_norm": 1.150199055671692, "learning_rate": 9.873005059726083e-06, "loss": 1.0861, "step": 2774 }, { "epoch": 0.49896610626629506, "grad_norm": 1.6071783304214478, "learning_rate": 9.872874593866914e-06, "loss": 0.8181, "step": 2775 }, { "epoch": 0.4991459138721568, "grad_norm": 1.801246166229248, "learning_rate": 9.872744061889233e-06, "loss": 0.7985, "step": 2776 }, { "epoch": 0.4993257214780185, "grad_norm": 1.1881557703018188, "learning_rate": 9.872613463794806e-06, "loss": 1.0441, "step": 2777 }, { "epoch": 0.49950552908388024, "grad_norm": 1.65872323513031, "learning_rate": 9.87248279958541e-06, "loss": 0.7665, "step": 2778 }, { "epoch": 0.49968533668974197, "grad_norm": 1.423622965812683, "learning_rate": 9.872352069262817e-06, "loss": 0.8589, "step": 2779 }, { "epoch": 0.4998651442956037, "grad_norm": 1.5168243646621704, "learning_rate": 9.872221272828797e-06, "loss": 0.8919, "step": 2780 }, { "epoch": 0.5000449519014655, "grad_norm": 1.5212205648422241, "learning_rate": 9.872090410285127e-06, "loss": 0.8298, "step": 2781 }, { "epoch": 0.5002247595073271, "grad_norm": 1.470539927482605, "learning_rate": 9.871959481633584e-06, "loss": 0.7913, "step": 2782 }, { "epoch": 0.5004045671131889, "grad_norm": 1.5908411741256714, "learning_rate": 9.871828486875945e-06, "loss": 0.9184, "step": 2783 }, { "epoch": 0.5005843747190506, "grad_norm": 1.515014410018921, "learning_rate": 9.871697426013985e-06, "loss": 0.8055, "step": 2784 }, { "epoch": 0.5007641823249124, "grad_norm": 1.557043194770813, "learning_rate": 9.871566299049482e-06, "loss": 0.7808, "step": 2785 }, { "epoch": 0.500943989930774, "grad_norm": 1.5245405435562134, "learning_rate": 9.871435105984217e-06, "loss": 0.8291, "step": 2786 }, { "epoch": 0.5011237975366358, "grad_norm": 1.157729148864746, "learning_rate": 9.87130384681997e-06, "loss": 1.0386, "step": 2787 }, { "epoch": 0.5013036051424975, "grad_norm": 1.5036097764968872, "learning_rate": 9.871172521558523e-06, "loss": 0.8125, "step": 2788 }, { "epoch": 0.5014834127483593, "grad_norm": 1.4678460359573364, "learning_rate": 9.871041130201656e-06, "loss": 0.8423, "step": 2789 }, { "epoch": 0.501663220354221, "grad_norm": 1.523374319076538, "learning_rate": 9.87090967275115e-06, "loss": 0.8594, "step": 2790 }, { "epoch": 0.5018430279600827, "grad_norm": 1.587170124053955, "learning_rate": 9.870778149208793e-06, "loss": 0.8767, "step": 2791 }, { "epoch": 0.5020228355659444, "grad_norm": 1.584244728088379, "learning_rate": 9.870646559576366e-06, "loss": 0.7954, "step": 2792 }, { "epoch": 0.5022026431718062, "grad_norm": 1.4825519323349, "learning_rate": 9.870514903855658e-06, "loss": 0.8659, "step": 2793 }, { "epoch": 0.5023824507776679, "grad_norm": 1.113163948059082, "learning_rate": 9.870383182048453e-06, "loss": 1.0636, "step": 2794 }, { "epoch": 0.5025622583835296, "grad_norm": 1.6009700298309326, "learning_rate": 9.870251394156538e-06, "loss": 0.8838, "step": 2795 }, { "epoch": 0.5027420659893913, "grad_norm": 1.5178016424179077, "learning_rate": 9.870119540181704e-06, "loss": 0.8055, "step": 2796 }, { "epoch": 0.5029218735952531, "grad_norm": 1.3906980752944946, "learning_rate": 9.869987620125736e-06, "loss": 0.7786, "step": 2797 }, { "epoch": 0.5031016812011148, "grad_norm": 1.5469944477081299, "learning_rate": 9.869855633990428e-06, "loss": 0.8256, "step": 2798 }, { "epoch": 0.5032814888069765, "grad_norm": 1.5894217491149902, "learning_rate": 9.869723581777567e-06, "loss": 0.8416, "step": 2799 }, { "epoch": 0.5034612964128382, "grad_norm": 1.8122004270553589, "learning_rate": 9.869591463488948e-06, "loss": 0.8232, "step": 2800 }, { "epoch": 0.5036411040187, "grad_norm": 1.114648461341858, "learning_rate": 9.86945927912636e-06, "loss": 1.0865, "step": 2801 }, { "epoch": 0.5038209116245618, "grad_norm": 1.5149636268615723, "learning_rate": 9.869327028691602e-06, "loss": 0.8097, "step": 2802 }, { "epoch": 0.5040007192304234, "grad_norm": 1.0683034658432007, "learning_rate": 9.869194712186465e-06, "loss": 1.0987, "step": 2803 }, { "epoch": 0.5041805268362852, "grad_norm": 1.732206106185913, "learning_rate": 9.869062329612744e-06, "loss": 0.8502, "step": 2804 }, { "epoch": 0.5043603344421469, "grad_norm": 1.3989015817642212, "learning_rate": 9.868929880972237e-06, "loss": 0.8125, "step": 2805 }, { "epoch": 0.5045401420480087, "grad_norm": 1.4979443550109863, "learning_rate": 9.86879736626674e-06, "loss": 0.8383, "step": 2806 }, { "epoch": 0.5047199496538703, "grad_norm": 1.687671422958374, "learning_rate": 9.868664785498049e-06, "loss": 0.8575, "step": 2807 }, { "epoch": 0.5048997572597321, "grad_norm": 1.6139440536499023, "learning_rate": 9.868532138667968e-06, "loss": 0.8651, "step": 2808 }, { "epoch": 0.5050795648655938, "grad_norm": 1.459777593612671, "learning_rate": 9.868399425778293e-06, "loss": 0.7566, "step": 2809 }, { "epoch": 0.5052593724714556, "grad_norm": 1.4670398235321045, "learning_rate": 9.868266646830827e-06, "loss": 0.7408, "step": 2810 }, { "epoch": 0.5054391800773173, "grad_norm": 1.457495093345642, "learning_rate": 9.868133801827368e-06, "loss": 0.7338, "step": 2811 }, { "epoch": 0.505618987683179, "grad_norm": 1.4711182117462158, "learning_rate": 9.868000890769722e-06, "loss": 0.7723, "step": 2812 }, { "epoch": 0.5057987952890407, "grad_norm": 1.5023043155670166, "learning_rate": 9.867867913659692e-06, "loss": 0.8863, "step": 2813 }, { "epoch": 0.5059786028949025, "grad_norm": 1.4627606868743896, "learning_rate": 9.867734870499082e-06, "loss": 0.8065, "step": 2814 }, { "epoch": 0.5061584105007642, "grad_norm": 1.1974674463272095, "learning_rate": 9.867601761289696e-06, "loss": 1.0706, "step": 2815 }, { "epoch": 0.5063382181066259, "grad_norm": 1.6733962297439575, "learning_rate": 9.86746858603334e-06, "loss": 0.8053, "step": 2816 }, { "epoch": 0.5065180257124876, "grad_norm": 1.6003751754760742, "learning_rate": 9.867335344731824e-06, "loss": 0.7388, "step": 2817 }, { "epoch": 0.5066978333183494, "grad_norm": 1.582028865814209, "learning_rate": 9.867202037386953e-06, "loss": 0.7571, "step": 2818 }, { "epoch": 0.5068776409242111, "grad_norm": 1.5875829458236694, "learning_rate": 9.867068664000538e-06, "loss": 0.827, "step": 2819 }, { "epoch": 0.5070574485300728, "grad_norm": 1.6548863649368286, "learning_rate": 9.866935224574387e-06, "loss": 0.8135, "step": 2820 }, { "epoch": 0.5072372561359345, "grad_norm": 1.0511245727539062, "learning_rate": 9.866801719110311e-06, "loss": 1.0692, "step": 2821 }, { "epoch": 0.5074170637417963, "grad_norm": 1.5078028440475464, "learning_rate": 9.866668147610122e-06, "loss": 0.8781, "step": 2822 }, { "epoch": 0.507596871347658, "grad_norm": 0.991702139377594, "learning_rate": 9.866534510075629e-06, "loss": 1.0371, "step": 2823 }, { "epoch": 0.5077766789535197, "grad_norm": 1.5570348501205444, "learning_rate": 9.86640080650865e-06, "loss": 0.8648, "step": 2824 }, { "epoch": 0.5079564865593814, "grad_norm": 1.6372255086898804, "learning_rate": 9.866267036911e-06, "loss": 0.8355, "step": 2825 }, { "epoch": 0.5081362941652432, "grad_norm": 1.6670048236846924, "learning_rate": 9.866133201284489e-06, "loss": 0.8539, "step": 2826 }, { "epoch": 0.5083161017711049, "grad_norm": 1.600370168685913, "learning_rate": 9.865999299630936e-06, "loss": 0.79, "step": 2827 }, { "epoch": 0.5084959093769666, "grad_norm": 1.5237643718719482, "learning_rate": 9.865865331952159e-06, "loss": 0.8131, "step": 2828 }, { "epoch": 0.5086757169828283, "grad_norm": 1.7584747076034546, "learning_rate": 9.865731298249971e-06, "loss": 0.8206, "step": 2829 }, { "epoch": 0.5088555245886901, "grad_norm": 1.6575855016708374, "learning_rate": 9.865597198526196e-06, "loss": 0.7259, "step": 2830 }, { "epoch": 0.5090353321945519, "grad_norm": 1.6491365432739258, "learning_rate": 9.86546303278265e-06, "loss": 0.8419, "step": 2831 }, { "epoch": 0.5092151398004136, "grad_norm": 1.726701021194458, "learning_rate": 9.865328801021155e-06, "loss": 0.8203, "step": 2832 }, { "epoch": 0.5093949474062753, "grad_norm": 1.4629673957824707, "learning_rate": 9.865194503243533e-06, "loss": 0.839, "step": 2833 }, { "epoch": 0.509574755012137, "grad_norm": 1.5019280910491943, "learning_rate": 9.865060139451605e-06, "loss": 0.8748, "step": 2834 }, { "epoch": 0.5097545626179988, "grad_norm": 1.2296136617660522, "learning_rate": 9.864925709647194e-06, "loss": 1.055, "step": 2835 }, { "epoch": 0.5099343702238605, "grad_norm": 1.5131192207336426, "learning_rate": 9.864791213832125e-06, "loss": 0.7511, "step": 2836 }, { "epoch": 0.5101141778297222, "grad_norm": 1.7463274002075195, "learning_rate": 9.864656652008223e-06, "loss": 0.8562, "step": 2837 }, { "epoch": 0.5102939854355839, "grad_norm": 1.5388540029525757, "learning_rate": 9.864522024177312e-06, "loss": 0.9179, "step": 2838 }, { "epoch": 0.5104737930414457, "grad_norm": 1.4984623193740845, "learning_rate": 9.864387330341223e-06, "loss": 0.8147, "step": 2839 }, { "epoch": 0.5106536006473074, "grad_norm": 1.5680898427963257, "learning_rate": 9.864252570501777e-06, "loss": 0.8272, "step": 2840 }, { "epoch": 0.5108334082531691, "grad_norm": 1.114948034286499, "learning_rate": 9.864117744660809e-06, "loss": 1.0919, "step": 2841 }, { "epoch": 0.5110132158590308, "grad_norm": 1.4177664518356323, "learning_rate": 9.863982852820144e-06, "loss": 0.7501, "step": 2842 }, { "epoch": 0.5111930234648926, "grad_norm": 1.5137932300567627, "learning_rate": 9.863847894981614e-06, "loss": 0.8377, "step": 2843 }, { "epoch": 0.5113728310707543, "grad_norm": 1.4183534383773804, "learning_rate": 9.863712871147052e-06, "loss": 0.7347, "step": 2844 }, { "epoch": 0.511552638676616, "grad_norm": 1.6793302297592163, "learning_rate": 9.863577781318285e-06, "loss": 0.7793, "step": 2845 }, { "epoch": 0.5117324462824777, "grad_norm": 1.6144028902053833, "learning_rate": 9.863442625497151e-06, "loss": 0.833, "step": 2846 }, { "epoch": 0.5119122538883395, "grad_norm": 1.6071674823760986, "learning_rate": 9.86330740368548e-06, "loss": 0.8363, "step": 2847 }, { "epoch": 0.5120920614942012, "grad_norm": 1.3739421367645264, "learning_rate": 9.863172115885113e-06, "loss": 0.7564, "step": 2848 }, { "epoch": 0.512271869100063, "grad_norm": 1.0346522331237793, "learning_rate": 9.863036762097878e-06, "loss": 1.103, "step": 2849 }, { "epoch": 0.5124516767059246, "grad_norm": 1.9627891778945923, "learning_rate": 9.862901342325617e-06, "loss": 0.9381, "step": 2850 }, { "epoch": 0.5126314843117864, "grad_norm": 1.5541752576828003, "learning_rate": 9.862765856570165e-06, "loss": 0.7803, "step": 2851 }, { "epoch": 0.5128112919176481, "grad_norm": 1.4006567001342773, "learning_rate": 9.862630304833361e-06, "loss": 0.8326, "step": 2852 }, { "epoch": 0.5129910995235099, "grad_norm": 1.5335166454315186, "learning_rate": 9.862494687117043e-06, "loss": 0.8451, "step": 2853 }, { "epoch": 0.5131709071293715, "grad_norm": 1.6571178436279297, "learning_rate": 9.862359003423055e-06, "loss": 0.8368, "step": 2854 }, { "epoch": 0.5133507147352333, "grad_norm": 1.5829408168792725, "learning_rate": 9.862223253753234e-06, "loss": 0.8336, "step": 2855 }, { "epoch": 0.513530522341095, "grad_norm": 1.5073097944259644, "learning_rate": 9.862087438109423e-06, "loss": 0.8453, "step": 2856 }, { "epoch": 0.5137103299469568, "grad_norm": 1.4395487308502197, "learning_rate": 9.861951556493464e-06, "loss": 0.7863, "step": 2857 }, { "epoch": 0.5138901375528185, "grad_norm": 1.4163463115692139, "learning_rate": 9.861815608907204e-06, "loss": 0.8584, "step": 2858 }, { "epoch": 0.5140699451586802, "grad_norm": 1.5346413850784302, "learning_rate": 9.861679595352484e-06, "loss": 0.7946, "step": 2859 }, { "epoch": 0.514249752764542, "grad_norm": 1.2649558782577515, "learning_rate": 9.861543515831152e-06, "loss": 1.0854, "step": 2860 }, { "epoch": 0.5144295603704037, "grad_norm": 1.5281294584274292, "learning_rate": 9.861407370345054e-06, "loss": 0.8247, "step": 2861 }, { "epoch": 0.5146093679762654, "grad_norm": 1.096640944480896, "learning_rate": 9.861271158896036e-06, "loss": 1.0599, "step": 2862 }, { "epoch": 0.5147891755821271, "grad_norm": 1.5111026763916016, "learning_rate": 9.861134881485947e-06, "loss": 0.8547, "step": 2863 }, { "epoch": 0.5149689831879889, "grad_norm": 1.5755475759506226, "learning_rate": 9.860998538116637e-06, "loss": 0.7934, "step": 2864 }, { "epoch": 0.5151487907938506, "grad_norm": 1.114601969718933, "learning_rate": 9.860862128789954e-06, "loss": 1.0628, "step": 2865 }, { "epoch": 0.5153285983997123, "grad_norm": 1.4917776584625244, "learning_rate": 9.86072565350775e-06, "loss": 0.7886, "step": 2866 }, { "epoch": 0.515508406005574, "grad_norm": 1.5675742626190186, "learning_rate": 9.860589112271878e-06, "loss": 0.8754, "step": 2867 }, { "epoch": 0.5156882136114358, "grad_norm": 1.564315915107727, "learning_rate": 9.860452505084188e-06, "loss": 0.7987, "step": 2868 }, { "epoch": 0.5158680212172975, "grad_norm": 1.447059154510498, "learning_rate": 9.860315831946537e-06, "loss": 0.8402, "step": 2869 }, { "epoch": 0.5160478288231592, "grad_norm": 1.1730819940567017, "learning_rate": 9.860179092860776e-06, "loss": 1.0599, "step": 2870 }, { "epoch": 0.5162276364290209, "grad_norm": 1.520383358001709, "learning_rate": 9.860042287828762e-06, "loss": 0.8238, "step": 2871 }, { "epoch": 0.5164074440348827, "grad_norm": 1.657865047454834, "learning_rate": 9.859905416852353e-06, "loss": 0.8802, "step": 2872 }, { "epoch": 0.5165872516407444, "grad_norm": 1.128400444984436, "learning_rate": 9.859768479933402e-06, "loss": 1.0538, "step": 2873 }, { "epoch": 0.5167670592466062, "grad_norm": 1.4449200630187988, "learning_rate": 9.85963147707377e-06, "loss": 0.7879, "step": 2874 }, { "epoch": 0.5169468668524678, "grad_norm": 1.4525744915008545, "learning_rate": 9.859494408275316e-06, "loss": 0.7982, "step": 2875 }, { "epoch": 0.5171266744583296, "grad_norm": 1.437282919883728, "learning_rate": 9.859357273539898e-06, "loss": 0.8006, "step": 2876 }, { "epoch": 0.5173064820641913, "grad_norm": 1.4749571084976196, "learning_rate": 9.85922007286938e-06, "loss": 0.6865, "step": 2877 }, { "epoch": 0.5174862896700531, "grad_norm": 1.6283701658248901, "learning_rate": 9.85908280626562e-06, "loss": 0.8622, "step": 2878 }, { "epoch": 0.5176660972759147, "grad_norm": 1.4818755388259888, "learning_rate": 9.858945473730484e-06, "loss": 0.7949, "step": 2879 }, { "epoch": 0.5178459048817765, "grad_norm": 1.236769199371338, "learning_rate": 9.858808075265831e-06, "loss": 1.1026, "step": 2880 }, { "epoch": 0.5180257124876382, "grad_norm": 1.5856069326400757, "learning_rate": 9.858670610873528e-06, "loss": 0.8504, "step": 2881 }, { "epoch": 0.5182055200935, "grad_norm": 1.688082218170166, "learning_rate": 9.858533080555441e-06, "loss": 0.8734, "step": 2882 }, { "epoch": 0.5183853276993616, "grad_norm": 1.8921289443969727, "learning_rate": 9.858395484313436e-06, "loss": 0.8337, "step": 2883 }, { "epoch": 0.5185651353052234, "grad_norm": 1.047419786453247, "learning_rate": 9.858257822149377e-06, "loss": 1.078, "step": 2884 }, { "epoch": 0.5187449429110852, "grad_norm": 1.649593710899353, "learning_rate": 9.858120094065136e-06, "loss": 0.8013, "step": 2885 }, { "epoch": 0.5189247505169469, "grad_norm": 1.0559779405593872, "learning_rate": 9.857982300062579e-06, "loss": 1.0475, "step": 2886 }, { "epoch": 0.5191045581228086, "grad_norm": 1.4782353639602661, "learning_rate": 9.857844440143577e-06, "loss": 0.8131, "step": 2887 }, { "epoch": 0.5192843657286703, "grad_norm": 1.5970689058303833, "learning_rate": 9.85770651431e-06, "loss": 0.7601, "step": 2888 }, { "epoch": 0.5194641733345321, "grad_norm": 1.5625083446502686, "learning_rate": 9.857568522563718e-06, "loss": 0.9175, "step": 2889 }, { "epoch": 0.5196439809403938, "grad_norm": 1.5294345617294312, "learning_rate": 9.857430464906608e-06, "loss": 0.8192, "step": 2890 }, { "epoch": 0.5198237885462555, "grad_norm": 1.5975404977798462, "learning_rate": 9.857292341340538e-06, "loss": 0.832, "step": 2891 }, { "epoch": 0.5200035961521172, "grad_norm": 1.0977649688720703, "learning_rate": 9.857154151867385e-06, "loss": 1.0732, "step": 2892 }, { "epoch": 0.520183403757979, "grad_norm": 1.5357972383499146, "learning_rate": 9.857015896489022e-06, "loss": 0.8274, "step": 2893 }, { "epoch": 0.5203632113638407, "grad_norm": 1.4695994853973389, "learning_rate": 9.85687757520733e-06, "loss": 0.7991, "step": 2894 }, { "epoch": 0.5205430189697025, "grad_norm": 1.0838979482650757, "learning_rate": 9.856739188024179e-06, "loss": 1.0584, "step": 2895 }, { "epoch": 0.5207228265755641, "grad_norm": 1.4332561492919922, "learning_rate": 9.85660073494145e-06, "loss": 0.8375, "step": 2896 }, { "epoch": 0.5209026341814259, "grad_norm": 1.5555121898651123, "learning_rate": 9.856462215961022e-06, "loss": 0.8358, "step": 2897 }, { "epoch": 0.5210824417872876, "grad_norm": 1.5258874893188477, "learning_rate": 9.856323631084774e-06, "loss": 0.8462, "step": 2898 }, { "epoch": 0.5212622493931494, "grad_norm": 1.517541766166687, "learning_rate": 9.856184980314586e-06, "loss": 0.8059, "step": 2899 }, { "epoch": 0.521442056999011, "grad_norm": 1.6930592060089111, "learning_rate": 9.856046263652343e-06, "loss": 0.8582, "step": 2900 }, { "epoch": 0.5216218646048728, "grad_norm": 1.2000483274459839, "learning_rate": 9.85590748109992e-06, "loss": 1.0692, "step": 2901 }, { "epoch": 0.5218016722107345, "grad_norm": 1.5442404747009277, "learning_rate": 9.855768632659205e-06, "loss": 0.8252, "step": 2902 }, { "epoch": 0.5219814798165963, "grad_norm": 1.4672298431396484, "learning_rate": 9.855629718332083e-06, "loss": 0.8371, "step": 2903 }, { "epoch": 0.5221612874224579, "grad_norm": 1.557005763053894, "learning_rate": 9.855490738120436e-06, "loss": 0.8159, "step": 2904 }, { "epoch": 0.5223410950283197, "grad_norm": 1.4613902568817139, "learning_rate": 9.85535169202615e-06, "loss": 0.8279, "step": 2905 }, { "epoch": 0.5225209026341814, "grad_norm": 1.7315421104431152, "learning_rate": 9.855212580051113e-06, "loss": 0.8641, "step": 2906 }, { "epoch": 0.5227007102400432, "grad_norm": 1.6730095148086548, "learning_rate": 9.855073402197213e-06, "loss": 0.8199, "step": 2907 }, { "epoch": 0.5228805178459048, "grad_norm": 1.4578442573547363, "learning_rate": 9.854934158466336e-06, "loss": 0.8232, "step": 2908 }, { "epoch": 0.5230603254517666, "grad_norm": 1.73176908493042, "learning_rate": 9.854794848860373e-06, "loss": 0.7779, "step": 2909 }, { "epoch": 0.5232401330576283, "grad_norm": 1.522099256515503, "learning_rate": 9.854655473381214e-06, "loss": 0.8908, "step": 2910 }, { "epoch": 0.5234199406634901, "grad_norm": 1.463555932044983, "learning_rate": 9.854516032030752e-06, "loss": 0.8587, "step": 2911 }, { "epoch": 0.5235997482693517, "grad_norm": 1.4704633951187134, "learning_rate": 9.854376524810875e-06, "loss": 0.8069, "step": 2912 }, { "epoch": 0.5237795558752135, "grad_norm": 1.5300673246383667, "learning_rate": 9.854236951723478e-06, "loss": 0.7958, "step": 2913 }, { "epoch": 0.5239593634810753, "grad_norm": 1.125166416168213, "learning_rate": 9.854097312770456e-06, "loss": 1.08, "step": 2914 }, { "epoch": 0.524139171086937, "grad_norm": 1.5498199462890625, "learning_rate": 9.853957607953703e-06, "loss": 0.8192, "step": 2915 }, { "epoch": 0.5243189786927988, "grad_norm": 1.5139758586883545, "learning_rate": 9.853817837275114e-06, "loss": 0.8914, "step": 2916 }, { "epoch": 0.5244987862986604, "grad_norm": 1.5042951107025146, "learning_rate": 9.853678000736585e-06, "loss": 0.8254, "step": 2917 }, { "epoch": 0.5246785939045222, "grad_norm": 1.5047953128814697, "learning_rate": 9.853538098340016e-06, "loss": 0.8269, "step": 2918 }, { "epoch": 0.5248584015103839, "grad_norm": 1.522257685661316, "learning_rate": 9.853398130087302e-06, "loss": 0.8719, "step": 2919 }, { "epoch": 0.5250382091162457, "grad_norm": 1.6084506511688232, "learning_rate": 9.853258095980344e-06, "loss": 0.7993, "step": 2920 }, { "epoch": 0.5252180167221073, "grad_norm": 1.6174088716506958, "learning_rate": 9.853117996021042e-06, "loss": 0.8872, "step": 2921 }, { "epoch": 0.5253978243279691, "grad_norm": 1.1106122732162476, "learning_rate": 9.852977830211297e-06, "loss": 1.0914, "step": 2922 }, { "epoch": 0.5255776319338308, "grad_norm": 1.0264091491699219, "learning_rate": 9.85283759855301e-06, "loss": 1.0853, "step": 2923 }, { "epoch": 0.5257574395396926, "grad_norm": 1.5214885473251343, "learning_rate": 9.852697301048084e-06, "loss": 0.8465, "step": 2924 }, { "epoch": 0.5259372471455542, "grad_norm": 1.4518543481826782, "learning_rate": 9.852556937698423e-06, "loss": 0.8147, "step": 2925 }, { "epoch": 0.526117054751416, "grad_norm": 1.6266547441482544, "learning_rate": 9.852416508505933e-06, "loss": 0.8135, "step": 2926 }, { "epoch": 0.5262968623572777, "grad_norm": 1.6640483140945435, "learning_rate": 9.852276013472516e-06, "loss": 0.7981, "step": 2927 }, { "epoch": 0.5264766699631395, "grad_norm": 1.5620546340942383, "learning_rate": 9.852135452600083e-06, "loss": 0.9133, "step": 2928 }, { "epoch": 0.5266564775690011, "grad_norm": 1.2129878997802734, "learning_rate": 9.851994825890536e-06, "loss": 1.072, "step": 2929 }, { "epoch": 0.5268362851748629, "grad_norm": 1.6514391899108887, "learning_rate": 9.851854133345787e-06, "loss": 0.7956, "step": 2930 }, { "epoch": 0.5270160927807246, "grad_norm": 1.517155408859253, "learning_rate": 9.851713374967743e-06, "loss": 0.8548, "step": 2931 }, { "epoch": 0.5271959003865864, "grad_norm": 1.4086683988571167, "learning_rate": 9.851572550758316e-06, "loss": 0.808, "step": 2932 }, { "epoch": 0.527375707992448, "grad_norm": 1.4861196279525757, "learning_rate": 9.851431660719414e-06, "loss": 0.7919, "step": 2933 }, { "epoch": 0.5275555155983098, "grad_norm": 1.7344998121261597, "learning_rate": 9.851290704852952e-06, "loss": 0.7584, "step": 2934 }, { "epoch": 0.5277353232041715, "grad_norm": 1.5035953521728516, "learning_rate": 9.85114968316084e-06, "loss": 0.7611, "step": 2935 }, { "epoch": 0.5279151308100333, "grad_norm": 1.6596132516860962, "learning_rate": 9.851008595644991e-06, "loss": 0.7892, "step": 2936 }, { "epoch": 0.5280949384158949, "grad_norm": 1.5511903762817383, "learning_rate": 9.85086744230732e-06, "loss": 0.7799, "step": 2937 }, { "epoch": 0.5282747460217567, "grad_norm": 1.5494099855422974, "learning_rate": 9.850726223149744e-06, "loss": 0.8246, "step": 2938 }, { "epoch": 0.5284545536276184, "grad_norm": 1.1974117755889893, "learning_rate": 9.850584938174178e-06, "loss": 1.097, "step": 2939 }, { "epoch": 0.5286343612334802, "grad_norm": 1.7699000835418701, "learning_rate": 9.850443587382538e-06, "loss": 0.8263, "step": 2940 }, { "epoch": 0.528814168839342, "grad_norm": 1.5166035890579224, "learning_rate": 9.850302170776745e-06, "loss": 0.8532, "step": 2941 }, { "epoch": 0.5289939764452036, "grad_norm": 1.570872187614441, "learning_rate": 9.850160688358714e-06, "loss": 0.8116, "step": 2942 }, { "epoch": 0.5291737840510654, "grad_norm": 1.730493187904358, "learning_rate": 9.850019140130367e-06, "loss": 0.8668, "step": 2943 }, { "epoch": 0.5293535916569271, "grad_norm": 1.7648721933364868, "learning_rate": 9.849877526093625e-06, "loss": 0.8533, "step": 2944 }, { "epoch": 0.5295333992627889, "grad_norm": 1.5419971942901611, "learning_rate": 9.849735846250408e-06, "loss": 0.8376, "step": 2945 }, { "epoch": 0.5297132068686505, "grad_norm": 1.6167796850204468, "learning_rate": 9.84959410060264e-06, "loss": 0.8269, "step": 2946 }, { "epoch": 0.5298930144745123, "grad_norm": 1.55133056640625, "learning_rate": 9.849452289152242e-06, "loss": 0.8217, "step": 2947 }, { "epoch": 0.530072822080374, "grad_norm": 1.565618872642517, "learning_rate": 9.84931041190114e-06, "loss": 0.7892, "step": 2948 }, { "epoch": 0.5302526296862358, "grad_norm": 1.5463050603866577, "learning_rate": 9.849168468851257e-06, "loss": 0.8675, "step": 2949 }, { "epoch": 0.5304324372920974, "grad_norm": 1.503185510635376, "learning_rate": 9.849026460004523e-06, "loss": 0.8866, "step": 2950 }, { "epoch": 0.5306122448979592, "grad_norm": 1.4706584215164185, "learning_rate": 9.848884385362862e-06, "loss": 0.7882, "step": 2951 }, { "epoch": 0.5307920525038209, "grad_norm": 1.1215871572494507, "learning_rate": 9.848742244928202e-06, "loss": 1.0845, "step": 2952 }, { "epoch": 0.5309718601096827, "grad_norm": 1.066746473312378, "learning_rate": 9.848600038702473e-06, "loss": 1.0315, "step": 2953 }, { "epoch": 0.5311516677155443, "grad_norm": 2.1084835529327393, "learning_rate": 9.848457766687603e-06, "loss": 0.8243, "step": 2954 }, { "epoch": 0.5313314753214061, "grad_norm": 1.6712312698364258, "learning_rate": 9.848315428885522e-06, "loss": 0.8627, "step": 2955 }, { "epoch": 0.5315112829272678, "grad_norm": 1.1064273118972778, "learning_rate": 9.848173025298161e-06, "loss": 1.0508, "step": 2956 }, { "epoch": 0.5316910905331296, "grad_norm": 1.5828908681869507, "learning_rate": 9.848030555927457e-06, "loss": 0.8327, "step": 2957 }, { "epoch": 0.5318708981389912, "grad_norm": 1.461284875869751, "learning_rate": 9.847888020775338e-06, "loss": 0.8469, "step": 2958 }, { "epoch": 0.532050705744853, "grad_norm": 1.106265664100647, "learning_rate": 9.847745419843739e-06, "loss": 1.0949, "step": 2959 }, { "epoch": 0.5322305133507147, "grad_norm": 1.7573734521865845, "learning_rate": 9.847602753134597e-06, "loss": 0.8428, "step": 2960 }, { "epoch": 0.5324103209565765, "grad_norm": 1.5432183742523193, "learning_rate": 9.847460020649846e-06, "loss": 0.8394, "step": 2961 }, { "epoch": 0.5325901285624381, "grad_norm": 1.61116361618042, "learning_rate": 9.847317222391422e-06, "loss": 0.8373, "step": 2962 }, { "epoch": 0.5327699361682999, "grad_norm": 2.397510290145874, "learning_rate": 9.847174358361265e-06, "loss": 0.7915, "step": 2963 }, { "epoch": 0.5329497437741616, "grad_norm": 1.6126681566238403, "learning_rate": 9.847031428561311e-06, "loss": 0.8007, "step": 2964 }, { "epoch": 0.5331295513800234, "grad_norm": 1.3512828350067139, "learning_rate": 9.8468884329935e-06, "loss": 1.0896, "step": 2965 }, { "epoch": 0.533309358985885, "grad_norm": 1.6347622871398926, "learning_rate": 9.846745371659773e-06, "loss": 0.8541, "step": 2966 }, { "epoch": 0.5334891665917468, "grad_norm": 1.1090973615646362, "learning_rate": 9.846602244562072e-06, "loss": 1.0877, "step": 2967 }, { "epoch": 0.5336689741976086, "grad_norm": 1.462876319885254, "learning_rate": 9.846459051702338e-06, "loss": 0.8655, "step": 2968 }, { "epoch": 0.5338487818034703, "grad_norm": 1.028193712234497, "learning_rate": 9.846315793082512e-06, "loss": 1.1021, "step": 2969 }, { "epoch": 0.5340285894093321, "grad_norm": 1.6157736778259277, "learning_rate": 9.846172468704542e-06, "loss": 0.7986, "step": 2970 }, { "epoch": 0.5342083970151937, "grad_norm": 1.521740436553955, "learning_rate": 9.84602907857037e-06, "loss": 0.7496, "step": 2971 }, { "epoch": 0.5343882046210555, "grad_norm": 1.0997354984283447, "learning_rate": 9.845885622681942e-06, "loss": 1.0604, "step": 2972 }, { "epoch": 0.5345680122269172, "grad_norm": 1.484883189201355, "learning_rate": 9.845742101041203e-06, "loss": 0.8486, "step": 2973 }, { "epoch": 0.534747819832779, "grad_norm": 1.064785361289978, "learning_rate": 9.845598513650104e-06, "loss": 1.0648, "step": 2974 }, { "epoch": 0.5349276274386406, "grad_norm": 1.5408700704574585, "learning_rate": 9.84545486051059e-06, "loss": 0.8332, "step": 2975 }, { "epoch": 0.5351074350445024, "grad_norm": 1.5203317403793335, "learning_rate": 9.845311141624612e-06, "loss": 0.8085, "step": 2976 }, { "epoch": 0.5352872426503641, "grad_norm": 1.4898009300231934, "learning_rate": 9.84516735699412e-06, "loss": 0.8264, "step": 2977 }, { "epoch": 0.5354670502562259, "grad_norm": 1.1881103515625, "learning_rate": 9.845023506621062e-06, "loss": 1.0706, "step": 2978 }, { "epoch": 0.5356468578620875, "grad_norm": 1.42886483669281, "learning_rate": 9.844879590507395e-06, "loss": 0.8555, "step": 2979 }, { "epoch": 0.5358266654679493, "grad_norm": 1.5882699489593506, "learning_rate": 9.844735608655067e-06, "loss": 0.8769, "step": 2980 }, { "epoch": 0.536006473073811, "grad_norm": 1.6584361791610718, "learning_rate": 9.844591561066035e-06, "loss": 0.8368, "step": 2981 }, { "epoch": 0.5361862806796728, "grad_norm": 1.4362291097640991, "learning_rate": 9.844447447742253e-06, "loss": 0.804, "step": 2982 }, { "epoch": 0.5363660882855344, "grad_norm": 1.5066279172897339, "learning_rate": 9.844303268685674e-06, "loss": 0.802, "step": 2983 }, { "epoch": 0.5365458958913962, "grad_norm": 1.5862853527069092, "learning_rate": 9.844159023898256e-06, "loss": 0.8318, "step": 2984 }, { "epoch": 0.5367257034972579, "grad_norm": 1.4140815734863281, "learning_rate": 9.844014713381959e-06, "loss": 1.0598, "step": 2985 }, { "epoch": 0.5369055111031197, "grad_norm": 1.557546854019165, "learning_rate": 9.843870337138737e-06, "loss": 0.8382, "step": 2986 }, { "epoch": 0.5370853187089814, "grad_norm": 1.1728004217147827, "learning_rate": 9.843725895170548e-06, "loss": 1.0573, "step": 2987 }, { "epoch": 0.5372651263148431, "grad_norm": 1.5016533136367798, "learning_rate": 9.843581387479357e-06, "loss": 0.8133, "step": 2988 }, { "epoch": 0.5374449339207048, "grad_norm": 1.49600350856781, "learning_rate": 9.843436814067121e-06, "loss": 0.8299, "step": 2989 }, { "epoch": 0.5376247415265666, "grad_norm": 1.9016444683074951, "learning_rate": 9.843292174935803e-06, "loss": 0.8809, "step": 2990 }, { "epoch": 0.5378045491324283, "grad_norm": 1.5037803649902344, "learning_rate": 9.843147470087366e-06, "loss": 0.8208, "step": 2991 }, { "epoch": 0.53798435673829, "grad_norm": 1.563161849975586, "learning_rate": 9.843002699523771e-06, "loss": 0.7516, "step": 2992 }, { "epoch": 0.5381641643441517, "grad_norm": 1.6190036535263062, "learning_rate": 9.842857863246983e-06, "loss": 0.8652, "step": 2993 }, { "epoch": 0.5383439719500135, "grad_norm": 1.4916863441467285, "learning_rate": 9.842712961258972e-06, "loss": 0.8156, "step": 2994 }, { "epoch": 0.5385237795558752, "grad_norm": 1.453647255897522, "learning_rate": 9.842567993561698e-06, "loss": 0.8119, "step": 2995 }, { "epoch": 0.5387035871617369, "grad_norm": 1.459324598312378, "learning_rate": 9.842422960157133e-06, "loss": 0.9143, "step": 2996 }, { "epoch": 0.5388833947675987, "grad_norm": 1.455328106880188, "learning_rate": 9.842277861047239e-06, "loss": 0.7633, "step": 2997 }, { "epoch": 0.5390632023734604, "grad_norm": 1.441217303276062, "learning_rate": 9.842132696233989e-06, "loss": 0.8861, "step": 2998 }, { "epoch": 0.5392430099793222, "grad_norm": 1.7211816310882568, "learning_rate": 9.841987465719353e-06, "loss": 0.7586, "step": 2999 }, { "epoch": 0.5394228175851838, "grad_norm": 1.64736008644104, "learning_rate": 9.841842169505299e-06, "loss": 0.8688, "step": 3000 }, { "epoch": 0.5394228175851838, "eval_loss": 0.8502148389816284, "eval_runtime": 156.1642, "eval_samples_per_second": 92.095, "eval_steps_per_second": 1.441, "step": 3000 }, { "epoch": 0.5396026251910456, "grad_norm": 1.5359001159667969, "learning_rate": 9.8416968075938e-06, "loss": 0.8085, "step": 3001 }, { "epoch": 0.5397824327969073, "grad_norm": 1.488969087600708, "learning_rate": 9.841551379986829e-06, "loss": 0.8437, "step": 3002 }, { "epoch": 0.5399622404027691, "grad_norm": 1.6371054649353027, "learning_rate": 9.84140588668636e-06, "loss": 0.8008, "step": 3003 }, { "epoch": 0.5401420480086307, "grad_norm": 1.1420865058898926, "learning_rate": 9.841260327694364e-06, "loss": 1.0703, "step": 3004 }, { "epoch": 0.5403218556144925, "grad_norm": 1.6417540311813354, "learning_rate": 9.841114703012817e-06, "loss": 0.8422, "step": 3005 }, { "epoch": 0.5405016632203542, "grad_norm": 1.5901246070861816, "learning_rate": 9.840969012643698e-06, "loss": 0.8681, "step": 3006 }, { "epoch": 0.540681470826216, "grad_norm": 1.5428922176361084, "learning_rate": 9.840823256588979e-06, "loss": 0.8438, "step": 3007 }, { "epoch": 0.5408612784320777, "grad_norm": 1.3879555463790894, "learning_rate": 9.840677434850641e-06, "loss": 0.732, "step": 3008 }, { "epoch": 0.5410410860379394, "grad_norm": 1.6201778650283813, "learning_rate": 9.840531547430663e-06, "loss": 0.8579, "step": 3009 }, { "epoch": 0.5412208936438011, "grad_norm": 1.5032192468643188, "learning_rate": 9.840385594331022e-06, "loss": 0.7614, "step": 3010 }, { "epoch": 0.5414007012496629, "grad_norm": 1.5380648374557495, "learning_rate": 9.8402395755537e-06, "loss": 0.7895, "step": 3011 }, { "epoch": 0.5415805088555246, "grad_norm": 1.4384783506393433, "learning_rate": 9.84009349110068e-06, "loss": 0.8874, "step": 3012 }, { "epoch": 0.5417603164613863, "grad_norm": 1.0695886611938477, "learning_rate": 9.839947340973939e-06, "loss": 1.0562, "step": 3013 }, { "epoch": 0.541940124067248, "grad_norm": 1.4758803844451904, "learning_rate": 9.839801125175465e-06, "loss": 0.8575, "step": 3014 }, { "epoch": 0.5421199316731098, "grad_norm": 1.506447434425354, "learning_rate": 9.839654843707241e-06, "loss": 0.7632, "step": 3015 }, { "epoch": 0.5422997392789715, "grad_norm": 1.0675626993179321, "learning_rate": 9.839508496571249e-06, "loss": 1.0483, "step": 3016 }, { "epoch": 0.5424795468848332, "grad_norm": 1.0524253845214844, "learning_rate": 9.839362083769479e-06, "loss": 1.0446, "step": 3017 }, { "epoch": 0.5426593544906949, "grad_norm": 1.0921516418457031, "learning_rate": 9.839215605303913e-06, "loss": 1.0553, "step": 3018 }, { "epoch": 0.5428391620965567, "grad_norm": 1.539671540260315, "learning_rate": 9.839069061176544e-06, "loss": 0.8215, "step": 3019 }, { "epoch": 0.5430189697024184, "grad_norm": 1.5139814615249634, "learning_rate": 9.838922451389355e-06, "loss": 0.8191, "step": 3020 }, { "epoch": 0.5431987773082801, "grad_norm": 1.0876682996749878, "learning_rate": 9.838775775944336e-06, "loss": 1.0657, "step": 3021 }, { "epoch": 0.5433785849141418, "grad_norm": 1.0241920948028564, "learning_rate": 9.838629034843482e-06, "loss": 1.0754, "step": 3022 }, { "epoch": 0.5435583925200036, "grad_norm": 1.4852111339569092, "learning_rate": 9.838482228088781e-06, "loss": 0.8154, "step": 3023 }, { "epoch": 0.5437382001258654, "grad_norm": 1.1123698949813843, "learning_rate": 9.838335355682222e-06, "loss": 1.0816, "step": 3024 }, { "epoch": 0.543918007731727, "grad_norm": 1.4855645895004272, "learning_rate": 9.838188417625804e-06, "loss": 0.8166, "step": 3025 }, { "epoch": 0.5440978153375888, "grad_norm": 1.5281633138656616, "learning_rate": 9.838041413921517e-06, "loss": 0.7527, "step": 3026 }, { "epoch": 0.5442776229434505, "grad_norm": 1.513279914855957, "learning_rate": 9.837894344571354e-06, "loss": 0.7632, "step": 3027 }, { "epoch": 0.5444574305493123, "grad_norm": 1.4765527248382568, "learning_rate": 9.837747209577316e-06, "loss": 0.7714, "step": 3028 }, { "epoch": 0.544637238155174, "grad_norm": 1.0632929801940918, "learning_rate": 9.837600008941392e-06, "loss": 1.0553, "step": 3029 }, { "epoch": 0.5448170457610357, "grad_norm": 1.5490694046020508, "learning_rate": 9.837452742665587e-06, "loss": 0.8075, "step": 3030 }, { "epoch": 0.5449968533668974, "grad_norm": 1.503934383392334, "learning_rate": 9.837305410751894e-06, "loss": 0.8426, "step": 3031 }, { "epoch": 0.5451766609727592, "grad_norm": 1.0752276182174683, "learning_rate": 9.837158013202314e-06, "loss": 1.0215, "step": 3032 }, { "epoch": 0.5453564685786209, "grad_norm": 1.0944651365280151, "learning_rate": 9.837010550018847e-06, "loss": 1.0635, "step": 3033 }, { "epoch": 0.5455362761844826, "grad_norm": 1.5371551513671875, "learning_rate": 9.836863021203494e-06, "loss": 0.7879, "step": 3034 }, { "epoch": 0.5457160837903443, "grad_norm": 1.4517103433609009, "learning_rate": 9.836715426758256e-06, "loss": 0.8549, "step": 3035 }, { "epoch": 0.5458958913962061, "grad_norm": 1.5156195163726807, "learning_rate": 9.836567766685136e-06, "loss": 0.7686, "step": 3036 }, { "epoch": 0.5460756990020678, "grad_norm": 1.0669790506362915, "learning_rate": 9.836420040986138e-06, "loss": 1.07, "step": 3037 }, { "epoch": 0.5462555066079295, "grad_norm": 1.4552178382873535, "learning_rate": 9.836272249663266e-06, "loss": 0.7743, "step": 3038 }, { "epoch": 0.5464353142137912, "grad_norm": 1.5853277444839478, "learning_rate": 9.836124392718526e-06, "loss": 0.8615, "step": 3039 }, { "epoch": 0.546615121819653, "grad_norm": 1.5473493337631226, "learning_rate": 9.835976470153923e-06, "loss": 0.8559, "step": 3040 }, { "epoch": 0.5467949294255147, "grad_norm": 1.4274662733078003, "learning_rate": 9.835828481971464e-06, "loss": 0.7974, "step": 3041 }, { "epoch": 0.5469747370313764, "grad_norm": 1.4641629457473755, "learning_rate": 9.83568042817316e-06, "loss": 0.8028, "step": 3042 }, { "epoch": 0.5471545446372381, "grad_norm": 1.507948398590088, "learning_rate": 9.835532308761016e-06, "loss": 0.841, "step": 3043 }, { "epoch": 0.5473343522430999, "grad_norm": 1.5385000705718994, "learning_rate": 9.835384123737041e-06, "loss": 0.8771, "step": 3044 }, { "epoch": 0.5475141598489616, "grad_norm": 1.5250197649002075, "learning_rate": 9.835235873103252e-06, "loss": 0.8576, "step": 3045 }, { "epoch": 0.5476939674548233, "grad_norm": 1.3718595504760742, "learning_rate": 9.835087556861655e-06, "loss": 1.1216, "step": 3046 }, { "epoch": 0.547873775060685, "grad_norm": 1.3995808362960815, "learning_rate": 9.834939175014266e-06, "loss": 0.768, "step": 3047 }, { "epoch": 0.5480535826665468, "grad_norm": 1.4893323183059692, "learning_rate": 9.834790727563094e-06, "loss": 0.8523, "step": 3048 }, { "epoch": 0.5482333902724085, "grad_norm": 1.5375484228134155, "learning_rate": 9.834642214510158e-06, "loss": 0.9325, "step": 3049 }, { "epoch": 0.5484131978782703, "grad_norm": 1.069894552230835, "learning_rate": 9.834493635857469e-06, "loss": 1.1062, "step": 3050 }, { "epoch": 0.5485930054841319, "grad_norm": 1.482742190361023, "learning_rate": 9.834344991607045e-06, "loss": 0.8057, "step": 3051 }, { "epoch": 0.5487728130899937, "grad_norm": 1.14958918094635, "learning_rate": 9.834196281760904e-06, "loss": 1.0834, "step": 3052 }, { "epoch": 0.5489526206958555, "grad_norm": 1.4957658052444458, "learning_rate": 9.834047506321062e-06, "loss": 0.8196, "step": 3053 }, { "epoch": 0.5491324283017172, "grad_norm": 1.4548819065093994, "learning_rate": 9.833898665289538e-06, "loss": 0.8131, "step": 3054 }, { "epoch": 0.5493122359075789, "grad_norm": 1.5834230184555054, "learning_rate": 9.833749758668352e-06, "loss": 0.8452, "step": 3055 }, { "epoch": 0.5494920435134406, "grad_norm": 1.5644729137420654, "learning_rate": 9.833600786459524e-06, "loss": 0.8228, "step": 3056 }, { "epoch": 0.5496718511193024, "grad_norm": 1.5300261974334717, "learning_rate": 9.833451748665076e-06, "loss": 0.782, "step": 3057 }, { "epoch": 0.5498516587251641, "grad_norm": 1.5211788415908813, "learning_rate": 9.833302645287031e-06, "loss": 0.862, "step": 3058 }, { "epoch": 0.5500314663310258, "grad_norm": 1.51486337184906, "learning_rate": 9.833153476327408e-06, "loss": 0.801, "step": 3059 }, { "epoch": 0.5502112739368875, "grad_norm": 1.6615500450134277, "learning_rate": 9.833004241788238e-06, "loss": 0.8952, "step": 3060 }, { "epoch": 0.5503910815427493, "grad_norm": 1.1340378522872925, "learning_rate": 9.83285494167154e-06, "loss": 1.0372, "step": 3061 }, { "epoch": 0.550570889148611, "grad_norm": 1.4493610858917236, "learning_rate": 9.832705575979342e-06, "loss": 0.8287, "step": 3062 }, { "epoch": 0.5507506967544727, "grad_norm": 1.6076222658157349, "learning_rate": 9.832556144713669e-06, "loss": 0.7901, "step": 3063 }, { "epoch": 0.5509305043603344, "grad_norm": 1.5784770250320435, "learning_rate": 9.83240664787655e-06, "loss": 0.862, "step": 3064 }, { "epoch": 0.5511103119661962, "grad_norm": 1.486538290977478, "learning_rate": 9.832257085470017e-06, "loss": 0.8038, "step": 3065 }, { "epoch": 0.5512901195720579, "grad_norm": 1.7765896320343018, "learning_rate": 9.832107457496094e-06, "loss": 0.8097, "step": 3066 }, { "epoch": 0.5514699271779196, "grad_norm": 1.5251474380493164, "learning_rate": 9.831957763956814e-06, "loss": 0.8423, "step": 3067 }, { "epoch": 0.5516497347837813, "grad_norm": 1.5898367166519165, "learning_rate": 9.831808004854207e-06, "loss": 0.7793, "step": 3068 }, { "epoch": 0.5518295423896431, "grad_norm": 1.6231433153152466, "learning_rate": 9.831658180190303e-06, "loss": 0.8521, "step": 3069 }, { "epoch": 0.5520093499955048, "grad_norm": 1.4614773988723755, "learning_rate": 9.83150828996714e-06, "loss": 0.8861, "step": 3070 }, { "epoch": 0.5521891576013666, "grad_norm": 1.1962058544158936, "learning_rate": 9.831358334186748e-06, "loss": 1.0603, "step": 3071 }, { "epoch": 0.5523689652072282, "grad_norm": 1.4929777383804321, "learning_rate": 9.831208312851164e-06, "loss": 0.8122, "step": 3072 }, { "epoch": 0.55254877281309, "grad_norm": 1.48836350440979, "learning_rate": 9.83105822596242e-06, "loss": 0.8056, "step": 3073 }, { "epoch": 0.5527285804189517, "grad_norm": 1.5856813192367554, "learning_rate": 9.830908073522558e-06, "loss": 0.7948, "step": 3074 }, { "epoch": 0.5529083880248135, "grad_norm": 1.3775502443313599, "learning_rate": 9.830757855533609e-06, "loss": 0.787, "step": 3075 }, { "epoch": 0.5530881956306751, "grad_norm": 1.3946470022201538, "learning_rate": 9.830607571997617e-06, "loss": 0.7858, "step": 3076 }, { "epoch": 0.5532680032365369, "grad_norm": 1.7729212045669556, "learning_rate": 9.830457222916618e-06, "loss": 0.8056, "step": 3077 }, { "epoch": 0.5534478108423986, "grad_norm": 1.6487913131713867, "learning_rate": 9.830306808292651e-06, "loss": 0.8206, "step": 3078 }, { "epoch": 0.5536276184482604, "grad_norm": 1.6939804553985596, "learning_rate": 9.83015632812776e-06, "loss": 0.7706, "step": 3079 }, { "epoch": 0.5538074260541221, "grad_norm": 1.516866683959961, "learning_rate": 9.830005782423986e-06, "loss": 0.8234, "step": 3080 }, { "epoch": 0.5539872336599838, "grad_norm": 1.5982835292816162, "learning_rate": 9.82985517118337e-06, "loss": 0.7811, "step": 3081 }, { "epoch": 0.5541670412658456, "grad_norm": 1.1978392601013184, "learning_rate": 9.829704494407959e-06, "loss": 1.0338, "step": 3082 }, { "epoch": 0.5543468488717073, "grad_norm": 1.2166255712509155, "learning_rate": 9.829553752099795e-06, "loss": 1.0835, "step": 3083 }, { "epoch": 0.554526656477569, "grad_norm": 1.0156279802322388, "learning_rate": 9.82940294426092e-06, "loss": 1.066, "step": 3084 }, { "epoch": 0.5547064640834307, "grad_norm": 1.4544657468795776, "learning_rate": 9.829252070893388e-06, "loss": 0.7749, "step": 3085 }, { "epoch": 0.5548862716892925, "grad_norm": 1.5428496599197388, "learning_rate": 9.82910113199924e-06, "loss": 0.8003, "step": 3086 }, { "epoch": 0.5550660792951542, "grad_norm": 1.6966893672943115, "learning_rate": 9.828950127580526e-06, "loss": 0.8216, "step": 3087 }, { "epoch": 0.555245886901016, "grad_norm": 1.5140098333358765, "learning_rate": 9.828799057639295e-06, "loss": 0.8686, "step": 3088 }, { "epoch": 0.5554256945068776, "grad_norm": 1.184941291809082, "learning_rate": 9.828647922177597e-06, "loss": 1.053, "step": 3089 }, { "epoch": 0.5556055021127394, "grad_norm": 1.7391283512115479, "learning_rate": 9.828496721197482e-06, "loss": 0.8397, "step": 3090 }, { "epoch": 0.5557853097186011, "grad_norm": 1.530161738395691, "learning_rate": 9.828345454701003e-06, "loss": 0.8128, "step": 3091 }, { "epoch": 0.5559651173244629, "grad_norm": 1.630720615386963, "learning_rate": 9.828194122690212e-06, "loss": 0.8076, "step": 3092 }, { "epoch": 0.5561449249303245, "grad_norm": 1.5675830841064453, "learning_rate": 9.828042725167162e-06, "loss": 0.8079, "step": 3093 }, { "epoch": 0.5563247325361863, "grad_norm": 1.5428543090820312, "learning_rate": 9.827891262133907e-06, "loss": 0.8436, "step": 3094 }, { "epoch": 0.556504540142048, "grad_norm": 1.0387548208236694, "learning_rate": 9.827739733592502e-06, "loss": 1.095, "step": 3095 }, { "epoch": 0.5566843477479098, "grad_norm": 1.6785660982131958, "learning_rate": 9.827588139545003e-06, "loss": 0.8397, "step": 3096 }, { "epoch": 0.5568641553537714, "grad_norm": 1.532672643661499, "learning_rate": 9.827436479993468e-06, "loss": 0.8448, "step": 3097 }, { "epoch": 0.5570439629596332, "grad_norm": 1.5578651428222656, "learning_rate": 9.827284754939954e-06, "loss": 0.8821, "step": 3098 }, { "epoch": 0.5572237705654949, "grad_norm": 1.7641124725341797, "learning_rate": 9.827132964386522e-06, "loss": 0.7751, "step": 3099 }, { "epoch": 0.5574035781713567, "grad_norm": 1.591873288154602, "learning_rate": 9.826981108335227e-06, "loss": 0.8355, "step": 3100 }, { "epoch": 0.5575833857772183, "grad_norm": 1.628553867340088, "learning_rate": 9.826829186788132e-06, "loss": 0.8302, "step": 3101 }, { "epoch": 0.5577631933830801, "grad_norm": 1.4015893936157227, "learning_rate": 9.826677199747298e-06, "loss": 0.8137, "step": 3102 }, { "epoch": 0.5579430009889418, "grad_norm": 1.459652304649353, "learning_rate": 9.82652514721479e-06, "loss": 0.8146, "step": 3103 }, { "epoch": 0.5581228085948036, "grad_norm": 1.4228692054748535, "learning_rate": 9.826373029192668e-06, "loss": 0.8602, "step": 3104 }, { "epoch": 0.5583026162006652, "grad_norm": 1.5491697788238525, "learning_rate": 9.826220845682996e-06, "loss": 0.8513, "step": 3105 }, { "epoch": 0.558482423806527, "grad_norm": 1.429343342781067, "learning_rate": 9.826068596687841e-06, "loss": 0.8226, "step": 3106 }, { "epoch": 0.5586622314123888, "grad_norm": 1.6317003965377808, "learning_rate": 9.825916282209266e-06, "loss": 0.7902, "step": 3107 }, { "epoch": 0.5588420390182505, "grad_norm": 1.554262399673462, "learning_rate": 9.825763902249342e-06, "loss": 0.8318, "step": 3108 }, { "epoch": 0.5590218466241123, "grad_norm": 1.5677167177200317, "learning_rate": 9.825611456810132e-06, "loss": 0.8543, "step": 3109 }, { "epoch": 0.5592016542299739, "grad_norm": 1.6366698741912842, "learning_rate": 9.825458945893706e-06, "loss": 0.8645, "step": 3110 }, { "epoch": 0.5593814618358357, "grad_norm": 1.372869610786438, "learning_rate": 9.825306369502133e-06, "loss": 0.7986, "step": 3111 }, { "epoch": 0.5595612694416974, "grad_norm": 1.5617563724517822, "learning_rate": 9.825153727637487e-06, "loss": 0.7909, "step": 3112 }, { "epoch": 0.5597410770475592, "grad_norm": 1.5426318645477295, "learning_rate": 9.825001020301832e-06, "loss": 0.829, "step": 3113 }, { "epoch": 0.5599208846534208, "grad_norm": 1.555548906326294, "learning_rate": 9.824848247497248e-06, "loss": 0.8598, "step": 3114 }, { "epoch": 0.5601006922592826, "grad_norm": 1.4041861295700073, "learning_rate": 9.824695409225804e-06, "loss": 0.7968, "step": 3115 }, { "epoch": 0.5602804998651443, "grad_norm": 1.6353963613510132, "learning_rate": 9.824542505489572e-06, "loss": 0.8537, "step": 3116 }, { "epoch": 0.5604603074710061, "grad_norm": 1.5505659580230713, "learning_rate": 9.824389536290629e-06, "loss": 0.8349, "step": 3117 }, { "epoch": 0.5606401150768677, "grad_norm": 1.6403194665908813, "learning_rate": 9.82423650163105e-06, "loss": 0.8405, "step": 3118 }, { "epoch": 0.5608199226827295, "grad_norm": 1.516462802886963, "learning_rate": 9.824083401512914e-06, "loss": 0.8225, "step": 3119 }, { "epoch": 0.5609997302885912, "grad_norm": 1.6518354415893555, "learning_rate": 9.823930235938295e-06, "loss": 0.8411, "step": 3120 }, { "epoch": 0.561179537894453, "grad_norm": 1.1082338094711304, "learning_rate": 9.82377700490927e-06, "loss": 1.0718, "step": 3121 }, { "epoch": 0.5613593455003146, "grad_norm": 1.089983344078064, "learning_rate": 9.823623708427923e-06, "loss": 1.0654, "step": 3122 }, { "epoch": 0.5615391531061764, "grad_norm": 1.4570324420928955, "learning_rate": 9.823470346496332e-06, "loss": 0.8557, "step": 3123 }, { "epoch": 0.5617189607120381, "grad_norm": 1.507573127746582, "learning_rate": 9.823316919116574e-06, "loss": 0.8144, "step": 3124 }, { "epoch": 0.5618987683178999, "grad_norm": 1.5208872556686401, "learning_rate": 9.823163426290738e-06, "loss": 0.7565, "step": 3125 }, { "epoch": 0.5620785759237615, "grad_norm": 1.557015299797058, "learning_rate": 9.823009868020901e-06, "loss": 0.887, "step": 3126 }, { "epoch": 0.5622583835296233, "grad_norm": 1.6720424890518188, "learning_rate": 9.822856244309149e-06, "loss": 0.8571, "step": 3127 }, { "epoch": 0.562438191135485, "grad_norm": 1.538332223892212, "learning_rate": 9.822702555157566e-06, "loss": 0.7965, "step": 3128 }, { "epoch": 0.5626179987413468, "grad_norm": 1.4010000228881836, "learning_rate": 9.822548800568238e-06, "loss": 0.7955, "step": 3129 }, { "epoch": 0.5627978063472084, "grad_norm": 1.4737248420715332, "learning_rate": 9.82239498054325e-06, "loss": 0.7782, "step": 3130 }, { "epoch": 0.5629776139530702, "grad_norm": 1.4090917110443115, "learning_rate": 9.822241095084691e-06, "loss": 1.0548, "step": 3131 }, { "epoch": 0.5631574215589319, "grad_norm": 1.8582121133804321, "learning_rate": 9.822087144194645e-06, "loss": 0.8222, "step": 3132 }, { "epoch": 0.5633372291647937, "grad_norm": 1.6156011819839478, "learning_rate": 9.821933127875206e-06, "loss": 0.8606, "step": 3133 }, { "epoch": 0.5635170367706553, "grad_norm": 1.7695740461349487, "learning_rate": 9.821779046128461e-06, "loss": 0.8729, "step": 3134 }, { "epoch": 0.5636968443765171, "grad_norm": 1.4625895023345947, "learning_rate": 9.8216248989565e-06, "loss": 0.8048, "step": 3135 }, { "epoch": 0.5638766519823789, "grad_norm": 1.5219935178756714, "learning_rate": 9.821470686361418e-06, "loss": 0.8397, "step": 3136 }, { "epoch": 0.5640564595882406, "grad_norm": 1.4641802310943604, "learning_rate": 9.821316408345303e-06, "loss": 0.8227, "step": 3137 }, { "epoch": 0.5642362671941024, "grad_norm": 1.5394455194473267, "learning_rate": 9.821162064910252e-06, "loss": 0.804, "step": 3138 }, { "epoch": 0.564416074799964, "grad_norm": 1.4581811428070068, "learning_rate": 9.821007656058357e-06, "loss": 0.8214, "step": 3139 }, { "epoch": 0.5645958824058258, "grad_norm": 1.535071611404419, "learning_rate": 9.820853181791715e-06, "loss": 0.8153, "step": 3140 }, { "epoch": 0.5647756900116875, "grad_norm": 1.6444871425628662, "learning_rate": 9.82069864211242e-06, "loss": 0.836, "step": 3141 }, { "epoch": 0.5649554976175493, "grad_norm": 1.5387294292449951, "learning_rate": 9.820544037022569e-06, "loss": 0.817, "step": 3142 }, { "epoch": 0.5651353052234109, "grad_norm": 1.527838945388794, "learning_rate": 9.820389366524262e-06, "loss": 0.8091, "step": 3143 }, { "epoch": 0.5653151128292727, "grad_norm": 2.080247640609741, "learning_rate": 9.820234630619596e-06, "loss": 0.9305, "step": 3144 }, { "epoch": 0.5654949204351344, "grad_norm": 1.1030356884002686, "learning_rate": 9.820079829310672e-06, "loss": 1.0593, "step": 3145 }, { "epoch": 0.5656747280409962, "grad_norm": 1.3964165449142456, "learning_rate": 9.819924962599588e-06, "loss": 0.7928, "step": 3146 }, { "epoch": 0.5658545356468578, "grad_norm": 1.5281990766525269, "learning_rate": 9.819770030488446e-06, "loss": 0.8183, "step": 3147 }, { "epoch": 0.5660343432527196, "grad_norm": 1.5667917728424072, "learning_rate": 9.819615032979349e-06, "loss": 0.8836, "step": 3148 }, { "epoch": 0.5662141508585813, "grad_norm": 1.433397889137268, "learning_rate": 9.819459970074401e-06, "loss": 0.7589, "step": 3149 }, { "epoch": 0.5663939584644431, "grad_norm": 1.534466028213501, "learning_rate": 9.819304841775705e-06, "loss": 0.8537, "step": 3150 }, { "epoch": 0.5665737660703047, "grad_norm": 1.471846580505371, "learning_rate": 9.819149648085365e-06, "loss": 0.7436, "step": 3151 }, { "epoch": 0.5667535736761665, "grad_norm": 1.5448321104049683, "learning_rate": 9.818994389005489e-06, "loss": 0.8638, "step": 3152 }, { "epoch": 0.5669333812820282, "grad_norm": 1.5022252798080444, "learning_rate": 9.818839064538181e-06, "loss": 0.7984, "step": 3153 }, { "epoch": 0.56711318888789, "grad_norm": 1.559412956237793, "learning_rate": 9.81868367468555e-06, "loss": 0.9056, "step": 3154 }, { "epoch": 0.5672929964937516, "grad_norm": 1.4432408809661865, "learning_rate": 9.818528219449705e-06, "loss": 0.7759, "step": 3155 }, { "epoch": 0.5674728040996134, "grad_norm": 1.5043352842330933, "learning_rate": 9.818372698832755e-06, "loss": 0.8787, "step": 3156 }, { "epoch": 0.5676526117054751, "grad_norm": 1.470491647720337, "learning_rate": 9.818217112836808e-06, "loss": 0.8076, "step": 3157 }, { "epoch": 0.5678324193113369, "grad_norm": 2.0266568660736084, "learning_rate": 9.818061461463978e-06, "loss": 0.743, "step": 3158 }, { "epoch": 0.5680122269171985, "grad_norm": 1.2655277252197266, "learning_rate": 9.817905744716377e-06, "loss": 1.0439, "step": 3159 }, { "epoch": 0.5681920345230603, "grad_norm": 1.4844913482666016, "learning_rate": 9.817749962596115e-06, "loss": 0.8033, "step": 3160 }, { "epoch": 0.568371842128922, "grad_norm": 1.8765629529953003, "learning_rate": 9.817594115105309e-06, "loss": 0.8522, "step": 3161 }, { "epoch": 0.5685516497347838, "grad_norm": 1.4829819202423096, "learning_rate": 9.817438202246073e-06, "loss": 0.786, "step": 3162 }, { "epoch": 0.5687314573406456, "grad_norm": 1.550199270248413, "learning_rate": 9.817282224020518e-06, "loss": 0.8643, "step": 3163 }, { "epoch": 0.5689112649465072, "grad_norm": 1.0938630104064941, "learning_rate": 9.817126180430766e-06, "loss": 1.0402, "step": 3164 }, { "epoch": 0.569091072552369, "grad_norm": 1.494110345840454, "learning_rate": 9.816970071478936e-06, "loss": 0.8793, "step": 3165 }, { "epoch": 0.5692708801582307, "grad_norm": 1.4775644540786743, "learning_rate": 9.816813897167138e-06, "loss": 0.7794, "step": 3166 }, { "epoch": 0.5694506877640925, "grad_norm": 1.5725728273391724, "learning_rate": 9.816657657497497e-06, "loss": 0.8843, "step": 3167 }, { "epoch": 0.5696304953699541, "grad_norm": 1.4727985858917236, "learning_rate": 9.816501352472132e-06, "loss": 0.8664, "step": 3168 }, { "epoch": 0.5698103029758159, "grad_norm": 1.495704174041748, "learning_rate": 9.816344982093164e-06, "loss": 0.7373, "step": 3169 }, { "epoch": 0.5699901105816776, "grad_norm": 1.1810884475708008, "learning_rate": 9.816188546362714e-06, "loss": 1.1293, "step": 3170 }, { "epoch": 0.5701699181875394, "grad_norm": 1.5145182609558105, "learning_rate": 9.816032045282905e-06, "loss": 0.8312, "step": 3171 }, { "epoch": 0.570349725793401, "grad_norm": 1.5014539957046509, "learning_rate": 9.81587547885586e-06, "loss": 0.8093, "step": 3172 }, { "epoch": 0.5705295333992628, "grad_norm": 1.5346250534057617, "learning_rate": 9.815718847083704e-06, "loss": 0.8377, "step": 3173 }, { "epoch": 0.5707093410051245, "grad_norm": 1.5661073923110962, "learning_rate": 9.815562149968563e-06, "loss": 0.7878, "step": 3174 }, { "epoch": 0.5708891486109863, "grad_norm": 1.4284381866455078, "learning_rate": 9.81540538751256e-06, "loss": 0.7558, "step": 3175 }, { "epoch": 0.571068956216848, "grad_norm": 1.4551022052764893, "learning_rate": 9.815248559717827e-06, "loss": 0.824, "step": 3176 }, { "epoch": 0.5712487638227097, "grad_norm": 1.2139379978179932, "learning_rate": 9.815091666586487e-06, "loss": 1.031, "step": 3177 }, { "epoch": 0.5714285714285714, "grad_norm": 1.0383492708206177, "learning_rate": 9.814934708120673e-06, "loss": 1.0468, "step": 3178 }, { "epoch": 0.5716083790344332, "grad_norm": 1.6009517908096313, "learning_rate": 9.814777684322512e-06, "loss": 0.8327, "step": 3179 }, { "epoch": 0.5717881866402948, "grad_norm": 1.6127575635910034, "learning_rate": 9.814620595194135e-06, "loss": 0.8362, "step": 3180 }, { "epoch": 0.5719679942461566, "grad_norm": 1.452379822731018, "learning_rate": 9.814463440737674e-06, "loss": 0.8204, "step": 3181 }, { "epoch": 0.5721478018520183, "grad_norm": 1.5719444751739502, "learning_rate": 9.814306220955263e-06, "loss": 0.8043, "step": 3182 }, { "epoch": 0.5723276094578801, "grad_norm": 1.6674351692199707, "learning_rate": 9.814148935849032e-06, "loss": 0.862, "step": 3183 }, { "epoch": 0.5725074170637418, "grad_norm": 1.519735336303711, "learning_rate": 9.813991585421118e-06, "loss": 0.8375, "step": 3184 }, { "epoch": 0.5726872246696035, "grad_norm": 1.4645483493804932, "learning_rate": 9.813834169673654e-06, "loss": 0.8384, "step": 3185 }, { "epoch": 0.5728670322754652, "grad_norm": 1.5709505081176758, "learning_rate": 9.813676688608777e-06, "loss": 0.7962, "step": 3186 }, { "epoch": 0.573046839881327, "grad_norm": 1.5576730966567993, "learning_rate": 9.813519142228623e-06, "loss": 0.8438, "step": 3187 }, { "epoch": 0.5732266474871887, "grad_norm": 1.5211129188537598, "learning_rate": 9.81336153053533e-06, "loss": 0.8124, "step": 3188 }, { "epoch": 0.5734064550930504, "grad_norm": 1.6278390884399414, "learning_rate": 9.813203853531038e-06, "loss": 0.8321, "step": 3189 }, { "epoch": 0.5735862626989122, "grad_norm": 1.537804126739502, "learning_rate": 9.813046111217886e-06, "loss": 0.8095, "step": 3190 }, { "epoch": 0.5737660703047739, "grad_norm": 1.495128870010376, "learning_rate": 9.812888303598012e-06, "loss": 0.7902, "step": 3191 }, { "epoch": 0.5739458779106357, "grad_norm": 1.4917432069778442, "learning_rate": 9.812730430673559e-06, "loss": 0.8468, "step": 3192 }, { "epoch": 0.5741256855164973, "grad_norm": 1.4217278957366943, "learning_rate": 9.812572492446668e-06, "loss": 0.8132, "step": 3193 }, { "epoch": 0.5743054931223591, "grad_norm": 1.7588492631912231, "learning_rate": 9.812414488919485e-06, "loss": 0.8148, "step": 3194 }, { "epoch": 0.5744853007282208, "grad_norm": 1.1905556917190552, "learning_rate": 9.812256420094151e-06, "loss": 1.0804, "step": 3195 }, { "epoch": 0.5746651083340826, "grad_norm": 2.1330995559692383, "learning_rate": 9.812098285972812e-06, "loss": 0.855, "step": 3196 }, { "epoch": 0.5748449159399442, "grad_norm": 1.195853590965271, "learning_rate": 9.811940086557614e-06, "loss": 1.0506, "step": 3197 }, { "epoch": 0.575024723545806, "grad_norm": 1.5511068105697632, "learning_rate": 9.811781821850701e-06, "loss": 0.8806, "step": 3198 }, { "epoch": 0.5752045311516677, "grad_norm": 2.0003879070281982, "learning_rate": 9.811623491854225e-06, "loss": 0.7959, "step": 3199 }, { "epoch": 0.5753843387575295, "grad_norm": 1.5545151233673096, "learning_rate": 9.81146509657033e-06, "loss": 0.7154, "step": 3200 }, { "epoch": 0.5755641463633911, "grad_norm": 1.4775266647338867, "learning_rate": 9.811306636001168e-06, "loss": 0.8402, "step": 3201 }, { "epoch": 0.5757439539692529, "grad_norm": 1.544385552406311, "learning_rate": 9.811148110148887e-06, "loss": 0.8263, "step": 3202 }, { "epoch": 0.5759237615751146, "grad_norm": 1.2587311267852783, "learning_rate": 9.810989519015638e-06, "loss": 1.0845, "step": 3203 }, { "epoch": 0.5761035691809764, "grad_norm": 1.5698603391647339, "learning_rate": 9.810830862603576e-06, "loss": 0.7861, "step": 3204 }, { "epoch": 0.576283376786838, "grad_norm": 1.2640399932861328, "learning_rate": 9.810672140914852e-06, "loss": 1.0194, "step": 3205 }, { "epoch": 0.5764631843926998, "grad_norm": 1.4168354272842407, "learning_rate": 9.810513353951617e-06, "loss": 0.7601, "step": 3206 }, { "epoch": 0.5766429919985615, "grad_norm": 1.5145752429962158, "learning_rate": 9.81035450171603e-06, "loss": 0.8525, "step": 3207 }, { "epoch": 0.5768227996044233, "grad_norm": 1.0275648832321167, "learning_rate": 9.810195584210243e-06, "loss": 1.0304, "step": 3208 }, { "epoch": 0.577002607210285, "grad_norm": 1.5032380819320679, "learning_rate": 9.810036601436414e-06, "loss": 0.8332, "step": 3209 }, { "epoch": 0.5771824148161467, "grad_norm": 1.1117286682128906, "learning_rate": 9.809877553396699e-06, "loss": 1.0894, "step": 3210 }, { "epoch": 0.5773622224220084, "grad_norm": 1.4667688608169556, "learning_rate": 9.809718440093257e-06, "loss": 0.8344, "step": 3211 }, { "epoch": 0.5775420300278702, "grad_norm": 1.528243064880371, "learning_rate": 9.809559261528247e-06, "loss": 0.8448, "step": 3212 }, { "epoch": 0.5777218376337319, "grad_norm": 1.091701865196228, "learning_rate": 9.80940001770383e-06, "loss": 1.078, "step": 3213 }, { "epoch": 0.5779016452395936, "grad_norm": 1.494596242904663, "learning_rate": 9.809240708622163e-06, "loss": 0.8004, "step": 3214 }, { "epoch": 0.5780814528454553, "grad_norm": 1.4706432819366455, "learning_rate": 9.809081334285414e-06, "loss": 0.8163, "step": 3215 }, { "epoch": 0.5782612604513171, "grad_norm": 1.411158561706543, "learning_rate": 9.808921894695738e-06, "loss": 0.7845, "step": 3216 }, { "epoch": 0.5784410680571788, "grad_norm": 1.4291174411773682, "learning_rate": 9.808762389855302e-06, "loss": 0.8543, "step": 3217 }, { "epoch": 0.5786208756630405, "grad_norm": 1.4708938598632812, "learning_rate": 9.80860281976627e-06, "loss": 0.8458, "step": 3218 }, { "epoch": 0.5788006832689023, "grad_norm": 1.449688196182251, "learning_rate": 9.808443184430808e-06, "loss": 0.8046, "step": 3219 }, { "epoch": 0.578980490874764, "grad_norm": 1.4977099895477295, "learning_rate": 9.808283483851082e-06, "loss": 0.8135, "step": 3220 }, { "epoch": 0.5791602984806258, "grad_norm": 1.545802116394043, "learning_rate": 9.808123718029257e-06, "loss": 0.8703, "step": 3221 }, { "epoch": 0.5793401060864874, "grad_norm": 1.616217017173767, "learning_rate": 9.807963886967502e-06, "loss": 0.7874, "step": 3222 }, { "epoch": 0.5795199136923492, "grad_norm": 1.146705985069275, "learning_rate": 9.807803990667986e-06, "loss": 1.0592, "step": 3223 }, { "epoch": 0.5796997212982109, "grad_norm": 1.3547940254211426, "learning_rate": 9.80764402913288e-06, "loss": 0.7688, "step": 3224 }, { "epoch": 0.5798795289040727, "grad_norm": 1.0525094270706177, "learning_rate": 9.807484002364352e-06, "loss": 1.0279, "step": 3225 }, { "epoch": 0.5800593365099344, "grad_norm": 2.5813519954681396, "learning_rate": 9.807323910364572e-06, "loss": 0.8665, "step": 3226 }, { "epoch": 0.5802391441157961, "grad_norm": 1.7207649946212769, "learning_rate": 9.807163753135715e-06, "loss": 0.797, "step": 3227 }, { "epoch": 0.5804189517216578, "grad_norm": 1.4461437463760376, "learning_rate": 9.807003530679956e-06, "loss": 0.811, "step": 3228 }, { "epoch": 0.5805987593275196, "grad_norm": 1.1363089084625244, "learning_rate": 9.806843242999465e-06, "loss": 1.057, "step": 3229 }, { "epoch": 0.5807785669333813, "grad_norm": 1.5251433849334717, "learning_rate": 9.806682890096419e-06, "loss": 0.7815, "step": 3230 }, { "epoch": 0.580958374539243, "grad_norm": 1.4130932092666626, "learning_rate": 9.806522471972993e-06, "loss": 0.7981, "step": 3231 }, { "epoch": 0.5811381821451047, "grad_norm": 1.761950135231018, "learning_rate": 9.806361988631364e-06, "loss": 0.7911, "step": 3232 }, { "epoch": 0.5813179897509665, "grad_norm": 1.4532842636108398, "learning_rate": 9.806201440073708e-06, "loss": 0.8065, "step": 3233 }, { "epoch": 0.5814977973568282, "grad_norm": 1.3654026985168457, "learning_rate": 9.806040826302206e-06, "loss": 0.8721, "step": 3234 }, { "epoch": 0.5816776049626899, "grad_norm": 1.4709317684173584, "learning_rate": 9.805880147319035e-06, "loss": 0.7961, "step": 3235 }, { "epoch": 0.5818574125685516, "grad_norm": 1.483281135559082, "learning_rate": 9.805719403126378e-06, "loss": 0.7609, "step": 3236 }, { "epoch": 0.5820372201744134, "grad_norm": 1.081370234489441, "learning_rate": 9.805558593726414e-06, "loss": 1.0146, "step": 3237 }, { "epoch": 0.5822170277802751, "grad_norm": 1.0022914409637451, "learning_rate": 9.805397719121326e-06, "loss": 1.0646, "step": 3238 }, { "epoch": 0.5823968353861368, "grad_norm": 1.7615394592285156, "learning_rate": 9.805236779313294e-06, "loss": 0.787, "step": 3239 }, { "epoch": 0.5825766429919985, "grad_norm": 1.0162765979766846, "learning_rate": 9.805075774304507e-06, "loss": 1.048, "step": 3240 }, { "epoch": 0.5827564505978603, "grad_norm": 1.534578561782837, "learning_rate": 9.804914704097144e-06, "loss": 0.8474, "step": 3241 }, { "epoch": 0.582936258203722, "grad_norm": 0.9953423142433167, "learning_rate": 9.804753568693395e-06, "loss": 1.064, "step": 3242 }, { "epoch": 0.5831160658095838, "grad_norm": 1.6132168769836426, "learning_rate": 9.804592368095444e-06, "loss": 0.8081, "step": 3243 }, { "epoch": 0.5832958734154454, "grad_norm": 1.501449465751648, "learning_rate": 9.804431102305478e-06, "loss": 0.8142, "step": 3244 }, { "epoch": 0.5834756810213072, "grad_norm": 1.12730872631073, "learning_rate": 9.804269771325687e-06, "loss": 1.0803, "step": 3245 }, { "epoch": 0.583655488627169, "grad_norm": 1.218518853187561, "learning_rate": 9.804108375158258e-06, "loss": 1.0787, "step": 3246 }, { "epoch": 0.5838352962330307, "grad_norm": 1.0089482069015503, "learning_rate": 9.803946913805385e-06, "loss": 1.0397, "step": 3247 }, { "epoch": 0.5840151038388924, "grad_norm": 1.5348833799362183, "learning_rate": 9.803785387269254e-06, "loss": 0.7811, "step": 3248 }, { "epoch": 0.5841949114447541, "grad_norm": 1.5606862306594849, "learning_rate": 9.803623795552057e-06, "loss": 0.7995, "step": 3249 }, { "epoch": 0.5843747190506159, "grad_norm": 1.7130401134490967, "learning_rate": 9.80346213865599e-06, "loss": 0.7843, "step": 3250 }, { "epoch": 0.5845545266564776, "grad_norm": 1.5576673746109009, "learning_rate": 9.803300416583243e-06, "loss": 0.834, "step": 3251 }, { "epoch": 0.5847343342623393, "grad_norm": 1.1392661333084106, "learning_rate": 9.803138629336013e-06, "loss": 1.0605, "step": 3252 }, { "epoch": 0.584914141868201, "grad_norm": 1.516062617301941, "learning_rate": 9.802976776916493e-06, "loss": 0.8195, "step": 3253 }, { "epoch": 0.5850939494740628, "grad_norm": 1.4482028484344482, "learning_rate": 9.802814859326882e-06, "loss": 0.8113, "step": 3254 }, { "epoch": 0.5852737570799245, "grad_norm": 1.5802934169769287, "learning_rate": 9.802652876569375e-06, "loss": 0.801, "step": 3255 }, { "epoch": 0.5854535646857862, "grad_norm": 1.070637822151184, "learning_rate": 9.80249082864617e-06, "loss": 1.0684, "step": 3256 }, { "epoch": 0.5856333722916479, "grad_norm": 1.6336556673049927, "learning_rate": 9.802328715559465e-06, "loss": 0.9058, "step": 3257 }, { "epoch": 0.5858131798975097, "grad_norm": 1.5251275300979614, "learning_rate": 9.802166537311462e-06, "loss": 0.865, "step": 3258 }, { "epoch": 0.5859929875033714, "grad_norm": 1.5004208087921143, "learning_rate": 9.802004293904359e-06, "loss": 0.8494, "step": 3259 }, { "epoch": 0.5861727951092331, "grad_norm": 1.4789659976959229, "learning_rate": 9.80184198534036e-06, "loss": 0.7589, "step": 3260 }, { "epoch": 0.5863526027150948, "grad_norm": 1.1235870122909546, "learning_rate": 9.801679611621667e-06, "loss": 1.0734, "step": 3261 }, { "epoch": 0.5865324103209566, "grad_norm": 1.4375584125518799, "learning_rate": 9.801517172750478e-06, "loss": 0.7745, "step": 3262 }, { "epoch": 0.5867122179268183, "grad_norm": 1.092891812324524, "learning_rate": 9.801354668729003e-06, "loss": 1.0495, "step": 3263 }, { "epoch": 0.58689202553268, "grad_norm": 1.5216209888458252, "learning_rate": 9.801192099559446e-06, "loss": 0.7993, "step": 3264 }, { "epoch": 0.5870718331385417, "grad_norm": 1.601683259010315, "learning_rate": 9.801029465244013e-06, "loss": 0.8888, "step": 3265 }, { "epoch": 0.5872516407444035, "grad_norm": 1.160072922706604, "learning_rate": 9.800866765784908e-06, "loss": 1.0607, "step": 3266 }, { "epoch": 0.5874314483502652, "grad_norm": 1.4915870428085327, "learning_rate": 9.80070400118434e-06, "loss": 0.8074, "step": 3267 }, { "epoch": 0.587611255956127, "grad_norm": 1.5728096961975098, "learning_rate": 9.80054117144452e-06, "loss": 0.8478, "step": 3268 }, { "epoch": 0.5877910635619886, "grad_norm": 1.4958136081695557, "learning_rate": 9.800378276567653e-06, "loss": 0.7886, "step": 3269 }, { "epoch": 0.5879708711678504, "grad_norm": 1.037546157836914, "learning_rate": 9.800215316555952e-06, "loss": 1.0312, "step": 3270 }, { "epoch": 0.5881506787737121, "grad_norm": 1.5071032047271729, "learning_rate": 9.80005229141163e-06, "loss": 0.82, "step": 3271 }, { "epoch": 0.5883304863795739, "grad_norm": 1.6891374588012695, "learning_rate": 9.799889201136893e-06, "loss": 0.8752, "step": 3272 }, { "epoch": 0.5885102939854355, "grad_norm": 1.1184735298156738, "learning_rate": 9.799726045733962e-06, "loss": 1.0022, "step": 3273 }, { "epoch": 0.5886901015912973, "grad_norm": 1.5105068683624268, "learning_rate": 9.799562825205043e-06, "loss": 0.8042, "step": 3274 }, { "epoch": 0.5888699091971591, "grad_norm": 1.2315144538879395, "learning_rate": 9.799399539552356e-06, "loss": 1.0499, "step": 3275 }, { "epoch": 0.5890497168030208, "grad_norm": 1.556138277053833, "learning_rate": 9.799236188778114e-06, "loss": 0.7946, "step": 3276 }, { "epoch": 0.5892295244088825, "grad_norm": 1.422428846359253, "learning_rate": 9.799072772884534e-06, "loss": 0.7898, "step": 3277 }, { "epoch": 0.5894093320147442, "grad_norm": 1.623464584350586, "learning_rate": 9.798909291873833e-06, "loss": 0.8633, "step": 3278 }, { "epoch": 0.589589139620606, "grad_norm": 1.5892428159713745, "learning_rate": 9.79874574574823e-06, "loss": 0.7994, "step": 3279 }, { "epoch": 0.5897689472264677, "grad_norm": 1.4535413980484009, "learning_rate": 9.798582134509944e-06, "loss": 0.8437, "step": 3280 }, { "epoch": 0.5899487548323294, "grad_norm": 1.531145691871643, "learning_rate": 9.798418458161197e-06, "loss": 0.8285, "step": 3281 }, { "epoch": 0.5901285624381911, "grad_norm": 1.480149507522583, "learning_rate": 9.798254716704206e-06, "loss": 0.8908, "step": 3282 }, { "epoch": 0.5903083700440529, "grad_norm": 1.5496126413345337, "learning_rate": 9.798090910141192e-06, "loss": 0.7923, "step": 3283 }, { "epoch": 0.5904881776499146, "grad_norm": 1.4960272312164307, "learning_rate": 9.797927038474383e-06, "loss": 0.7202, "step": 3284 }, { "epoch": 0.5906679852557764, "grad_norm": 1.487824559211731, "learning_rate": 9.797763101705999e-06, "loss": 0.8933, "step": 3285 }, { "epoch": 0.590847792861638, "grad_norm": 1.7487937211990356, "learning_rate": 9.797599099838264e-06, "loss": 0.8261, "step": 3286 }, { "epoch": 0.5910276004674998, "grad_norm": 1.5932027101516724, "learning_rate": 9.797435032873406e-06, "loss": 0.8073, "step": 3287 }, { "epoch": 0.5912074080733615, "grad_norm": 1.0775760412216187, "learning_rate": 9.797270900813649e-06, "loss": 1.0631, "step": 3288 }, { "epoch": 0.5913872156792233, "grad_norm": 1.5464116334915161, "learning_rate": 9.797106703661221e-06, "loss": 0.7842, "step": 3289 }, { "epoch": 0.5915670232850849, "grad_norm": 1.6212879419326782, "learning_rate": 9.796942441418348e-06, "loss": 0.8614, "step": 3290 }, { "epoch": 0.5917468308909467, "grad_norm": 1.4677863121032715, "learning_rate": 9.796778114087261e-06, "loss": 0.7764, "step": 3291 }, { "epoch": 0.5919266384968084, "grad_norm": 1.4922081232070923, "learning_rate": 9.79661372167019e-06, "loss": 0.799, "step": 3292 }, { "epoch": 0.5921064461026702, "grad_norm": 2.1835455894470215, "learning_rate": 9.796449264169363e-06, "loss": 0.8329, "step": 3293 }, { "epoch": 0.5922862537085318, "grad_norm": 1.4169793128967285, "learning_rate": 9.796284741587014e-06, "loss": 0.839, "step": 3294 }, { "epoch": 0.5924660613143936, "grad_norm": 1.5434385538101196, "learning_rate": 9.796120153925374e-06, "loss": 0.7548, "step": 3295 }, { "epoch": 0.5926458689202553, "grad_norm": 1.086956262588501, "learning_rate": 9.795955501186677e-06, "loss": 1.0524, "step": 3296 }, { "epoch": 0.5928256765261171, "grad_norm": 1.6051973104476929, "learning_rate": 9.795790783373157e-06, "loss": 0.8152, "step": 3297 }, { "epoch": 0.5930054841319787, "grad_norm": 1.4670206308364868, "learning_rate": 9.795626000487048e-06, "loss": 0.7793, "step": 3298 }, { "epoch": 0.5931852917378405, "grad_norm": 1.663385272026062, "learning_rate": 9.795461152530588e-06, "loss": 0.8512, "step": 3299 }, { "epoch": 0.5933650993437022, "grad_norm": 1.688636302947998, "learning_rate": 9.795296239506011e-06, "loss": 0.9445, "step": 3300 }, { "epoch": 0.593544906949564, "grad_norm": 1.5646578073501587, "learning_rate": 9.795131261415557e-06, "loss": 0.7389, "step": 3301 }, { "epoch": 0.5937247145554257, "grad_norm": 1.4920532703399658, "learning_rate": 9.794966218261463e-06, "loss": 0.8127, "step": 3302 }, { "epoch": 0.5939045221612874, "grad_norm": 1.4845365285873413, "learning_rate": 9.79480111004597e-06, "loss": 0.8212, "step": 3303 }, { "epoch": 0.5940843297671492, "grad_norm": 2.880014181137085, "learning_rate": 9.794635936771318e-06, "loss": 0.8216, "step": 3304 }, { "epoch": 0.5942641373730109, "grad_norm": 1.4158941507339478, "learning_rate": 9.794470698439745e-06, "loss": 0.7754, "step": 3305 }, { "epoch": 0.5944439449788727, "grad_norm": 1.5470995903015137, "learning_rate": 9.794305395053498e-06, "loss": 0.7434, "step": 3306 }, { "epoch": 0.5946237525847343, "grad_norm": 1.4507981538772583, "learning_rate": 9.794140026614816e-06, "loss": 0.8446, "step": 3307 }, { "epoch": 0.5948035601905961, "grad_norm": 1.3720799684524536, "learning_rate": 9.793974593125946e-06, "loss": 0.7543, "step": 3308 }, { "epoch": 0.5949833677964578, "grad_norm": 1.57358980178833, "learning_rate": 9.79380909458913e-06, "loss": 0.7486, "step": 3309 }, { "epoch": 0.5951631754023196, "grad_norm": 1.4648728370666504, "learning_rate": 9.793643531006613e-06, "loss": 0.831, "step": 3310 }, { "epoch": 0.5953429830081812, "grad_norm": 1.2271358966827393, "learning_rate": 9.793477902380646e-06, "loss": 1.0499, "step": 3311 }, { "epoch": 0.595522790614043, "grad_norm": 1.8438969850540161, "learning_rate": 9.793312208713473e-06, "loss": 0.8822, "step": 3312 }, { "epoch": 0.5957025982199047, "grad_norm": 1.5089046955108643, "learning_rate": 9.793146450007343e-06, "loss": 0.8908, "step": 3313 }, { "epoch": 0.5958824058257665, "grad_norm": 1.5914708375930786, "learning_rate": 9.792980626264504e-06, "loss": 0.7807, "step": 3314 }, { "epoch": 0.5960622134316281, "grad_norm": 1.510814905166626, "learning_rate": 9.792814737487207e-06, "loss": 0.8258, "step": 3315 }, { "epoch": 0.5962420210374899, "grad_norm": 1.4790410995483398, "learning_rate": 9.792648783677703e-06, "loss": 0.8574, "step": 3316 }, { "epoch": 0.5964218286433516, "grad_norm": 1.734568476676941, "learning_rate": 9.792482764838245e-06, "loss": 0.8756, "step": 3317 }, { "epoch": 0.5966016362492134, "grad_norm": 1.5417505502700806, "learning_rate": 9.792316680971082e-06, "loss": 0.8385, "step": 3318 }, { "epoch": 0.596781443855075, "grad_norm": 1.5601292848587036, "learning_rate": 9.79215053207847e-06, "loss": 0.8033, "step": 3319 }, { "epoch": 0.5969612514609368, "grad_norm": 1.48030686378479, "learning_rate": 9.791984318162665e-06, "loss": 0.8156, "step": 3320 }, { "epoch": 0.5971410590667985, "grad_norm": 1.3943098783493042, "learning_rate": 9.79181803922592e-06, "loss": 0.8097, "step": 3321 }, { "epoch": 0.5973208666726603, "grad_norm": 1.4191292524337769, "learning_rate": 9.791651695270492e-06, "loss": 0.8284, "step": 3322 }, { "epoch": 0.5975006742785219, "grad_norm": 1.4399136304855347, "learning_rate": 9.791485286298637e-06, "loss": 0.7623, "step": 3323 }, { "epoch": 0.5976804818843837, "grad_norm": 1.6516892910003662, "learning_rate": 9.791318812312614e-06, "loss": 0.831, "step": 3324 }, { "epoch": 0.5978602894902454, "grad_norm": 1.5622586011886597, "learning_rate": 9.791152273314682e-06, "loss": 0.8394, "step": 3325 }, { "epoch": 0.5980400970961072, "grad_norm": 1.4842205047607422, "learning_rate": 9.790985669307099e-06, "loss": 0.8046, "step": 3326 }, { "epoch": 0.5982199047019688, "grad_norm": 1.5064386129379272, "learning_rate": 9.790819000292128e-06, "loss": 0.8173, "step": 3327 }, { "epoch": 0.5983997123078306, "grad_norm": 1.6848162412643433, "learning_rate": 9.79065226627203e-06, "loss": 0.7846, "step": 3328 }, { "epoch": 0.5985795199136924, "grad_norm": 1.4632717370986938, "learning_rate": 9.790485467249065e-06, "loss": 0.7803, "step": 3329 }, { "epoch": 0.5987593275195541, "grad_norm": 1.5559582710266113, "learning_rate": 9.790318603225499e-06, "loss": 0.7997, "step": 3330 }, { "epoch": 0.5989391351254159, "grad_norm": 1.457704782485962, "learning_rate": 9.790151674203593e-06, "loss": 0.8202, "step": 3331 }, { "epoch": 0.5991189427312775, "grad_norm": 1.5046590566635132, "learning_rate": 9.789984680185618e-06, "loss": 0.8406, "step": 3332 }, { "epoch": 0.5992987503371393, "grad_norm": 1.498106837272644, "learning_rate": 9.789817621173833e-06, "loss": 0.7866, "step": 3333 }, { "epoch": 0.599478557943001, "grad_norm": 1.5081857442855835, "learning_rate": 9.789650497170509e-06, "loss": 0.7505, "step": 3334 }, { "epoch": 0.5996583655488628, "grad_norm": 1.6063034534454346, "learning_rate": 9.789483308177912e-06, "loss": 0.7492, "step": 3335 }, { "epoch": 0.5998381731547244, "grad_norm": 1.2376888990402222, "learning_rate": 9.789316054198311e-06, "loss": 1.064, "step": 3336 }, { "epoch": 0.6000179807605862, "grad_norm": 1.5587955713272095, "learning_rate": 9.789148735233975e-06, "loss": 0.8333, "step": 3337 }, { "epoch": 0.6001977883664479, "grad_norm": 1.568503499031067, "learning_rate": 9.788981351287176e-06, "loss": 0.7651, "step": 3338 }, { "epoch": 0.6003775959723097, "grad_norm": 1.5350134372711182, "learning_rate": 9.788813902360183e-06, "loss": 0.8221, "step": 3339 }, { "epoch": 0.6005574035781713, "grad_norm": 1.4955077171325684, "learning_rate": 9.78864638845527e-06, "loss": 0.7998, "step": 3340 }, { "epoch": 0.6007372111840331, "grad_norm": 1.5360511541366577, "learning_rate": 9.788478809574707e-06, "loss": 0.8235, "step": 3341 }, { "epoch": 0.6009170187898948, "grad_norm": 1.5243498086929321, "learning_rate": 9.78831116572077e-06, "loss": 0.7999, "step": 3342 }, { "epoch": 0.6010968263957566, "grad_norm": 1.5482056140899658, "learning_rate": 9.788143456895734e-06, "loss": 0.8362, "step": 3343 }, { "epoch": 0.6012766340016182, "grad_norm": 1.4209396839141846, "learning_rate": 9.787975683101875e-06, "loss": 0.7997, "step": 3344 }, { "epoch": 0.60145644160748, "grad_norm": 1.2429369688034058, "learning_rate": 9.787807844341467e-06, "loss": 1.0232, "step": 3345 }, { "epoch": 0.6016362492133417, "grad_norm": 1.6225254535675049, "learning_rate": 9.787639940616789e-06, "loss": 0.7597, "step": 3346 }, { "epoch": 0.6018160568192035, "grad_norm": 1.1330385208129883, "learning_rate": 9.78747197193012e-06, "loss": 1.0666, "step": 3347 }, { "epoch": 0.6019958644250651, "grad_norm": 1.0333209037780762, "learning_rate": 9.787303938283736e-06, "loss": 1.086, "step": 3348 }, { "epoch": 0.6021756720309269, "grad_norm": 2.0748636722564697, "learning_rate": 9.787135839679923e-06, "loss": 0.7664, "step": 3349 }, { "epoch": 0.6023554796367886, "grad_norm": 1.0525293350219727, "learning_rate": 9.786967676120954e-06, "loss": 1.0327, "step": 3350 }, { "epoch": 0.6025352872426504, "grad_norm": 1.5836749076843262, "learning_rate": 9.786799447609116e-06, "loss": 0.8553, "step": 3351 }, { "epoch": 0.602715094848512, "grad_norm": 1.6369322538375854, "learning_rate": 9.786631154146691e-06, "loss": 0.7791, "step": 3352 }, { "epoch": 0.6028949024543738, "grad_norm": 1.4325602054595947, "learning_rate": 9.786462795735962e-06, "loss": 0.7575, "step": 3353 }, { "epoch": 0.6030747100602355, "grad_norm": 1.436121940612793, "learning_rate": 9.786294372379214e-06, "loss": 0.8508, "step": 3354 }, { "epoch": 0.6032545176660973, "grad_norm": 1.4792640209197998, "learning_rate": 9.78612588407873e-06, "loss": 0.8238, "step": 3355 }, { "epoch": 0.603434325271959, "grad_norm": 2.399123430252075, "learning_rate": 9.7859573308368e-06, "loss": 0.8121, "step": 3356 }, { "epoch": 0.6036141328778207, "grad_norm": 1.675129771232605, "learning_rate": 9.785788712655706e-06, "loss": 0.858, "step": 3357 }, { "epoch": 0.6037939404836825, "grad_norm": 1.7552344799041748, "learning_rate": 9.785620029537741e-06, "loss": 0.8256, "step": 3358 }, { "epoch": 0.6039737480895442, "grad_norm": 1.4854068756103516, "learning_rate": 9.78545128148519e-06, "loss": 0.8688, "step": 3359 }, { "epoch": 0.604153555695406, "grad_norm": 1.5975661277770996, "learning_rate": 9.785282468500345e-06, "loss": 0.8373, "step": 3360 }, { "epoch": 0.6043333633012676, "grad_norm": 1.4275513887405396, "learning_rate": 9.785113590585497e-06, "loss": 0.7851, "step": 3361 }, { "epoch": 0.6045131709071294, "grad_norm": 1.5623143911361694, "learning_rate": 9.784944647742936e-06, "loss": 0.8106, "step": 3362 }, { "epoch": 0.6046929785129911, "grad_norm": 1.6040343046188354, "learning_rate": 9.784775639974952e-06, "loss": 0.8248, "step": 3363 }, { "epoch": 0.6048727861188529, "grad_norm": 1.5488693714141846, "learning_rate": 9.784606567283843e-06, "loss": 0.8721, "step": 3364 }, { "epoch": 0.6050525937247145, "grad_norm": 1.577392578125, "learning_rate": 9.784437429671901e-06, "loss": 0.7616, "step": 3365 }, { "epoch": 0.6052324013305763, "grad_norm": 2.7159979343414307, "learning_rate": 9.78426822714142e-06, "loss": 0.7959, "step": 3366 }, { "epoch": 0.605412208936438, "grad_norm": 1.4732085466384888, "learning_rate": 9.784098959694699e-06, "loss": 0.8264, "step": 3367 }, { "epoch": 0.6055920165422998, "grad_norm": 1.4835654497146606, "learning_rate": 9.78392962733403e-06, "loss": 0.8285, "step": 3368 }, { "epoch": 0.6057718241481614, "grad_norm": 1.583545446395874, "learning_rate": 9.783760230061714e-06, "loss": 0.862, "step": 3369 }, { "epoch": 0.6059516317540232, "grad_norm": 1.2008827924728394, "learning_rate": 9.78359076788005e-06, "loss": 1.0049, "step": 3370 }, { "epoch": 0.6061314393598849, "grad_norm": 1.4112858772277832, "learning_rate": 9.783421240791334e-06, "loss": 0.8095, "step": 3371 }, { "epoch": 0.6063112469657467, "grad_norm": 1.514825463294983, "learning_rate": 9.783251648797869e-06, "loss": 0.8641, "step": 3372 }, { "epoch": 0.6064910545716083, "grad_norm": 1.6009634733200073, "learning_rate": 9.783081991901955e-06, "loss": 0.8386, "step": 3373 }, { "epoch": 0.6066708621774701, "grad_norm": 1.4834179878234863, "learning_rate": 9.782912270105893e-06, "loss": 0.8444, "step": 3374 }, { "epoch": 0.6068506697833318, "grad_norm": 1.2074540853500366, "learning_rate": 9.78274248341199e-06, "loss": 1.0687, "step": 3375 }, { "epoch": 0.6070304773891936, "grad_norm": 1.5416754484176636, "learning_rate": 9.782572631822547e-06, "loss": 0.8384, "step": 3376 }, { "epoch": 0.6072102849950552, "grad_norm": 1.5861644744873047, "learning_rate": 9.782402715339866e-06, "loss": 0.8556, "step": 3377 }, { "epoch": 0.607390092600917, "grad_norm": 1.5655597448349, "learning_rate": 9.782232733966258e-06, "loss": 0.782, "step": 3378 }, { "epoch": 0.6075699002067787, "grad_norm": 1.4792035818099976, "learning_rate": 9.782062687704026e-06, "loss": 0.8812, "step": 3379 }, { "epoch": 0.6077497078126405, "grad_norm": 1.1131714582443237, "learning_rate": 9.781892576555478e-06, "loss": 1.0898, "step": 3380 }, { "epoch": 0.6079295154185022, "grad_norm": 1.5662554502487183, "learning_rate": 9.781722400522922e-06, "loss": 0.8673, "step": 3381 }, { "epoch": 0.6081093230243639, "grad_norm": 1.5451499223709106, "learning_rate": 9.781552159608668e-06, "loss": 0.7791, "step": 3382 }, { "epoch": 0.6082891306302256, "grad_norm": 1.0939857959747314, "learning_rate": 9.781381853815024e-06, "loss": 1.0568, "step": 3383 }, { "epoch": 0.6084689382360874, "grad_norm": 1.5810092687606812, "learning_rate": 9.781211483144304e-06, "loss": 0.7902, "step": 3384 }, { "epoch": 0.6086487458419492, "grad_norm": 1.5539205074310303, "learning_rate": 9.781041047598815e-06, "loss": 0.8692, "step": 3385 }, { "epoch": 0.6088285534478108, "grad_norm": 1.6811671257019043, "learning_rate": 9.780870547180874e-06, "loss": 0.8361, "step": 3386 }, { "epoch": 0.6090083610536726, "grad_norm": 1.5141291618347168, "learning_rate": 9.780699981892793e-06, "loss": 0.7909, "step": 3387 }, { "epoch": 0.6091881686595343, "grad_norm": 1.5618785619735718, "learning_rate": 9.780529351736887e-06, "loss": 0.8136, "step": 3388 }, { "epoch": 0.6093679762653961, "grad_norm": 1.5106369256973267, "learning_rate": 9.78035865671547e-06, "loss": 0.8014, "step": 3389 }, { "epoch": 0.6095477838712577, "grad_norm": 1.4575921297073364, "learning_rate": 9.780187896830857e-06, "loss": 0.7778, "step": 3390 }, { "epoch": 0.6097275914771195, "grad_norm": 1.4846312999725342, "learning_rate": 9.780017072085368e-06, "loss": 0.9152, "step": 3391 }, { "epoch": 0.6099073990829812, "grad_norm": 1.5830379724502563, "learning_rate": 9.779846182481319e-06, "loss": 0.823, "step": 3392 }, { "epoch": 0.610087206688843, "grad_norm": 1.4686930179595947, "learning_rate": 9.779675228021028e-06, "loss": 0.821, "step": 3393 }, { "epoch": 0.6102670142947046, "grad_norm": 1.1231708526611328, "learning_rate": 9.779504208706819e-06, "loss": 1.0915, "step": 3394 }, { "epoch": 0.6104468219005664, "grad_norm": 1.5641006231307983, "learning_rate": 9.779333124541006e-06, "loss": 0.8222, "step": 3395 }, { "epoch": 0.6106266295064281, "grad_norm": 1.4693138599395752, "learning_rate": 9.779161975525914e-06, "loss": 0.8082, "step": 3396 }, { "epoch": 0.6108064371122899, "grad_norm": 1.3758944272994995, "learning_rate": 9.778990761663864e-06, "loss": 0.8043, "step": 3397 }, { "epoch": 0.6109862447181515, "grad_norm": 1.7014389038085938, "learning_rate": 9.778819482957182e-06, "loss": 0.8682, "step": 3398 }, { "epoch": 0.6111660523240133, "grad_norm": 1.681020736694336, "learning_rate": 9.77864813940819e-06, "loss": 0.8366, "step": 3399 }, { "epoch": 0.611345859929875, "grad_norm": 1.7121474742889404, "learning_rate": 9.778476731019212e-06, "loss": 0.8619, "step": 3400 }, { "epoch": 0.6115256675357368, "grad_norm": 1.4972491264343262, "learning_rate": 9.778305257792576e-06, "loss": 0.8718, "step": 3401 }, { "epoch": 0.6117054751415985, "grad_norm": 1.0801684856414795, "learning_rate": 9.778133719730606e-06, "loss": 1.0271, "step": 3402 }, { "epoch": 0.6118852827474602, "grad_norm": 1.477835774421692, "learning_rate": 9.777962116835633e-06, "loss": 0.8589, "step": 3403 }, { "epoch": 0.6120650903533219, "grad_norm": 1.5093804597854614, "learning_rate": 9.777790449109981e-06, "loss": 0.8663, "step": 3404 }, { "epoch": 0.6122448979591837, "grad_norm": 1.472908616065979, "learning_rate": 9.777618716555984e-06, "loss": 0.8286, "step": 3405 }, { "epoch": 0.6124247055650454, "grad_norm": 1.6176666021347046, "learning_rate": 9.777446919175968e-06, "loss": 0.865, "step": 3406 }, { "epoch": 0.6126045131709071, "grad_norm": 1.481029748916626, "learning_rate": 9.777275056972268e-06, "loss": 0.8325, "step": 3407 }, { "epoch": 0.6127843207767688, "grad_norm": 0.9874873161315918, "learning_rate": 9.777103129947212e-06, "loss": 1.0499, "step": 3408 }, { "epoch": 0.6129641283826306, "grad_norm": 1.5698529481887817, "learning_rate": 9.776931138103136e-06, "loss": 0.8471, "step": 3409 }, { "epoch": 0.6131439359884923, "grad_norm": 1.0876522064208984, "learning_rate": 9.77675908144237e-06, "loss": 1.0613, "step": 3410 }, { "epoch": 0.613323743594354, "grad_norm": 1.5336103439331055, "learning_rate": 9.776586959967254e-06, "loss": 0.8571, "step": 3411 }, { "epoch": 0.6135035512002158, "grad_norm": 1.4189844131469727, "learning_rate": 9.77641477368012e-06, "loss": 0.8182, "step": 3412 }, { "epoch": 0.6136833588060775, "grad_norm": 1.0240546464920044, "learning_rate": 9.776242522583304e-06, "loss": 1.0351, "step": 3413 }, { "epoch": 0.6138631664119393, "grad_norm": 1.0970382690429688, "learning_rate": 9.776070206679145e-06, "loss": 1.0587, "step": 3414 }, { "epoch": 0.614042974017801, "grad_norm": 1.5223926305770874, "learning_rate": 9.775897825969978e-06, "loss": 0.8423, "step": 3415 }, { "epoch": 0.6142227816236627, "grad_norm": 1.515265703201294, "learning_rate": 9.775725380458145e-06, "loss": 0.8322, "step": 3416 }, { "epoch": 0.6144025892295244, "grad_norm": 1.710983395576477, "learning_rate": 9.775552870145987e-06, "loss": 0.8977, "step": 3417 }, { "epoch": 0.6145823968353862, "grad_norm": 1.497302532196045, "learning_rate": 9.775380295035841e-06, "loss": 0.853, "step": 3418 }, { "epoch": 0.6147622044412479, "grad_norm": 1.5375587940216064, "learning_rate": 9.77520765513005e-06, "loss": 0.8015, "step": 3419 }, { "epoch": 0.6149420120471096, "grad_norm": 1.4296332597732544, "learning_rate": 9.775034950430957e-06, "loss": 0.8196, "step": 3420 }, { "epoch": 0.6151218196529713, "grad_norm": 1.4907188415527344, "learning_rate": 9.774862180940908e-06, "loss": 0.8417, "step": 3421 }, { "epoch": 0.6153016272588331, "grad_norm": 1.1435331106185913, "learning_rate": 9.77468934666224e-06, "loss": 1.0475, "step": 3422 }, { "epoch": 0.6154814348646948, "grad_norm": 1.0655461549758911, "learning_rate": 9.774516447597305e-06, "loss": 1.0826, "step": 3423 }, { "epoch": 0.6156612424705565, "grad_norm": 1.5880253314971924, "learning_rate": 9.774343483748448e-06, "loss": 0.79, "step": 3424 }, { "epoch": 0.6158410500764182, "grad_norm": 1.403638482093811, "learning_rate": 9.774170455118012e-06, "loss": 0.8376, "step": 3425 }, { "epoch": 0.61602085768228, "grad_norm": 1.5542000532150269, "learning_rate": 9.773997361708347e-06, "loss": 0.8323, "step": 3426 }, { "epoch": 0.6162006652881417, "grad_norm": 1.506783127784729, "learning_rate": 9.773824203521804e-06, "loss": 0.8108, "step": 3427 }, { "epoch": 0.6163804728940034, "grad_norm": 1.5990583896636963, "learning_rate": 9.77365098056073e-06, "loss": 0.8369, "step": 3428 }, { "epoch": 0.6165602804998651, "grad_norm": 1.494623064994812, "learning_rate": 9.773477692827476e-06, "loss": 0.7741, "step": 3429 }, { "epoch": 0.6167400881057269, "grad_norm": 1.4372174739837646, "learning_rate": 9.773304340324392e-06, "loss": 0.8098, "step": 3430 }, { "epoch": 0.6169198957115886, "grad_norm": 1.5868796110153198, "learning_rate": 9.773130923053832e-06, "loss": 0.9325, "step": 3431 }, { "epoch": 0.6170997033174503, "grad_norm": 1.381146788597107, "learning_rate": 9.772957441018148e-06, "loss": 1.0916, "step": 3432 }, { "epoch": 0.617279510923312, "grad_norm": 1.4867686033248901, "learning_rate": 9.772783894219695e-06, "loss": 0.7598, "step": 3433 }, { "epoch": 0.6174593185291738, "grad_norm": 1.4221917390823364, "learning_rate": 9.772610282660826e-06, "loss": 0.7758, "step": 3434 }, { "epoch": 0.6176391261350355, "grad_norm": 1.1363418102264404, "learning_rate": 9.772436606343899e-06, "loss": 1.0519, "step": 3435 }, { "epoch": 0.6178189337408972, "grad_norm": 1.4615731239318848, "learning_rate": 9.77226286527127e-06, "loss": 0.8368, "step": 3436 }, { "epoch": 0.6179987413467589, "grad_norm": 1.4904664754867554, "learning_rate": 9.772089059445293e-06, "loss": 0.7856, "step": 3437 }, { "epoch": 0.6181785489526207, "grad_norm": 1.5986502170562744, "learning_rate": 9.77191518886833e-06, "loss": 0.7452, "step": 3438 }, { "epoch": 0.6183583565584824, "grad_norm": 1.6174569129943848, "learning_rate": 9.771741253542742e-06, "loss": 0.8799, "step": 3439 }, { "epoch": 0.6185381641643442, "grad_norm": 1.5084404945373535, "learning_rate": 9.771567253470884e-06, "loss": 0.7729, "step": 3440 }, { "epoch": 0.6187179717702059, "grad_norm": 1.4044272899627686, "learning_rate": 9.771393188655119e-06, "loss": 0.7817, "step": 3441 }, { "epoch": 0.6188977793760676, "grad_norm": 1.3972868919372559, "learning_rate": 9.77121905909781e-06, "loss": 0.7198, "step": 3442 }, { "epoch": 0.6190775869819294, "grad_norm": 1.6210927963256836, "learning_rate": 9.771044864801319e-06, "loss": 0.8264, "step": 3443 }, { "epoch": 0.619257394587791, "grad_norm": 1.4798904657363892, "learning_rate": 9.770870605768009e-06, "loss": 0.8432, "step": 3444 }, { "epoch": 0.6194372021936528, "grad_norm": 1.518361210823059, "learning_rate": 9.770696282000245e-06, "loss": 0.8367, "step": 3445 }, { "epoch": 0.6196170097995145, "grad_norm": 1.465500831604004, "learning_rate": 9.770521893500394e-06, "loss": 0.7769, "step": 3446 }, { "epoch": 0.6197968174053763, "grad_norm": 1.246908187866211, "learning_rate": 9.770347440270818e-06, "loss": 0.9982, "step": 3447 }, { "epoch": 0.619976625011238, "grad_norm": 1.616281509399414, "learning_rate": 9.770172922313887e-06, "loss": 0.8627, "step": 3448 }, { "epoch": 0.6201564326170997, "grad_norm": 1.5493781566619873, "learning_rate": 9.76999833963197e-06, "loss": 0.8409, "step": 3449 }, { "epoch": 0.6203362402229614, "grad_norm": 1.5929161310195923, "learning_rate": 9.769823692227431e-06, "loss": 0.8406, "step": 3450 }, { "epoch": 0.6205160478288232, "grad_norm": 1.1220825910568237, "learning_rate": 9.769648980102647e-06, "loss": 1.085, "step": 3451 }, { "epoch": 0.6206958554346849, "grad_norm": 1.0340633392333984, "learning_rate": 9.769474203259983e-06, "loss": 1.0494, "step": 3452 }, { "epoch": 0.6208756630405466, "grad_norm": 1.5822497606277466, "learning_rate": 9.769299361701812e-06, "loss": 0.8604, "step": 3453 }, { "epoch": 0.6210554706464083, "grad_norm": 1.586508870124817, "learning_rate": 9.769124455430508e-06, "loss": 0.8633, "step": 3454 }, { "epoch": 0.6212352782522701, "grad_norm": 1.4510546922683716, "learning_rate": 9.768949484448442e-06, "loss": 0.7717, "step": 3455 }, { "epoch": 0.6214150858581318, "grad_norm": 1.4524308443069458, "learning_rate": 9.768774448757989e-06, "loss": 0.8314, "step": 3456 }, { "epoch": 0.6215948934639935, "grad_norm": 1.513131022453308, "learning_rate": 9.768599348361524e-06, "loss": 0.8236, "step": 3457 }, { "epoch": 0.6217747010698552, "grad_norm": 1.5710314512252808, "learning_rate": 9.768424183261423e-06, "loss": 0.7758, "step": 3458 }, { "epoch": 0.621954508675717, "grad_norm": 1.2006813287734985, "learning_rate": 9.768248953460062e-06, "loss": 1.0569, "step": 3459 }, { "epoch": 0.6221343162815787, "grad_norm": 1.5427957773208618, "learning_rate": 9.76807365895982e-06, "loss": 0.8391, "step": 3460 }, { "epoch": 0.6223141238874405, "grad_norm": 1.5935083627700806, "learning_rate": 9.767898299763074e-06, "loss": 0.775, "step": 3461 }, { "epoch": 0.6224939314933021, "grad_norm": 1.5112255811691284, "learning_rate": 9.767722875872207e-06, "loss": 0.7881, "step": 3462 }, { "epoch": 0.6226737390991639, "grad_norm": 1.4118330478668213, "learning_rate": 9.767547387289594e-06, "loss": 0.8152, "step": 3463 }, { "epoch": 0.6228535467050256, "grad_norm": 1.515035629272461, "learning_rate": 9.767371834017618e-06, "loss": 0.8583, "step": 3464 }, { "epoch": 0.6230333543108874, "grad_norm": 1.5265992879867554, "learning_rate": 9.767196216058663e-06, "loss": 0.8048, "step": 3465 }, { "epoch": 0.623213161916749, "grad_norm": 1.6187078952789307, "learning_rate": 9.76702053341511e-06, "loss": 0.8505, "step": 3466 }, { "epoch": 0.6233929695226108, "grad_norm": 1.602996826171875, "learning_rate": 9.766844786089345e-06, "loss": 0.8572, "step": 3467 }, { "epoch": 0.6235727771284726, "grad_norm": 1.551213026046753, "learning_rate": 9.766668974083749e-06, "loss": 0.8096, "step": 3468 }, { "epoch": 0.6237525847343343, "grad_norm": 1.5882290601730347, "learning_rate": 9.766493097400711e-06, "loss": 0.7718, "step": 3469 }, { "epoch": 0.623932392340196, "grad_norm": 1.4494166374206543, "learning_rate": 9.766317156042615e-06, "loss": 0.8519, "step": 3470 }, { "epoch": 0.6241121999460577, "grad_norm": 1.120184063911438, "learning_rate": 9.766141150011849e-06, "loss": 1.0422, "step": 3471 }, { "epoch": 0.6242920075519195, "grad_norm": 1.4807640314102173, "learning_rate": 9.765965079310802e-06, "loss": 0.8184, "step": 3472 }, { "epoch": 0.6244718151577812, "grad_norm": 1.58218252658844, "learning_rate": 9.765788943941862e-06, "loss": 0.7657, "step": 3473 }, { "epoch": 0.6246516227636429, "grad_norm": 1.470329999923706, "learning_rate": 9.76561274390742e-06, "loss": 0.8294, "step": 3474 }, { "epoch": 0.6248314303695046, "grad_norm": 1.6287899017333984, "learning_rate": 9.765436479209866e-06, "loss": 0.8135, "step": 3475 }, { "epoch": 0.6250112379753664, "grad_norm": 1.6211212873458862, "learning_rate": 9.765260149851592e-06, "loss": 0.8174, "step": 3476 }, { "epoch": 0.6251910455812281, "grad_norm": 1.5861625671386719, "learning_rate": 9.76508375583499e-06, "loss": 0.8063, "step": 3477 }, { "epoch": 0.6253708531870898, "grad_norm": 1.4849228858947754, "learning_rate": 9.764907297162454e-06, "loss": 0.8009, "step": 3478 }, { "epoch": 0.6255506607929515, "grad_norm": 1.4876426458358765, "learning_rate": 9.764730773836377e-06, "loss": 0.7991, "step": 3479 }, { "epoch": 0.6257304683988133, "grad_norm": 1.4752585887908936, "learning_rate": 9.764554185859158e-06, "loss": 0.7557, "step": 3480 }, { "epoch": 0.625910276004675, "grad_norm": 1.396493911743164, "learning_rate": 9.76437753323319e-06, "loss": 0.7569, "step": 3481 }, { "epoch": 0.6260900836105368, "grad_norm": 1.347867488861084, "learning_rate": 9.764200815960869e-06, "loss": 0.7771, "step": 3482 }, { "epoch": 0.6262698912163984, "grad_norm": 1.168237328529358, "learning_rate": 9.764024034044594e-06, "loss": 1.0608, "step": 3483 }, { "epoch": 0.6264496988222602, "grad_norm": 1.482656478881836, "learning_rate": 9.763847187486763e-06, "loss": 0.7895, "step": 3484 }, { "epoch": 0.6266295064281219, "grad_norm": 1.6178290843963623, "learning_rate": 9.76367027628978e-06, "loss": 0.8613, "step": 3485 }, { "epoch": 0.6268093140339837, "grad_norm": 1.4976242780685425, "learning_rate": 9.763493300456039e-06, "loss": 0.7772, "step": 3486 }, { "epoch": 0.6269891216398453, "grad_norm": 1.4696662425994873, "learning_rate": 9.763316259987944e-06, "loss": 0.8509, "step": 3487 }, { "epoch": 0.6271689292457071, "grad_norm": 1.6042771339416504, "learning_rate": 9.763139154887899e-06, "loss": 0.7775, "step": 3488 }, { "epoch": 0.6273487368515688, "grad_norm": 1.6972230672836304, "learning_rate": 9.762961985158306e-06, "loss": 0.8464, "step": 3489 }, { "epoch": 0.6275285444574306, "grad_norm": 1.1649380922317505, "learning_rate": 9.762784750801568e-06, "loss": 1.0336, "step": 3490 }, { "epoch": 0.6277083520632922, "grad_norm": 1.6106438636779785, "learning_rate": 9.762607451820091e-06, "loss": 0.8188, "step": 3491 }, { "epoch": 0.627888159669154, "grad_norm": 1.4491779804229736, "learning_rate": 9.76243008821628e-06, "loss": 0.8647, "step": 3492 }, { "epoch": 0.6280679672750157, "grad_norm": 1.4598335027694702, "learning_rate": 9.76225265999254e-06, "loss": 0.787, "step": 3493 }, { "epoch": 0.6282477748808775, "grad_norm": 1.4600250720977783, "learning_rate": 9.762075167151282e-06, "loss": 0.7756, "step": 3494 }, { "epoch": 0.6284275824867391, "grad_norm": 1.5835294723510742, "learning_rate": 9.76189760969491e-06, "loss": 0.7684, "step": 3495 }, { "epoch": 0.6286073900926009, "grad_norm": 1.5338704586029053, "learning_rate": 9.761719987625838e-06, "loss": 0.7347, "step": 3496 }, { "epoch": 0.6287871976984627, "grad_norm": 1.6284774541854858, "learning_rate": 9.761542300946472e-06, "loss": 0.7639, "step": 3497 }, { "epoch": 0.6289670053043244, "grad_norm": 1.181209921836853, "learning_rate": 9.761364549659227e-06, "loss": 1.0143, "step": 3498 }, { "epoch": 0.6291468129101861, "grad_norm": 1.6595048904418945, "learning_rate": 9.76118673376651e-06, "loss": 0.809, "step": 3499 }, { "epoch": 0.6293266205160478, "grad_norm": 1.4188779592514038, "learning_rate": 9.761008853270739e-06, "loss": 0.7911, "step": 3500 }, { "epoch": 0.6293266205160478, "eval_loss": 0.8417044281959534, "eval_runtime": 148.7772, "eval_samples_per_second": 96.668, "eval_steps_per_second": 1.512, "step": 3500 }, { "epoch": 0.6295064281219096, "grad_norm": 1.5968626737594604, "learning_rate": 9.760830908174323e-06, "loss": 0.8494, "step": 3501 }, { "epoch": 0.6296862357277713, "grad_norm": 1.1334625482559204, "learning_rate": 9.760652898479679e-06, "loss": 1.0068, "step": 3502 }, { "epoch": 0.629866043333633, "grad_norm": 1.5654323101043701, "learning_rate": 9.760474824189222e-06, "loss": 0.8031, "step": 3503 }, { "epoch": 0.6300458509394947, "grad_norm": 1.550211787223816, "learning_rate": 9.760296685305368e-06, "loss": 0.855, "step": 3504 }, { "epoch": 0.6302256585453565, "grad_norm": 0.9999522566795349, "learning_rate": 9.760118481830534e-06, "loss": 1.0798, "step": 3505 }, { "epoch": 0.6304054661512182, "grad_norm": 1.4096649885177612, "learning_rate": 9.759940213767139e-06, "loss": 0.7454, "step": 3506 }, { "epoch": 0.63058527375708, "grad_norm": 1.7810910940170288, "learning_rate": 9.7597618811176e-06, "loss": 0.8497, "step": 3507 }, { "epoch": 0.6307650813629416, "grad_norm": 1.514887809753418, "learning_rate": 9.759583483884338e-06, "loss": 0.793, "step": 3508 }, { "epoch": 0.6309448889688034, "grad_norm": 1.3306680917739868, "learning_rate": 9.759405022069773e-06, "loss": 0.7412, "step": 3509 }, { "epoch": 0.6311246965746651, "grad_norm": 1.465456485748291, "learning_rate": 9.759226495676328e-06, "loss": 0.7999, "step": 3510 }, { "epoch": 0.6313045041805269, "grad_norm": 1.5285333395004272, "learning_rate": 9.759047904706422e-06, "loss": 0.8463, "step": 3511 }, { "epoch": 0.6314843117863885, "grad_norm": 1.6598564386367798, "learning_rate": 9.758869249162483e-06, "loss": 0.8364, "step": 3512 }, { "epoch": 0.6316641193922503, "grad_norm": 1.6431975364685059, "learning_rate": 9.75869052904693e-06, "loss": 0.8555, "step": 3513 }, { "epoch": 0.631843926998112, "grad_norm": 1.4663519859313965, "learning_rate": 9.758511744362193e-06, "loss": 0.7639, "step": 3514 }, { "epoch": 0.6320237346039738, "grad_norm": 1.4794623851776123, "learning_rate": 9.758332895110693e-06, "loss": 0.7499, "step": 3515 }, { "epoch": 0.6322035422098354, "grad_norm": 1.5352468490600586, "learning_rate": 9.758153981294863e-06, "loss": 0.8569, "step": 3516 }, { "epoch": 0.6323833498156972, "grad_norm": 1.4641882181167603, "learning_rate": 9.757975002917124e-06, "loss": 0.7071, "step": 3517 }, { "epoch": 0.6325631574215589, "grad_norm": 1.45225191116333, "learning_rate": 9.757795959979906e-06, "loss": 0.8184, "step": 3518 }, { "epoch": 0.6327429650274207, "grad_norm": 1.4993889331817627, "learning_rate": 9.757616852485642e-06, "loss": 0.8622, "step": 3519 }, { "epoch": 0.6329227726332823, "grad_norm": 1.5364187955856323, "learning_rate": 9.75743768043676e-06, "loss": 0.8297, "step": 3520 }, { "epoch": 0.6331025802391441, "grad_norm": 1.514627456665039, "learning_rate": 9.75725844383569e-06, "loss": 0.772, "step": 3521 }, { "epoch": 0.6332823878450058, "grad_norm": 1.157080054283142, "learning_rate": 9.757079142684866e-06, "loss": 1.0827, "step": 3522 }, { "epoch": 0.6334621954508676, "grad_norm": 1.6068296432495117, "learning_rate": 9.75689977698672e-06, "loss": 0.884, "step": 3523 }, { "epoch": 0.6336420030567294, "grad_norm": 1.415485143661499, "learning_rate": 9.756720346743685e-06, "loss": 0.794, "step": 3524 }, { "epoch": 0.633821810662591, "grad_norm": 1.4237548112869263, "learning_rate": 9.756540851958196e-06, "loss": 0.7721, "step": 3525 }, { "epoch": 0.6340016182684528, "grad_norm": 1.4646574258804321, "learning_rate": 9.75636129263269e-06, "loss": 0.7876, "step": 3526 }, { "epoch": 0.6341814258743145, "grad_norm": 1.4729052782058716, "learning_rate": 9.756181668769601e-06, "loss": 0.7574, "step": 3527 }, { "epoch": 0.6343612334801763, "grad_norm": 1.132448673248291, "learning_rate": 9.756001980371368e-06, "loss": 1.082, "step": 3528 }, { "epoch": 0.6345410410860379, "grad_norm": 1.4438998699188232, "learning_rate": 9.755822227440431e-06, "loss": 0.7477, "step": 3529 }, { "epoch": 0.6347208486918997, "grad_norm": 1.4861159324645996, "learning_rate": 9.755642409979222e-06, "loss": 0.7712, "step": 3530 }, { "epoch": 0.6349006562977614, "grad_norm": 1.070526361465454, "learning_rate": 9.75546252799019e-06, "loss": 1.0557, "step": 3531 }, { "epoch": 0.6350804639036232, "grad_norm": 1.4596112966537476, "learning_rate": 9.755282581475769e-06, "loss": 0.7612, "step": 3532 }, { "epoch": 0.6352602715094848, "grad_norm": 1.6020896434783936, "learning_rate": 9.755102570438402e-06, "loss": 0.746, "step": 3533 }, { "epoch": 0.6354400791153466, "grad_norm": 1.5195907354354858, "learning_rate": 9.754922494880535e-06, "loss": 0.8184, "step": 3534 }, { "epoch": 0.6356198867212083, "grad_norm": 1.0349743366241455, "learning_rate": 9.754742354804607e-06, "loss": 1.0488, "step": 3535 }, { "epoch": 0.6357996943270701, "grad_norm": 1.541253685951233, "learning_rate": 9.754562150213064e-06, "loss": 0.8433, "step": 3536 }, { "epoch": 0.6359795019329317, "grad_norm": 1.5152885913848877, "learning_rate": 9.754381881108353e-06, "loss": 0.7795, "step": 3537 }, { "epoch": 0.6361593095387935, "grad_norm": 1.1698734760284424, "learning_rate": 9.754201547492918e-06, "loss": 1.029, "step": 3538 }, { "epoch": 0.6363391171446552, "grad_norm": 1.0250229835510254, "learning_rate": 9.754021149369206e-06, "loss": 1.0887, "step": 3539 }, { "epoch": 0.636518924750517, "grad_norm": 1.824450969696045, "learning_rate": 9.753840686739664e-06, "loss": 0.7697, "step": 3540 }, { "epoch": 0.6366987323563786, "grad_norm": 1.7179101705551147, "learning_rate": 9.753660159606742e-06, "loss": 0.8082, "step": 3541 }, { "epoch": 0.6368785399622404, "grad_norm": 1.508944034576416, "learning_rate": 9.75347956797289e-06, "loss": 0.7953, "step": 3542 }, { "epoch": 0.6370583475681021, "grad_norm": 1.438408374786377, "learning_rate": 9.753298911840556e-06, "loss": 0.7205, "step": 3543 }, { "epoch": 0.6372381551739639, "grad_norm": 1.5127010345458984, "learning_rate": 9.753118191212191e-06, "loss": 0.7908, "step": 3544 }, { "epoch": 0.6374179627798255, "grad_norm": 1.4438978433609009, "learning_rate": 9.752937406090252e-06, "loss": 0.8415, "step": 3545 }, { "epoch": 0.6375977703856873, "grad_norm": 1.1138522624969482, "learning_rate": 9.752756556477189e-06, "loss": 1.0461, "step": 3546 }, { "epoch": 0.637777577991549, "grad_norm": 1.4847413301467896, "learning_rate": 9.752575642375454e-06, "loss": 0.809, "step": 3547 }, { "epoch": 0.6379573855974108, "grad_norm": 1.5216268301010132, "learning_rate": 9.752394663787505e-06, "loss": 0.8159, "step": 3548 }, { "epoch": 0.6381371932032724, "grad_norm": 1.0850328207015991, "learning_rate": 9.752213620715796e-06, "loss": 1.1024, "step": 3549 }, { "epoch": 0.6383170008091342, "grad_norm": 1.073303461074829, "learning_rate": 9.752032513162783e-06, "loss": 1.0574, "step": 3550 }, { "epoch": 0.638496808414996, "grad_norm": 1.6652092933654785, "learning_rate": 9.751851341130925e-06, "loss": 0.8299, "step": 3551 }, { "epoch": 0.6386766160208577, "grad_norm": 1.5327032804489136, "learning_rate": 9.751670104622679e-06, "loss": 0.8229, "step": 3552 }, { "epoch": 0.6388564236267195, "grad_norm": 1.4297258853912354, "learning_rate": 9.751488803640505e-06, "loss": 0.7987, "step": 3553 }, { "epoch": 0.6390362312325811, "grad_norm": 1.4587271213531494, "learning_rate": 9.75130743818686e-06, "loss": 0.7999, "step": 3554 }, { "epoch": 0.6392160388384429, "grad_norm": 1.5220246315002441, "learning_rate": 9.75112600826421e-06, "loss": 0.8851, "step": 3555 }, { "epoch": 0.6393958464443046, "grad_norm": 1.516781210899353, "learning_rate": 9.750944513875013e-06, "loss": 0.8132, "step": 3556 }, { "epoch": 0.6395756540501664, "grad_norm": 1.5542508363723755, "learning_rate": 9.750762955021734e-06, "loss": 0.8172, "step": 3557 }, { "epoch": 0.639755461656028, "grad_norm": 1.5272094011306763, "learning_rate": 9.750581331706836e-06, "loss": 0.8504, "step": 3558 }, { "epoch": 0.6399352692618898, "grad_norm": 1.5472115278244019, "learning_rate": 9.750399643932781e-06, "loss": 0.8361, "step": 3559 }, { "epoch": 0.6401150768677515, "grad_norm": 1.5284212827682495, "learning_rate": 9.750217891702036e-06, "loss": 0.8503, "step": 3560 }, { "epoch": 0.6402948844736133, "grad_norm": 1.4539434909820557, "learning_rate": 9.750036075017068e-06, "loss": 0.7916, "step": 3561 }, { "epoch": 0.6404746920794749, "grad_norm": 1.6381803750991821, "learning_rate": 9.749854193880343e-06, "loss": 0.7718, "step": 3562 }, { "epoch": 0.6406544996853367, "grad_norm": 1.495832085609436, "learning_rate": 9.749672248294328e-06, "loss": 0.809, "step": 3563 }, { "epoch": 0.6408343072911984, "grad_norm": 1.45931875705719, "learning_rate": 9.749490238261494e-06, "loss": 0.8073, "step": 3564 }, { "epoch": 0.6410141148970602, "grad_norm": 1.7689231634140015, "learning_rate": 9.749308163784309e-06, "loss": 0.7318, "step": 3565 }, { "epoch": 0.6411939225029218, "grad_norm": 1.4603713750839233, "learning_rate": 9.749126024865244e-06, "loss": 0.8724, "step": 3566 }, { "epoch": 0.6413737301087836, "grad_norm": 1.542287826538086, "learning_rate": 9.748943821506771e-06, "loss": 0.8034, "step": 3567 }, { "epoch": 0.6415535377146453, "grad_norm": 1.4420233964920044, "learning_rate": 9.74876155371136e-06, "loss": 0.7798, "step": 3568 }, { "epoch": 0.6417333453205071, "grad_norm": 1.5677013397216797, "learning_rate": 9.748579221481487e-06, "loss": 0.8152, "step": 3569 }, { "epoch": 0.6419131529263687, "grad_norm": 1.4620765447616577, "learning_rate": 9.748396824819626e-06, "loss": 0.8328, "step": 3570 }, { "epoch": 0.6420929605322305, "grad_norm": 1.618614673614502, "learning_rate": 9.748214363728247e-06, "loss": 0.8245, "step": 3571 }, { "epoch": 0.6422727681380922, "grad_norm": 1.1777781248092651, "learning_rate": 9.748031838209832e-06, "loss": 1.0642, "step": 3572 }, { "epoch": 0.642452575743954, "grad_norm": 1.4854320287704468, "learning_rate": 9.747849248266855e-06, "loss": 0.7623, "step": 3573 }, { "epoch": 0.6426323833498157, "grad_norm": 1.5718951225280762, "learning_rate": 9.747666593901793e-06, "loss": 0.8601, "step": 3574 }, { "epoch": 0.6428121909556774, "grad_norm": 1.3914039134979248, "learning_rate": 9.747483875117126e-06, "loss": 0.8158, "step": 3575 }, { "epoch": 0.6429919985615391, "grad_norm": 1.6221790313720703, "learning_rate": 9.74730109191533e-06, "loss": 0.9344, "step": 3576 }, { "epoch": 0.6431718061674009, "grad_norm": 1.0413683652877808, "learning_rate": 9.747118244298887e-06, "loss": 1.0659, "step": 3577 }, { "epoch": 0.6433516137732626, "grad_norm": 1.400679349899292, "learning_rate": 9.746935332270282e-06, "loss": 0.7519, "step": 3578 }, { "epoch": 0.6435314213791243, "grad_norm": 1.4763565063476562, "learning_rate": 9.74675235583199e-06, "loss": 0.7671, "step": 3579 }, { "epoch": 0.6437112289849861, "grad_norm": 1.520615577697754, "learning_rate": 9.746569314986499e-06, "loss": 0.8822, "step": 3580 }, { "epoch": 0.6438910365908478, "grad_norm": 1.6117584705352783, "learning_rate": 9.746386209736288e-06, "loss": 0.8083, "step": 3581 }, { "epoch": 0.6440708441967096, "grad_norm": 1.551820993423462, "learning_rate": 9.746203040083845e-06, "loss": 0.8661, "step": 3582 }, { "epoch": 0.6442506518025712, "grad_norm": 1.6171252727508545, "learning_rate": 9.746019806031655e-06, "loss": 0.8555, "step": 3583 }, { "epoch": 0.644430459408433, "grad_norm": 1.5005089044570923, "learning_rate": 9.745836507582204e-06, "loss": 0.7658, "step": 3584 }, { "epoch": 0.6446102670142947, "grad_norm": 1.4260317087173462, "learning_rate": 9.745653144737978e-06, "loss": 0.7296, "step": 3585 }, { "epoch": 0.6447900746201565, "grad_norm": 1.55814528465271, "learning_rate": 9.745469717501466e-06, "loss": 0.7275, "step": 3586 }, { "epoch": 0.6449698822260181, "grad_norm": 1.5949256420135498, "learning_rate": 9.745286225875157e-06, "loss": 0.8636, "step": 3587 }, { "epoch": 0.6451496898318799, "grad_norm": 1.5288445949554443, "learning_rate": 9.745102669861539e-06, "loss": 0.8124, "step": 3588 }, { "epoch": 0.6453294974377416, "grad_norm": 1.636270523071289, "learning_rate": 9.744919049463106e-06, "loss": 0.8411, "step": 3589 }, { "epoch": 0.6455093050436034, "grad_norm": 1.4972796440124512, "learning_rate": 9.744735364682347e-06, "loss": 0.825, "step": 3590 }, { "epoch": 0.645689112649465, "grad_norm": 1.4566186666488647, "learning_rate": 9.744551615521754e-06, "loss": 0.7371, "step": 3591 }, { "epoch": 0.6458689202553268, "grad_norm": 1.5528841018676758, "learning_rate": 9.744367801983821e-06, "loss": 0.7933, "step": 3592 }, { "epoch": 0.6460487278611885, "grad_norm": 1.6850050687789917, "learning_rate": 9.744183924071042e-06, "loss": 0.8091, "step": 3593 }, { "epoch": 0.6462285354670503, "grad_norm": 1.7917765378952026, "learning_rate": 9.743999981785914e-06, "loss": 0.8879, "step": 3594 }, { "epoch": 0.646408343072912, "grad_norm": 1.1520549058914185, "learning_rate": 9.74381597513093e-06, "loss": 1.0435, "step": 3595 }, { "epoch": 0.6465881506787737, "grad_norm": 1.4814785718917847, "learning_rate": 9.743631904108586e-06, "loss": 0.8434, "step": 3596 }, { "epoch": 0.6467679582846354, "grad_norm": 1.5357893705368042, "learning_rate": 9.743447768721384e-06, "loss": 0.8194, "step": 3597 }, { "epoch": 0.6469477658904972, "grad_norm": 1.182021141052246, "learning_rate": 9.743263568971818e-06, "loss": 1.0534, "step": 3598 }, { "epoch": 0.6471275734963589, "grad_norm": 1.401508092880249, "learning_rate": 9.74307930486239e-06, "loss": 0.7641, "step": 3599 }, { "epoch": 0.6473073811022206, "grad_norm": 1.550618290901184, "learning_rate": 9.7428949763956e-06, "loss": 0.8406, "step": 3600 }, { "epoch": 0.6474871887080823, "grad_norm": 1.4297524690628052, "learning_rate": 9.742710583573947e-06, "loss": 0.7754, "step": 3601 }, { "epoch": 0.6476669963139441, "grad_norm": 1.503138542175293, "learning_rate": 9.742526126399936e-06, "loss": 0.8233, "step": 3602 }, { "epoch": 0.6478468039198058, "grad_norm": 1.4487662315368652, "learning_rate": 9.742341604876067e-06, "loss": 0.7938, "step": 3603 }, { "epoch": 0.6480266115256675, "grad_norm": 1.671191692352295, "learning_rate": 9.742157019004845e-06, "loss": 0.7879, "step": 3604 }, { "epoch": 0.6482064191315292, "grad_norm": 1.5115082263946533, "learning_rate": 9.741972368788776e-06, "loss": 0.822, "step": 3605 }, { "epoch": 0.648386226737391, "grad_norm": 1.5457860231399536, "learning_rate": 9.741787654230364e-06, "loss": 0.8121, "step": 3606 }, { "epoch": 0.6485660343432528, "grad_norm": 1.637526273727417, "learning_rate": 9.741602875332114e-06, "loss": 0.8127, "step": 3607 }, { "epoch": 0.6487458419491144, "grad_norm": 1.5024702548980713, "learning_rate": 9.741418032096535e-06, "loss": 0.807, "step": 3608 }, { "epoch": 0.6489256495549762, "grad_norm": 1.4665318727493286, "learning_rate": 9.741233124526135e-06, "loss": 0.7633, "step": 3609 }, { "epoch": 0.6491054571608379, "grad_norm": 1.221701741218567, "learning_rate": 9.741048152623423e-06, "loss": 1.0504, "step": 3610 }, { "epoch": 0.6492852647666997, "grad_norm": 1.5320767164230347, "learning_rate": 9.740863116390908e-06, "loss": 0.807, "step": 3611 }, { "epoch": 0.6494650723725613, "grad_norm": 1.471488118171692, "learning_rate": 9.740678015831101e-06, "loss": 0.8254, "step": 3612 }, { "epoch": 0.6496448799784231, "grad_norm": 1.0838472843170166, "learning_rate": 9.740492850946513e-06, "loss": 1.0517, "step": 3613 }, { "epoch": 0.6498246875842848, "grad_norm": 1.4305020570755005, "learning_rate": 9.740307621739659e-06, "loss": 0.7211, "step": 3614 }, { "epoch": 0.6500044951901466, "grad_norm": 1.522152304649353, "learning_rate": 9.74012232821305e-06, "loss": 0.8065, "step": 3615 }, { "epoch": 0.6501843027960083, "grad_norm": 1.675482153892517, "learning_rate": 9.7399369703692e-06, "loss": 0.8821, "step": 3616 }, { "epoch": 0.65036411040187, "grad_norm": 1.5212664604187012, "learning_rate": 9.739751548210625e-06, "loss": 0.8172, "step": 3617 }, { "epoch": 0.6505439180077317, "grad_norm": 1.0926482677459717, "learning_rate": 9.73956606173984e-06, "loss": 1.0555, "step": 3618 }, { "epoch": 0.6507237256135935, "grad_norm": 1.5917837619781494, "learning_rate": 9.739380510959365e-06, "loss": 0.8732, "step": 3619 }, { "epoch": 0.6509035332194552, "grad_norm": 1.537092924118042, "learning_rate": 9.739194895871713e-06, "loss": 0.7709, "step": 3620 }, { "epoch": 0.6510833408253169, "grad_norm": 1.458234429359436, "learning_rate": 9.739009216479404e-06, "loss": 0.7512, "step": 3621 }, { "epoch": 0.6512631484311786, "grad_norm": 1.6237130165100098, "learning_rate": 9.73882347278496e-06, "loss": 0.852, "step": 3622 }, { "epoch": 0.6514429560370404, "grad_norm": 1.6027717590332031, "learning_rate": 9.7386376647909e-06, "loss": 0.8064, "step": 3623 }, { "epoch": 0.6516227636429021, "grad_norm": 1.1426303386688232, "learning_rate": 9.738451792499744e-06, "loss": 1.0812, "step": 3624 }, { "epoch": 0.6518025712487638, "grad_norm": 1.5709972381591797, "learning_rate": 9.738265855914014e-06, "loss": 0.8137, "step": 3625 }, { "epoch": 0.6519823788546255, "grad_norm": 1.0465116500854492, "learning_rate": 9.738079855036233e-06, "loss": 1.0597, "step": 3626 }, { "epoch": 0.6521621864604873, "grad_norm": 1.5251506567001343, "learning_rate": 9.737893789868926e-06, "loss": 0.8099, "step": 3627 }, { "epoch": 0.652341994066349, "grad_norm": 1.4762020111083984, "learning_rate": 9.737707660414617e-06, "loss": 0.8341, "step": 3628 }, { "epoch": 0.6525218016722107, "grad_norm": 1.4596500396728516, "learning_rate": 9.737521466675832e-06, "loss": 0.7836, "step": 3629 }, { "epoch": 0.6527016092780724, "grad_norm": 1.6166903972625732, "learning_rate": 9.737335208655096e-06, "loss": 0.8369, "step": 3630 }, { "epoch": 0.6528814168839342, "grad_norm": 1.5585180521011353, "learning_rate": 9.737148886354939e-06, "loss": 0.7899, "step": 3631 }, { "epoch": 0.6530612244897959, "grad_norm": 1.4616619348526, "learning_rate": 9.736962499777887e-06, "loss": 0.8147, "step": 3632 }, { "epoch": 0.6532410320956576, "grad_norm": 1.6043236255645752, "learning_rate": 9.736776048926469e-06, "loss": 0.7948, "step": 3633 }, { "epoch": 0.6534208397015194, "grad_norm": 1.6027629375457764, "learning_rate": 9.736589533803214e-06, "loss": 0.7891, "step": 3634 }, { "epoch": 0.6536006473073811, "grad_norm": 1.6428239345550537, "learning_rate": 9.736402954410656e-06, "loss": 0.8262, "step": 3635 }, { "epoch": 0.6537804549132429, "grad_norm": 1.3330355882644653, "learning_rate": 9.736216310751323e-06, "loss": 1.0535, "step": 3636 }, { "epoch": 0.6539602625191046, "grad_norm": 1.5242527723312378, "learning_rate": 9.73602960282775e-06, "loss": 0.8466, "step": 3637 }, { "epoch": 0.6541400701249663, "grad_norm": 1.1978466510772705, "learning_rate": 9.735842830642471e-06, "loss": 1.0187, "step": 3638 }, { "epoch": 0.654319877730828, "grad_norm": 1.4826231002807617, "learning_rate": 9.735655994198016e-06, "loss": 0.8132, "step": 3639 }, { "epoch": 0.6544996853366898, "grad_norm": 1.5667388439178467, "learning_rate": 9.735469093496925e-06, "loss": 0.8628, "step": 3640 }, { "epoch": 0.6546794929425515, "grad_norm": 1.589587926864624, "learning_rate": 9.735282128541733e-06, "loss": 0.8022, "step": 3641 }, { "epoch": 0.6548593005484132, "grad_norm": 1.513352394104004, "learning_rate": 9.735095099334973e-06, "loss": 0.8097, "step": 3642 }, { "epoch": 0.6550391081542749, "grad_norm": 1.516206979751587, "learning_rate": 9.734908005879187e-06, "loss": 0.8597, "step": 3643 }, { "epoch": 0.6552189157601367, "grad_norm": 1.4368250370025635, "learning_rate": 9.734720848176913e-06, "loss": 0.8311, "step": 3644 }, { "epoch": 0.6553987233659984, "grad_norm": 1.5292997360229492, "learning_rate": 9.734533626230687e-06, "loss": 0.8401, "step": 3645 }, { "epoch": 0.6555785309718601, "grad_norm": 1.488613486289978, "learning_rate": 9.734346340043056e-06, "loss": 0.7959, "step": 3646 }, { "epoch": 0.6557583385777218, "grad_norm": 1.353033185005188, "learning_rate": 9.734158989616554e-06, "loss": 1.0719, "step": 3647 }, { "epoch": 0.6559381461835836, "grad_norm": 1.4660396575927734, "learning_rate": 9.733971574953726e-06, "loss": 0.8089, "step": 3648 }, { "epoch": 0.6561179537894453, "grad_norm": 1.4269843101501465, "learning_rate": 9.733784096057119e-06, "loss": 0.7982, "step": 3649 }, { "epoch": 0.656297761395307, "grad_norm": 1.4933074712753296, "learning_rate": 9.73359655292927e-06, "loss": 0.7064, "step": 3650 }, { "epoch": 0.6564775690011687, "grad_norm": 1.5751664638519287, "learning_rate": 9.73340894557273e-06, "loss": 0.82, "step": 3651 }, { "epoch": 0.6566573766070305, "grad_norm": 1.5121303796768188, "learning_rate": 9.733221273990038e-06, "loss": 0.8235, "step": 3652 }, { "epoch": 0.6568371842128922, "grad_norm": 1.551291584968567, "learning_rate": 9.733033538183745e-06, "loss": 0.8151, "step": 3653 }, { "epoch": 0.657016991818754, "grad_norm": 1.5078496932983398, "learning_rate": 9.732845738156399e-06, "loss": 0.7368, "step": 3654 }, { "epoch": 0.6571967994246156, "grad_norm": 1.580718755722046, "learning_rate": 9.732657873910544e-06, "loss": 0.8689, "step": 3655 }, { "epoch": 0.6573766070304774, "grad_norm": 1.5073480606079102, "learning_rate": 9.732469945448732e-06, "loss": 0.7707, "step": 3656 }, { "epoch": 0.6575564146363391, "grad_norm": 1.1522324085235596, "learning_rate": 9.732281952773514e-06, "loss": 1.0388, "step": 3657 }, { "epoch": 0.6577362222422009, "grad_norm": 1.4922292232513428, "learning_rate": 9.73209389588744e-06, "loss": 0.8144, "step": 3658 }, { "epoch": 0.6579160298480625, "grad_norm": 1.51448392868042, "learning_rate": 9.731905774793057e-06, "loss": 0.7688, "step": 3659 }, { "epoch": 0.6580958374539243, "grad_norm": 1.528419017791748, "learning_rate": 9.731717589492925e-06, "loss": 0.7406, "step": 3660 }, { "epoch": 0.658275645059786, "grad_norm": 1.526841163635254, "learning_rate": 9.731529339989593e-06, "loss": 0.8304, "step": 3661 }, { "epoch": 0.6584554526656478, "grad_norm": 1.473093867301941, "learning_rate": 9.731341026285616e-06, "loss": 0.824, "step": 3662 }, { "epoch": 0.6586352602715095, "grad_norm": 1.6287931203842163, "learning_rate": 9.731152648383551e-06, "loss": 0.8492, "step": 3663 }, { "epoch": 0.6588150678773712, "grad_norm": 1.7586151361465454, "learning_rate": 9.73096420628595e-06, "loss": 0.8311, "step": 3664 }, { "epoch": 0.658994875483233, "grad_norm": 1.4988594055175781, "learning_rate": 9.730775699995375e-06, "loss": 0.8203, "step": 3665 }, { "epoch": 0.6591746830890947, "grad_norm": 1.5772701501846313, "learning_rate": 9.73058712951438e-06, "loss": 0.8581, "step": 3666 }, { "epoch": 0.6593544906949564, "grad_norm": 1.7368348836898804, "learning_rate": 9.730398494845523e-06, "loss": 0.8857, "step": 3667 }, { "epoch": 0.6595342983008181, "grad_norm": 1.5998865365982056, "learning_rate": 9.730209795991367e-06, "loss": 0.8283, "step": 3668 }, { "epoch": 0.6597141059066799, "grad_norm": 1.4214359521865845, "learning_rate": 9.730021032954472e-06, "loss": 0.7338, "step": 3669 }, { "epoch": 0.6598939135125416, "grad_norm": 1.6189512014389038, "learning_rate": 9.729832205737397e-06, "loss": 0.8542, "step": 3670 }, { "epoch": 0.6600737211184033, "grad_norm": 1.3795052766799927, "learning_rate": 9.729643314342704e-06, "loss": 0.7535, "step": 3671 }, { "epoch": 0.660253528724265, "grad_norm": 1.6343189477920532, "learning_rate": 9.729454358772958e-06, "loss": 0.79, "step": 3672 }, { "epoch": 0.6604333363301268, "grad_norm": 1.1495234966278076, "learning_rate": 9.729265339030722e-06, "loss": 1.0418, "step": 3673 }, { "epoch": 0.6606131439359885, "grad_norm": 1.2390364408493042, "learning_rate": 9.72907625511856e-06, "loss": 1.0271, "step": 3674 }, { "epoch": 0.6607929515418502, "grad_norm": 1.1263293027877808, "learning_rate": 9.72888710703904e-06, "loss": 1.0634, "step": 3675 }, { "epoch": 0.6609727591477119, "grad_norm": 1.570291519165039, "learning_rate": 9.728697894794727e-06, "loss": 0.8736, "step": 3676 }, { "epoch": 0.6611525667535737, "grad_norm": 1.494615077972412, "learning_rate": 9.728508618388186e-06, "loss": 0.8327, "step": 3677 }, { "epoch": 0.6613323743594354, "grad_norm": 1.6542047262191772, "learning_rate": 9.728319277821989e-06, "loss": 0.7939, "step": 3678 }, { "epoch": 0.6615121819652972, "grad_norm": 1.636653184890747, "learning_rate": 9.728129873098704e-06, "loss": 0.8432, "step": 3679 }, { "epoch": 0.6616919895711588, "grad_norm": 1.550619125366211, "learning_rate": 9.7279404042209e-06, "loss": 0.8694, "step": 3680 }, { "epoch": 0.6618717971770206, "grad_norm": 1.563703179359436, "learning_rate": 9.727750871191149e-06, "loss": 0.8237, "step": 3681 }, { "epoch": 0.6620516047828823, "grad_norm": 1.5108219385147095, "learning_rate": 9.727561274012023e-06, "loss": 0.8433, "step": 3682 }, { "epoch": 0.6622314123887441, "grad_norm": 1.4421114921569824, "learning_rate": 9.727371612686092e-06, "loss": 0.8039, "step": 3683 }, { "epoch": 0.6624112199946057, "grad_norm": 1.4506313800811768, "learning_rate": 9.727181887215931e-06, "loss": 0.8045, "step": 3684 }, { "epoch": 0.6625910276004675, "grad_norm": 1.577696681022644, "learning_rate": 9.726992097604115e-06, "loss": 0.8784, "step": 3685 }, { "epoch": 0.6627708352063292, "grad_norm": 1.9009571075439453, "learning_rate": 9.726802243853218e-06, "loss": 0.807, "step": 3686 }, { "epoch": 0.662950642812191, "grad_norm": 1.3327809572219849, "learning_rate": 9.726612325965819e-06, "loss": 1.0814, "step": 3687 }, { "epoch": 0.6631304504180526, "grad_norm": 1.499761939048767, "learning_rate": 9.72642234394449e-06, "loss": 0.7642, "step": 3688 }, { "epoch": 0.6633102580239144, "grad_norm": 1.5217567682266235, "learning_rate": 9.726232297791813e-06, "loss": 0.819, "step": 3689 }, { "epoch": 0.6634900656297762, "grad_norm": 1.604862928390503, "learning_rate": 9.726042187510365e-06, "loss": 0.8996, "step": 3690 }, { "epoch": 0.6636698732356379, "grad_norm": 1.4502894878387451, "learning_rate": 9.725852013102725e-06, "loss": 0.8084, "step": 3691 }, { "epoch": 0.6638496808414996, "grad_norm": 1.168459177017212, "learning_rate": 9.725661774571475e-06, "loss": 1.0563, "step": 3692 }, { "epoch": 0.6640294884473613, "grad_norm": 1.3826706409454346, "learning_rate": 9.725471471919195e-06, "loss": 0.7629, "step": 3693 }, { "epoch": 0.6642092960532231, "grad_norm": 1.1328102350234985, "learning_rate": 9.725281105148469e-06, "loss": 1.0717, "step": 3694 }, { "epoch": 0.6643891036590848, "grad_norm": 1.5856376886367798, "learning_rate": 9.725090674261877e-06, "loss": 0.789, "step": 3695 }, { "epoch": 0.6645689112649465, "grad_norm": 1.5159534215927124, "learning_rate": 9.724900179262005e-06, "loss": 0.7847, "step": 3696 }, { "epoch": 0.6647487188708082, "grad_norm": 1.4981683492660522, "learning_rate": 9.724709620151437e-06, "loss": 0.7774, "step": 3697 }, { "epoch": 0.66492852647667, "grad_norm": 1.6006495952606201, "learning_rate": 9.724518996932758e-06, "loss": 0.8492, "step": 3698 }, { "epoch": 0.6651083340825317, "grad_norm": 1.448997139930725, "learning_rate": 9.724328309608558e-06, "loss": 0.7759, "step": 3699 }, { "epoch": 0.6652881416883935, "grad_norm": 1.585444688796997, "learning_rate": 9.72413755818142e-06, "loss": 0.7979, "step": 3700 }, { "epoch": 0.6654679492942551, "grad_norm": 1.4667847156524658, "learning_rate": 9.723946742653935e-06, "loss": 0.7814, "step": 3701 }, { "epoch": 0.6656477569001169, "grad_norm": 1.5308371782302856, "learning_rate": 9.72375586302869e-06, "loss": 0.7587, "step": 3702 }, { "epoch": 0.6658275645059786, "grad_norm": 2.2599427700042725, "learning_rate": 9.723564919308278e-06, "loss": 0.787, "step": 3703 }, { "epoch": 0.6660073721118404, "grad_norm": 1.5311119556427002, "learning_rate": 9.723373911495285e-06, "loss": 0.7965, "step": 3704 }, { "epoch": 0.666187179717702, "grad_norm": 1.5612621307373047, "learning_rate": 9.723182839592308e-06, "loss": 0.8471, "step": 3705 }, { "epoch": 0.6663669873235638, "grad_norm": 1.5100799798965454, "learning_rate": 9.722991703601936e-06, "loss": 0.8583, "step": 3706 }, { "epoch": 0.6665467949294255, "grad_norm": 1.4936929941177368, "learning_rate": 9.722800503526767e-06, "loss": 0.6987, "step": 3707 }, { "epoch": 0.6667266025352873, "grad_norm": 1.5475149154663086, "learning_rate": 9.722609239369389e-06, "loss": 0.8064, "step": 3708 }, { "epoch": 0.6669064101411489, "grad_norm": 1.4256200790405273, "learning_rate": 9.7224179111324e-06, "loss": 0.842, "step": 3709 }, { "epoch": 0.6670862177470107, "grad_norm": 1.5302598476409912, "learning_rate": 9.722226518818398e-06, "loss": 0.867, "step": 3710 }, { "epoch": 0.6672660253528724, "grad_norm": 1.4348257780075073, "learning_rate": 9.722035062429977e-06, "loss": 0.8017, "step": 3711 }, { "epoch": 0.6674458329587342, "grad_norm": 1.4546993970870972, "learning_rate": 9.721843541969738e-06, "loss": 0.7337, "step": 3712 }, { "epoch": 0.6676256405645958, "grad_norm": 1.4007422924041748, "learning_rate": 9.721651957440276e-06, "loss": 0.7734, "step": 3713 }, { "epoch": 0.6678054481704576, "grad_norm": 1.6148284673690796, "learning_rate": 9.721460308844193e-06, "loss": 0.7082, "step": 3714 }, { "epoch": 0.6679852557763193, "grad_norm": 1.5595769882202148, "learning_rate": 9.72126859618409e-06, "loss": 0.8469, "step": 3715 }, { "epoch": 0.6681650633821811, "grad_norm": 1.5663825273513794, "learning_rate": 9.721076819462565e-06, "loss": 0.8214, "step": 3716 }, { "epoch": 0.6683448709880428, "grad_norm": 1.5693448781967163, "learning_rate": 9.720884978682223e-06, "loss": 0.8749, "step": 3717 }, { "epoch": 0.6685246785939045, "grad_norm": 1.4821289777755737, "learning_rate": 9.720693073845668e-06, "loss": 0.755, "step": 3718 }, { "epoch": 0.6687044861997663, "grad_norm": 1.4630364179611206, "learning_rate": 9.720501104955499e-06, "loss": 0.7691, "step": 3719 }, { "epoch": 0.668884293805628, "grad_norm": 1.483818769454956, "learning_rate": 9.720309072014327e-06, "loss": 0.8359, "step": 3720 }, { "epoch": 0.6690641014114898, "grad_norm": 1.5205862522125244, "learning_rate": 9.720116975024754e-06, "loss": 0.8678, "step": 3721 }, { "epoch": 0.6692439090173514, "grad_norm": 1.5586694478988647, "learning_rate": 9.719924813989386e-06, "loss": 0.8729, "step": 3722 }, { "epoch": 0.6694237166232132, "grad_norm": 1.4206186532974243, "learning_rate": 9.719732588910831e-06, "loss": 0.802, "step": 3723 }, { "epoch": 0.6696035242290749, "grad_norm": 1.534998893737793, "learning_rate": 9.7195402997917e-06, "loss": 1.0088, "step": 3724 }, { "epoch": 0.6697833318349367, "grad_norm": 1.4457776546478271, "learning_rate": 9.719347946634598e-06, "loss": 0.7739, "step": 3725 }, { "epoch": 0.6699631394407983, "grad_norm": 1.5540590286254883, "learning_rate": 9.719155529442137e-06, "loss": 0.877, "step": 3726 }, { "epoch": 0.6701429470466601, "grad_norm": 1.5133055448532104, "learning_rate": 9.718963048216927e-06, "loss": 0.7897, "step": 3727 }, { "epoch": 0.6703227546525218, "grad_norm": 1.5675631761550903, "learning_rate": 9.718770502961581e-06, "loss": 0.8887, "step": 3728 }, { "epoch": 0.6705025622583836, "grad_norm": 1.5509775876998901, "learning_rate": 9.718577893678712e-06, "loss": 0.7788, "step": 3729 }, { "epoch": 0.6706823698642452, "grad_norm": 1.5623753070831299, "learning_rate": 9.718385220370931e-06, "loss": 0.8439, "step": 3730 }, { "epoch": 0.670862177470107, "grad_norm": 1.5317227840423584, "learning_rate": 9.718192483040854e-06, "loss": 0.7518, "step": 3731 }, { "epoch": 0.6710419850759687, "grad_norm": 1.526042103767395, "learning_rate": 9.717999681691098e-06, "loss": 0.8585, "step": 3732 }, { "epoch": 0.6712217926818305, "grad_norm": 1.4783663749694824, "learning_rate": 9.717806816324273e-06, "loss": 0.7381, "step": 3733 }, { "epoch": 0.6714016002876921, "grad_norm": 1.1526157855987549, "learning_rate": 9.717613886943002e-06, "loss": 1.0854, "step": 3734 }, { "epoch": 0.6715814078935539, "grad_norm": 1.4195711612701416, "learning_rate": 9.717420893549902e-06, "loss": 0.8124, "step": 3735 }, { "epoch": 0.6717612154994156, "grad_norm": 1.4211300611495972, "learning_rate": 9.71722783614759e-06, "loss": 0.8334, "step": 3736 }, { "epoch": 0.6719410231052774, "grad_norm": 1.4305775165557861, "learning_rate": 9.717034714738685e-06, "loss": 0.7602, "step": 3737 }, { "epoch": 0.672120830711139, "grad_norm": 1.6252855062484741, "learning_rate": 9.716841529325807e-06, "loss": 0.8721, "step": 3738 }, { "epoch": 0.6723006383170008, "grad_norm": 1.5436071157455444, "learning_rate": 9.716648279911581e-06, "loss": 0.8384, "step": 3739 }, { "epoch": 0.6724804459228625, "grad_norm": 1.7581086158752441, "learning_rate": 9.716454966498625e-06, "loss": 0.8392, "step": 3740 }, { "epoch": 0.6726602535287243, "grad_norm": 1.603235125541687, "learning_rate": 9.716261589089564e-06, "loss": 0.7761, "step": 3741 }, { "epoch": 0.6728400611345859, "grad_norm": 1.5599721670150757, "learning_rate": 9.716068147687024e-06, "loss": 0.8453, "step": 3742 }, { "epoch": 0.6730198687404477, "grad_norm": 1.4383413791656494, "learning_rate": 9.715874642293624e-06, "loss": 0.8202, "step": 3743 }, { "epoch": 0.6731996763463094, "grad_norm": 1.4861652851104736, "learning_rate": 9.715681072911994e-06, "loss": 0.7653, "step": 3744 }, { "epoch": 0.6733794839521712, "grad_norm": 1.1058694124221802, "learning_rate": 9.715487439544761e-06, "loss": 1.055, "step": 3745 }, { "epoch": 0.673559291558033, "grad_norm": 1.6513110399246216, "learning_rate": 9.715293742194549e-06, "loss": 0.8257, "step": 3746 }, { "epoch": 0.6737390991638946, "grad_norm": 1.7299212217330933, "learning_rate": 9.715099980863989e-06, "loss": 0.8319, "step": 3747 }, { "epoch": 0.6739189067697564, "grad_norm": 1.5560834407806396, "learning_rate": 9.714906155555707e-06, "loss": 0.8246, "step": 3748 }, { "epoch": 0.6740987143756181, "grad_norm": 1.5985913276672363, "learning_rate": 9.714712266272339e-06, "loss": 0.7964, "step": 3749 }, { "epoch": 0.6742785219814799, "grad_norm": 1.4438005685806274, "learning_rate": 9.71451831301651e-06, "loss": 0.8259, "step": 3750 }, { "epoch": 0.6744583295873415, "grad_norm": 1.4772571325302124, "learning_rate": 9.714324295790853e-06, "loss": 0.7352, "step": 3751 }, { "epoch": 0.6746381371932033, "grad_norm": 1.7792704105377197, "learning_rate": 9.714130214598e-06, "loss": 0.8477, "step": 3752 }, { "epoch": 0.674817944799065, "grad_norm": 1.5046335458755493, "learning_rate": 9.713936069440588e-06, "loss": 0.8208, "step": 3753 }, { "epoch": 0.6749977524049268, "grad_norm": 1.5111470222473145, "learning_rate": 9.713741860321248e-06, "loss": 0.8686, "step": 3754 }, { "epoch": 0.6751775600107884, "grad_norm": 1.0667306184768677, "learning_rate": 9.713547587242616e-06, "loss": 1.0396, "step": 3755 }, { "epoch": 0.6753573676166502, "grad_norm": 1.120040774345398, "learning_rate": 9.713353250207328e-06, "loss": 1.0251, "step": 3756 }, { "epoch": 0.6755371752225119, "grad_norm": 1.5253963470458984, "learning_rate": 9.71315884921802e-06, "loss": 0.7989, "step": 3757 }, { "epoch": 0.6757169828283737, "grad_norm": 1.044693946838379, "learning_rate": 9.712964384277332e-06, "loss": 1.0217, "step": 3758 }, { "epoch": 0.6758967904342353, "grad_norm": 1.1532503366470337, "learning_rate": 9.712769855387902e-06, "loss": 1.0579, "step": 3759 }, { "epoch": 0.6760765980400971, "grad_norm": 1.5460611581802368, "learning_rate": 9.712575262552369e-06, "loss": 0.8042, "step": 3760 }, { "epoch": 0.6762564056459588, "grad_norm": 1.5180641412734985, "learning_rate": 9.71238060577337e-06, "loss": 0.8606, "step": 3761 }, { "epoch": 0.6764362132518206, "grad_norm": 1.0060049295425415, "learning_rate": 9.712185885053551e-06, "loss": 1.0444, "step": 3762 }, { "epoch": 0.6766160208576822, "grad_norm": 1.587328553199768, "learning_rate": 9.711991100395554e-06, "loss": 0.7784, "step": 3763 }, { "epoch": 0.676795828463544, "grad_norm": 1.3647569417953491, "learning_rate": 9.71179625180202e-06, "loss": 0.831, "step": 3764 }, { "epoch": 0.6769756360694057, "grad_norm": 1.0340577363967896, "learning_rate": 9.711601339275594e-06, "loss": 1.1183, "step": 3765 }, { "epoch": 0.6771554436752675, "grad_norm": 1.6413336992263794, "learning_rate": 9.711406362818919e-06, "loss": 0.807, "step": 3766 }, { "epoch": 0.6773352512811291, "grad_norm": 1.0549800395965576, "learning_rate": 9.711211322434641e-06, "loss": 1.0871, "step": 3767 }, { "epoch": 0.6775150588869909, "grad_norm": 1.5490273237228394, "learning_rate": 9.711016218125408e-06, "loss": 0.8194, "step": 3768 }, { "epoch": 0.6776948664928526, "grad_norm": 1.4067645072937012, "learning_rate": 9.710821049893867e-06, "loss": 0.8064, "step": 3769 }, { "epoch": 0.6778746740987144, "grad_norm": 1.5896844863891602, "learning_rate": 9.710625817742665e-06, "loss": 0.8107, "step": 3770 }, { "epoch": 0.678054481704576, "grad_norm": 1.5068897008895874, "learning_rate": 9.710430521674453e-06, "loss": 0.8004, "step": 3771 }, { "epoch": 0.6782342893104378, "grad_norm": 1.8753280639648438, "learning_rate": 9.710235161691877e-06, "loss": 0.7822, "step": 3772 }, { "epoch": 0.6784140969162996, "grad_norm": 0.9994678497314453, "learning_rate": 9.710039737797591e-06, "loss": 1.0349, "step": 3773 }, { "epoch": 0.6785939045221613, "grad_norm": 1.4419461488723755, "learning_rate": 9.709844249994246e-06, "loss": 0.8117, "step": 3774 }, { "epoch": 0.6787737121280231, "grad_norm": 1.5805779695510864, "learning_rate": 9.709648698284494e-06, "loss": 0.7824, "step": 3775 }, { "epoch": 0.6789535197338847, "grad_norm": 1.3600382804870605, "learning_rate": 9.709453082670992e-06, "loss": 0.8048, "step": 3776 }, { "epoch": 0.6791333273397465, "grad_norm": 1.4729396104812622, "learning_rate": 9.70925740315639e-06, "loss": 0.8605, "step": 3777 }, { "epoch": 0.6793131349456082, "grad_norm": 1.1674599647521973, "learning_rate": 9.709061659743342e-06, "loss": 1.0313, "step": 3778 }, { "epoch": 0.67949294255147, "grad_norm": 1.4726061820983887, "learning_rate": 9.708865852434507e-06, "loss": 0.8451, "step": 3779 }, { "epoch": 0.6796727501573316, "grad_norm": 1.4548929929733276, "learning_rate": 9.708669981232542e-06, "loss": 0.7829, "step": 3780 }, { "epoch": 0.6798525577631934, "grad_norm": 1.6159697771072388, "learning_rate": 9.708474046140103e-06, "loss": 0.7705, "step": 3781 }, { "epoch": 0.6800323653690551, "grad_norm": 1.5192251205444336, "learning_rate": 9.70827804715985e-06, "loss": 0.8663, "step": 3782 }, { "epoch": 0.6802121729749169, "grad_norm": 1.4952569007873535, "learning_rate": 9.70808198429444e-06, "loss": 0.8574, "step": 3783 }, { "epoch": 0.6803919805807785, "grad_norm": 1.632656455039978, "learning_rate": 9.707885857546537e-06, "loss": 0.7136, "step": 3784 }, { "epoch": 0.6805717881866403, "grad_norm": 1.5209095478057861, "learning_rate": 9.707689666918801e-06, "loss": 0.8631, "step": 3785 }, { "epoch": 0.680751595792502, "grad_norm": 1.1384961605072021, "learning_rate": 9.707493412413892e-06, "loss": 1.0104, "step": 3786 }, { "epoch": 0.6809314033983638, "grad_norm": 1.0225794315338135, "learning_rate": 9.707297094034473e-06, "loss": 1.072, "step": 3787 }, { "epoch": 0.6811112110042254, "grad_norm": 1.547804355621338, "learning_rate": 9.707100711783211e-06, "loss": 0.8031, "step": 3788 }, { "epoch": 0.6812910186100872, "grad_norm": 1.4935109615325928, "learning_rate": 9.706904265662768e-06, "loss": 0.7611, "step": 3789 }, { "epoch": 0.6814708262159489, "grad_norm": 1.1024093627929688, "learning_rate": 9.706707755675811e-06, "loss": 0.9862, "step": 3790 }, { "epoch": 0.6816506338218107, "grad_norm": 1.515934705734253, "learning_rate": 9.706511181825005e-06, "loss": 0.8217, "step": 3791 }, { "epoch": 0.6818304414276724, "grad_norm": 1.4162037372589111, "learning_rate": 9.706314544113017e-06, "loss": 0.7526, "step": 3792 }, { "epoch": 0.6820102490335341, "grad_norm": 1.7474902868270874, "learning_rate": 9.706117842542517e-06, "loss": 0.8233, "step": 3793 }, { "epoch": 0.6821900566393958, "grad_norm": 1.3819835186004639, "learning_rate": 9.705921077116172e-06, "loss": 0.8404, "step": 3794 }, { "epoch": 0.6823698642452576, "grad_norm": 1.5031509399414062, "learning_rate": 9.705724247836655e-06, "loss": 0.7663, "step": 3795 }, { "epoch": 0.6825496718511193, "grad_norm": 1.5624624490737915, "learning_rate": 9.705527354706632e-06, "loss": 0.8147, "step": 3796 }, { "epoch": 0.682729479456981, "grad_norm": 1.495895504951477, "learning_rate": 9.705330397728778e-06, "loss": 0.835, "step": 3797 }, { "epoch": 0.6829092870628427, "grad_norm": 1.458425760269165, "learning_rate": 9.705133376905765e-06, "loss": 0.8544, "step": 3798 }, { "epoch": 0.6830890946687045, "grad_norm": 2.5133426189422607, "learning_rate": 9.704936292240266e-06, "loss": 0.8148, "step": 3799 }, { "epoch": 0.6832689022745662, "grad_norm": 1.5636571645736694, "learning_rate": 9.704739143734954e-06, "loss": 0.7855, "step": 3800 }, { "epoch": 0.6834487098804279, "grad_norm": 1.4528846740722656, "learning_rate": 9.704541931392506e-06, "loss": 0.7683, "step": 3801 }, { "epoch": 0.6836285174862897, "grad_norm": 1.533044695854187, "learning_rate": 9.704344655215596e-06, "loss": 0.8262, "step": 3802 }, { "epoch": 0.6838083250921514, "grad_norm": 1.3996319770812988, "learning_rate": 9.704147315206902e-06, "loss": 0.802, "step": 3803 }, { "epoch": 0.6839881326980132, "grad_norm": 1.4916025400161743, "learning_rate": 9.703949911369102e-06, "loss": 0.8428, "step": 3804 }, { "epoch": 0.6841679403038748, "grad_norm": 1.3397724628448486, "learning_rate": 9.703752443704874e-06, "loss": 1.0533, "step": 3805 }, { "epoch": 0.6843477479097366, "grad_norm": 1.1157087087631226, "learning_rate": 9.703554912216897e-06, "loss": 1.0314, "step": 3806 }, { "epoch": 0.6845275555155983, "grad_norm": 1.470388412475586, "learning_rate": 9.703357316907851e-06, "loss": 0.8376, "step": 3807 }, { "epoch": 0.6847073631214601, "grad_norm": 1.7093629837036133, "learning_rate": 9.703159657780418e-06, "loss": 0.805, "step": 3808 }, { "epoch": 0.6848871707273217, "grad_norm": 1.728999376296997, "learning_rate": 9.70296193483728e-06, "loss": 0.7696, "step": 3809 }, { "epoch": 0.6850669783331835, "grad_norm": 1.5292004346847534, "learning_rate": 9.70276414808112e-06, "loss": 0.7955, "step": 3810 }, { "epoch": 0.6852467859390452, "grad_norm": 1.5652399063110352, "learning_rate": 9.70256629751462e-06, "loss": 0.7677, "step": 3811 }, { "epoch": 0.685426593544907, "grad_norm": 1.252418041229248, "learning_rate": 9.702368383140468e-06, "loss": 1.038, "step": 3812 }, { "epoch": 0.6856064011507687, "grad_norm": 1.505934238433838, "learning_rate": 9.702170404961344e-06, "loss": 0.8102, "step": 3813 }, { "epoch": 0.6857862087566304, "grad_norm": 1.5169826745986938, "learning_rate": 9.701972362979938e-06, "loss": 0.8178, "step": 3814 }, { "epoch": 0.6859660163624921, "grad_norm": 1.502608060836792, "learning_rate": 9.701774257198939e-06, "loss": 0.8078, "step": 3815 }, { "epoch": 0.6861458239683539, "grad_norm": 1.4724136590957642, "learning_rate": 9.701576087621032e-06, "loss": 0.7802, "step": 3816 }, { "epoch": 0.6863256315742156, "grad_norm": 1.0942097902297974, "learning_rate": 9.701377854248906e-06, "loss": 1.0833, "step": 3817 }, { "epoch": 0.6865054391800773, "grad_norm": 1.5375611782073975, "learning_rate": 9.70117955708525e-06, "loss": 0.8375, "step": 3818 }, { "epoch": 0.686685246785939, "grad_norm": 1.4529211521148682, "learning_rate": 9.700981196132758e-06, "loss": 0.8065, "step": 3819 }, { "epoch": 0.6868650543918008, "grad_norm": 1.7171417474746704, "learning_rate": 9.700782771394119e-06, "loss": 0.7365, "step": 3820 }, { "epoch": 0.6870448619976625, "grad_norm": 1.5076102018356323, "learning_rate": 9.700584282872026e-06, "loss": 0.834, "step": 3821 }, { "epoch": 0.6872246696035242, "grad_norm": 1.5394530296325684, "learning_rate": 9.700385730569171e-06, "loss": 0.7342, "step": 3822 }, { "epoch": 0.6874044772093859, "grad_norm": 1.5644261837005615, "learning_rate": 9.700187114488251e-06, "loss": 0.8063, "step": 3823 }, { "epoch": 0.6875842848152477, "grad_norm": 1.571921467781067, "learning_rate": 9.699988434631957e-06, "loss": 0.8468, "step": 3824 }, { "epoch": 0.6877640924211094, "grad_norm": 1.0852415561676025, "learning_rate": 9.699789691002988e-06, "loss": 1.0281, "step": 3825 }, { "epoch": 0.6879439000269711, "grad_norm": 1.5903178453445435, "learning_rate": 9.699590883604039e-06, "loss": 0.8835, "step": 3826 }, { "epoch": 0.6881237076328328, "grad_norm": 1.5245212316513062, "learning_rate": 9.699392012437809e-06, "loss": 0.8394, "step": 3827 }, { "epoch": 0.6883035152386946, "grad_norm": 1.1062533855438232, "learning_rate": 9.699193077506997e-06, "loss": 1.0804, "step": 3828 }, { "epoch": 0.6884833228445564, "grad_norm": 1.6068742275238037, "learning_rate": 9.698994078814298e-06, "loss": 0.8429, "step": 3829 }, { "epoch": 0.688663130450418, "grad_norm": 1.4969208240509033, "learning_rate": 9.698795016362417e-06, "loss": 0.7731, "step": 3830 }, { "epoch": 0.6888429380562798, "grad_norm": 1.5699636936187744, "learning_rate": 9.698595890154051e-06, "loss": 0.8227, "step": 3831 }, { "epoch": 0.6890227456621415, "grad_norm": 1.5886718034744263, "learning_rate": 9.698396700191908e-06, "loss": 0.8353, "step": 3832 }, { "epoch": 0.6892025532680033, "grad_norm": 1.589953064918518, "learning_rate": 9.698197446478683e-06, "loss": 0.7813, "step": 3833 }, { "epoch": 0.689382360873865, "grad_norm": 1.5525542497634888, "learning_rate": 9.697998129017086e-06, "loss": 0.8719, "step": 3834 }, { "epoch": 0.6895621684797267, "grad_norm": 1.49051034450531, "learning_rate": 9.697798747809817e-06, "loss": 0.8634, "step": 3835 }, { "epoch": 0.6897419760855884, "grad_norm": 2.2491109371185303, "learning_rate": 9.697599302859584e-06, "loss": 0.7645, "step": 3836 }, { "epoch": 0.6899217836914502, "grad_norm": 1.5038068294525146, "learning_rate": 9.697399794169091e-06, "loss": 0.7879, "step": 3837 }, { "epoch": 0.6901015912973119, "grad_norm": 1.1356502771377563, "learning_rate": 9.697200221741048e-06, "loss": 1.046, "step": 3838 }, { "epoch": 0.6902813989031736, "grad_norm": 1.3360272645950317, "learning_rate": 9.69700058557816e-06, "loss": 0.76, "step": 3839 }, { "epoch": 0.6904612065090353, "grad_norm": 1.5763764381408691, "learning_rate": 9.696800885683139e-06, "loss": 0.8729, "step": 3840 }, { "epoch": 0.6906410141148971, "grad_norm": 1.5017033815383911, "learning_rate": 9.69660112205869e-06, "loss": 0.7897, "step": 3841 }, { "epoch": 0.6908208217207588, "grad_norm": 1.4676319360733032, "learning_rate": 9.69640129470753e-06, "loss": 0.8014, "step": 3842 }, { "epoch": 0.6910006293266205, "grad_norm": 1.5506401062011719, "learning_rate": 9.696201403632363e-06, "loss": 0.8222, "step": 3843 }, { "epoch": 0.6911804369324822, "grad_norm": 1.4286770820617676, "learning_rate": 9.696001448835907e-06, "loss": 0.8247, "step": 3844 }, { "epoch": 0.691360244538344, "grad_norm": 1.012021780014038, "learning_rate": 9.695801430320875e-06, "loss": 1.0224, "step": 3845 }, { "epoch": 0.6915400521442057, "grad_norm": 1.0586848258972168, "learning_rate": 9.695601348089975e-06, "loss": 1.0226, "step": 3846 }, { "epoch": 0.6917198597500674, "grad_norm": 1.477077841758728, "learning_rate": 9.695401202145929e-06, "loss": 0.8153, "step": 3847 }, { "epoch": 0.6918996673559291, "grad_norm": 1.555341124534607, "learning_rate": 9.695200992491449e-06, "loss": 0.8252, "step": 3848 }, { "epoch": 0.6920794749617909, "grad_norm": 1.5079326629638672, "learning_rate": 9.695000719129252e-06, "loss": 0.8001, "step": 3849 }, { "epoch": 0.6922592825676526, "grad_norm": 1.2249188423156738, "learning_rate": 9.694800382062055e-06, "loss": 1.0523, "step": 3850 }, { "epoch": 0.6924390901735143, "grad_norm": 1.7538155317306519, "learning_rate": 9.694599981292578e-06, "loss": 0.8335, "step": 3851 }, { "epoch": 0.692618897779376, "grad_norm": 1.4939470291137695, "learning_rate": 9.69439951682354e-06, "loss": 0.8066, "step": 3852 }, { "epoch": 0.6927987053852378, "grad_norm": 1.6972206830978394, "learning_rate": 9.69419898865766e-06, "loss": 0.8568, "step": 3853 }, { "epoch": 0.6929785129910995, "grad_norm": 1.4985077381134033, "learning_rate": 9.693998396797656e-06, "loss": 0.7781, "step": 3854 }, { "epoch": 0.6931583205969613, "grad_norm": 1.5280253887176514, "learning_rate": 9.693797741246256e-06, "loss": 0.8101, "step": 3855 }, { "epoch": 0.693338128202823, "grad_norm": 1.4389816522598267, "learning_rate": 9.693597022006179e-06, "loss": 0.8387, "step": 3856 }, { "epoch": 0.6935179358086847, "grad_norm": 1.1539533138275146, "learning_rate": 9.69339623908015e-06, "loss": 1.0303, "step": 3857 }, { "epoch": 0.6936977434145465, "grad_norm": 1.4939353466033936, "learning_rate": 9.69319539247089e-06, "loss": 0.8055, "step": 3858 }, { "epoch": 0.6938775510204082, "grad_norm": 1.4743566513061523, "learning_rate": 9.692994482181129e-06, "loss": 0.8003, "step": 3859 }, { "epoch": 0.6940573586262699, "grad_norm": 1.514685869216919, "learning_rate": 9.692793508213589e-06, "loss": 0.719, "step": 3860 }, { "epoch": 0.6942371662321316, "grad_norm": 1.4669476747512817, "learning_rate": 9.692592470571001e-06, "loss": 0.8235, "step": 3861 }, { "epoch": 0.6944169738379934, "grad_norm": 1.6765168905258179, "learning_rate": 9.692391369256088e-06, "loss": 0.7815, "step": 3862 }, { "epoch": 0.6945967814438551, "grad_norm": 1.4503298997879028, "learning_rate": 9.692190204271581e-06, "loss": 0.8256, "step": 3863 }, { "epoch": 0.6947765890497168, "grad_norm": 1.4447766542434692, "learning_rate": 9.691988975620213e-06, "loss": 0.8742, "step": 3864 }, { "epoch": 0.6949563966555785, "grad_norm": 1.8020964860916138, "learning_rate": 9.691787683304708e-06, "loss": 0.8311, "step": 3865 }, { "epoch": 0.6951362042614403, "grad_norm": 1.559615135192871, "learning_rate": 9.6915863273278e-06, "loss": 0.8176, "step": 3866 }, { "epoch": 0.695316011867302, "grad_norm": 1.4354450702667236, "learning_rate": 9.691384907692224e-06, "loss": 0.8049, "step": 3867 }, { "epoch": 0.6954958194731637, "grad_norm": 2.655728816986084, "learning_rate": 9.69118342440071e-06, "loss": 0.8295, "step": 3868 }, { "epoch": 0.6956756270790254, "grad_norm": 1.5704988241195679, "learning_rate": 9.690981877455991e-06, "loss": 0.8309, "step": 3869 }, { "epoch": 0.6958554346848872, "grad_norm": 1.4965018033981323, "learning_rate": 9.690780266860804e-06, "loss": 0.8197, "step": 3870 }, { "epoch": 0.6960352422907489, "grad_norm": 1.52598237991333, "learning_rate": 9.690578592617884e-06, "loss": 0.8356, "step": 3871 }, { "epoch": 0.6962150498966106, "grad_norm": 1.5402073860168457, "learning_rate": 9.690376854729967e-06, "loss": 0.847, "step": 3872 }, { "epoch": 0.6963948575024723, "grad_norm": 1.533864974975586, "learning_rate": 9.690175053199789e-06, "loss": 0.7397, "step": 3873 }, { "epoch": 0.6965746651083341, "grad_norm": 1.513873815536499, "learning_rate": 9.689973188030091e-06, "loss": 0.8277, "step": 3874 }, { "epoch": 0.6967544727141958, "grad_norm": 1.6302502155303955, "learning_rate": 9.68977125922361e-06, "loss": 0.9012, "step": 3875 }, { "epoch": 0.6969342803200576, "grad_norm": 1.1488949060440063, "learning_rate": 9.68956926678309e-06, "loss": 1.0862, "step": 3876 }, { "epoch": 0.6971140879259192, "grad_norm": 0.9801039099693298, "learning_rate": 9.689367210711264e-06, "loss": 1.0308, "step": 3877 }, { "epoch": 0.697293895531781, "grad_norm": 1.0093413591384888, "learning_rate": 9.689165091010881e-06, "loss": 0.9804, "step": 3878 }, { "epoch": 0.6974737031376427, "grad_norm": 1.4136466979980469, "learning_rate": 9.688962907684678e-06, "loss": 0.8039, "step": 3879 }, { "epoch": 0.6976535107435045, "grad_norm": 1.3970472812652588, "learning_rate": 9.688760660735403e-06, "loss": 0.871, "step": 3880 }, { "epoch": 0.6978333183493661, "grad_norm": 1.5846259593963623, "learning_rate": 9.688558350165798e-06, "loss": 0.7487, "step": 3881 }, { "epoch": 0.6980131259552279, "grad_norm": 1.169155478477478, "learning_rate": 9.688355975978608e-06, "loss": 1.0702, "step": 3882 }, { "epoch": 0.6981929335610896, "grad_norm": 1.5168559551239014, "learning_rate": 9.688153538176577e-06, "loss": 0.8425, "step": 3883 }, { "epoch": 0.6983727411669514, "grad_norm": 1.6443848609924316, "learning_rate": 9.687951036762457e-06, "loss": 0.8063, "step": 3884 }, { "epoch": 0.6985525487728131, "grad_norm": 1.0884569883346558, "learning_rate": 9.687748471738991e-06, "loss": 1.024, "step": 3885 }, { "epoch": 0.6987323563786748, "grad_norm": 1.6193482875823975, "learning_rate": 9.68754584310893e-06, "loss": 0.7659, "step": 3886 }, { "epoch": 0.6989121639845366, "grad_norm": 1.4984506368637085, "learning_rate": 9.687343150875022e-06, "loss": 0.8403, "step": 3887 }, { "epoch": 0.6990919715903983, "grad_norm": 1.493816614151001, "learning_rate": 9.687140395040017e-06, "loss": 0.8079, "step": 3888 }, { "epoch": 0.69927177919626, "grad_norm": 1.594218134880066, "learning_rate": 9.68693757560667e-06, "loss": 0.8283, "step": 3889 }, { "epoch": 0.6994515868021217, "grad_norm": 1.1232703924179077, "learning_rate": 9.686734692577727e-06, "loss": 1.0581, "step": 3890 }, { "epoch": 0.6996313944079835, "grad_norm": 1.5125117301940918, "learning_rate": 9.686531745955944e-06, "loss": 0.7799, "step": 3891 }, { "epoch": 0.6998112020138452, "grad_norm": 1.5630152225494385, "learning_rate": 9.686328735744077e-06, "loss": 0.8097, "step": 3892 }, { "epoch": 0.699991009619707, "grad_norm": 1.5940760374069214, "learning_rate": 9.686125661944876e-06, "loss": 0.855, "step": 3893 }, { "epoch": 0.7001708172255686, "grad_norm": 1.6760796308517456, "learning_rate": 9.6859225245611e-06, "loss": 0.8667, "step": 3894 }, { "epoch": 0.7003506248314304, "grad_norm": 1.5017774105072021, "learning_rate": 9.685719323595503e-06, "loss": 0.7297, "step": 3895 }, { "epoch": 0.7005304324372921, "grad_norm": 1.4861773252487183, "learning_rate": 9.685516059050844e-06, "loss": 0.7584, "step": 3896 }, { "epoch": 0.7007102400431539, "grad_norm": 1.5130248069763184, "learning_rate": 9.685312730929878e-06, "loss": 0.8418, "step": 3897 }, { "epoch": 0.7008900476490155, "grad_norm": 1.4344689846038818, "learning_rate": 9.685109339235368e-06, "loss": 0.8513, "step": 3898 }, { "epoch": 0.7010698552548773, "grad_norm": 1.5749773979187012, "learning_rate": 9.684905883970072e-06, "loss": 0.82, "step": 3899 }, { "epoch": 0.701249662860739, "grad_norm": 1.6305058002471924, "learning_rate": 9.684702365136748e-06, "loss": 0.8447, "step": 3900 }, { "epoch": 0.7014294704666008, "grad_norm": 1.5150883197784424, "learning_rate": 9.684498782738162e-06, "loss": 0.7536, "step": 3901 }, { "epoch": 0.7016092780724624, "grad_norm": 1.553747534751892, "learning_rate": 9.684295136777074e-06, "loss": 0.8435, "step": 3902 }, { "epoch": 0.7017890856783242, "grad_norm": 1.6427017450332642, "learning_rate": 9.684091427256247e-06, "loss": 0.9006, "step": 3903 }, { "epoch": 0.7019688932841859, "grad_norm": 1.5804978609085083, "learning_rate": 9.683887654178446e-06, "loss": 0.8514, "step": 3904 }, { "epoch": 0.7021487008900477, "grad_norm": 1.5993205308914185, "learning_rate": 9.683683817546435e-06, "loss": 0.8006, "step": 3905 }, { "epoch": 0.7023285084959093, "grad_norm": 1.486994743347168, "learning_rate": 9.683479917362981e-06, "loss": 0.7757, "step": 3906 }, { "epoch": 0.7025083161017711, "grad_norm": 1.444880723953247, "learning_rate": 9.683275953630849e-06, "loss": 0.8572, "step": 3907 }, { "epoch": 0.7026881237076328, "grad_norm": 1.5243839025497437, "learning_rate": 9.683071926352807e-06, "loss": 0.7685, "step": 3908 }, { "epoch": 0.7028679313134946, "grad_norm": 1.4603352546691895, "learning_rate": 9.682867835531624e-06, "loss": 0.8138, "step": 3909 }, { "epoch": 0.7030477389193562, "grad_norm": 1.5829975605010986, "learning_rate": 9.682663681170071e-06, "loss": 0.8531, "step": 3910 }, { "epoch": 0.703227546525218, "grad_norm": 1.5113974809646606, "learning_rate": 9.682459463270913e-06, "loss": 0.8126, "step": 3911 }, { "epoch": 0.7034073541310798, "grad_norm": 1.510613203048706, "learning_rate": 9.682255181836926e-06, "loss": 0.8547, "step": 3912 }, { "epoch": 0.7035871617369415, "grad_norm": 1.648880958557129, "learning_rate": 9.68205083687088e-06, "loss": 0.8097, "step": 3913 }, { "epoch": 0.7037669693428033, "grad_norm": 1.5441553592681885, "learning_rate": 9.681846428375548e-06, "loss": 0.7827, "step": 3914 }, { "epoch": 0.7039467769486649, "grad_norm": 1.143916368484497, "learning_rate": 9.6816419563537e-06, "loss": 1.0273, "step": 3915 }, { "epoch": 0.7041265845545267, "grad_norm": 1.7623833417892456, "learning_rate": 9.681437420808118e-06, "loss": 0.8568, "step": 3916 }, { "epoch": 0.7043063921603884, "grad_norm": 1.526855707168579, "learning_rate": 9.68123282174157e-06, "loss": 0.9267, "step": 3917 }, { "epoch": 0.7044861997662502, "grad_norm": 1.5600823163986206, "learning_rate": 9.681028159156836e-06, "loss": 0.8429, "step": 3918 }, { "epoch": 0.7046660073721118, "grad_norm": 1.5509659051895142, "learning_rate": 9.680823433056692e-06, "loss": 0.8111, "step": 3919 }, { "epoch": 0.7048458149779736, "grad_norm": 1.5968208312988281, "learning_rate": 9.680618643443916e-06, "loss": 0.8013, "step": 3920 }, { "epoch": 0.7050256225838353, "grad_norm": 1.6390398740768433, "learning_rate": 9.680413790321286e-06, "loss": 0.8476, "step": 3921 }, { "epoch": 0.7052054301896971, "grad_norm": 1.443353295326233, "learning_rate": 9.680208873691584e-06, "loss": 0.8376, "step": 3922 }, { "epoch": 0.7053852377955587, "grad_norm": 1.1895036697387695, "learning_rate": 9.680003893557587e-06, "loss": 1.0391, "step": 3923 }, { "epoch": 0.7055650454014205, "grad_norm": 1.6412904262542725, "learning_rate": 9.679798849922078e-06, "loss": 0.7538, "step": 3924 }, { "epoch": 0.7057448530072822, "grad_norm": 1.5063642263412476, "learning_rate": 9.679593742787839e-06, "loss": 0.7988, "step": 3925 }, { "epoch": 0.705924660613144, "grad_norm": 1.585739254951477, "learning_rate": 9.679388572157654e-06, "loss": 0.8692, "step": 3926 }, { "epoch": 0.7061044682190056, "grad_norm": 1.5941972732543945, "learning_rate": 9.679183338034306e-06, "loss": 0.7832, "step": 3927 }, { "epoch": 0.7062842758248674, "grad_norm": 1.5077764987945557, "learning_rate": 9.67897804042058e-06, "loss": 0.7832, "step": 3928 }, { "epoch": 0.7064640834307291, "grad_norm": 1.4042909145355225, "learning_rate": 9.678772679319261e-06, "loss": 0.7259, "step": 3929 }, { "epoch": 0.7066438910365909, "grad_norm": 1.0545061826705933, "learning_rate": 9.678567254733135e-06, "loss": 1.0596, "step": 3930 }, { "epoch": 0.7068236986424525, "grad_norm": 1.1082806587219238, "learning_rate": 9.678361766664993e-06, "loss": 1.0202, "step": 3931 }, { "epoch": 0.7070035062483143, "grad_norm": 1.5858904123306274, "learning_rate": 9.678156215117616e-06, "loss": 0.8791, "step": 3932 }, { "epoch": 0.707183313854176, "grad_norm": 1.724012851715088, "learning_rate": 9.677950600093801e-06, "loss": 0.8969, "step": 3933 }, { "epoch": 0.7073631214600378, "grad_norm": 1.5840269327163696, "learning_rate": 9.677744921596334e-06, "loss": 0.7973, "step": 3934 }, { "epoch": 0.7075429290658994, "grad_norm": 1.6977275609970093, "learning_rate": 9.677539179628005e-06, "loss": 0.854, "step": 3935 }, { "epoch": 0.7077227366717612, "grad_norm": 1.417068600654602, "learning_rate": 9.677333374191609e-06, "loss": 0.8141, "step": 3936 }, { "epoch": 0.7079025442776229, "grad_norm": 1.548336386680603, "learning_rate": 9.677127505289935e-06, "loss": 0.7778, "step": 3937 }, { "epoch": 0.7080823518834847, "grad_norm": 1.5634733438491821, "learning_rate": 9.676921572925777e-06, "loss": 0.8238, "step": 3938 }, { "epoch": 0.7082621594893465, "grad_norm": 1.6133602857589722, "learning_rate": 9.676715577101932e-06, "loss": 0.8712, "step": 3939 }, { "epoch": 0.7084419670952081, "grad_norm": 1.4506045579910278, "learning_rate": 9.676509517821193e-06, "loss": 0.8121, "step": 3940 }, { "epoch": 0.7086217747010699, "grad_norm": 1.2593083381652832, "learning_rate": 9.676303395086356e-06, "loss": 1.0194, "step": 3941 }, { "epoch": 0.7088015823069316, "grad_norm": 1.5772409439086914, "learning_rate": 9.676097208900214e-06, "loss": 0.8375, "step": 3942 }, { "epoch": 0.7089813899127934, "grad_norm": 1.4583796262741089, "learning_rate": 9.675890959265573e-06, "loss": 0.8237, "step": 3943 }, { "epoch": 0.709161197518655, "grad_norm": 1.5920201539993286, "learning_rate": 9.675684646185226e-06, "loss": 0.8687, "step": 3944 }, { "epoch": 0.7093410051245168, "grad_norm": 1.5393844842910767, "learning_rate": 9.675478269661974e-06, "loss": 0.8165, "step": 3945 }, { "epoch": 0.7095208127303785, "grad_norm": 1.636012077331543, "learning_rate": 9.675271829698616e-06, "loss": 0.8348, "step": 3946 }, { "epoch": 0.7097006203362403, "grad_norm": 1.5243152379989624, "learning_rate": 9.675065326297953e-06, "loss": 0.7809, "step": 3947 }, { "epoch": 0.7098804279421019, "grad_norm": 1.653577446937561, "learning_rate": 9.674858759462788e-06, "loss": 0.7615, "step": 3948 }, { "epoch": 0.7100602355479637, "grad_norm": 1.4644925594329834, "learning_rate": 9.674652129195926e-06, "loss": 0.8923, "step": 3949 }, { "epoch": 0.7102400431538254, "grad_norm": 1.5927506685256958, "learning_rate": 9.674445435500167e-06, "loss": 0.828, "step": 3950 }, { "epoch": 0.7104198507596872, "grad_norm": 1.4796292781829834, "learning_rate": 9.674238678378317e-06, "loss": 0.7001, "step": 3951 }, { "epoch": 0.7105996583655488, "grad_norm": 1.2974189519882202, "learning_rate": 9.674031857833179e-06, "loss": 1.0772, "step": 3952 }, { "epoch": 0.7107794659714106, "grad_norm": 1.8035025596618652, "learning_rate": 9.673824973867564e-06, "loss": 0.7935, "step": 3953 }, { "epoch": 0.7109592735772723, "grad_norm": 1.5816878080368042, "learning_rate": 9.673618026484277e-06, "loss": 0.8574, "step": 3954 }, { "epoch": 0.7111390811831341, "grad_norm": 1.3898398876190186, "learning_rate": 9.673411015686125e-06, "loss": 0.753, "step": 3955 }, { "epoch": 0.7113188887889957, "grad_norm": 1.5496283769607544, "learning_rate": 9.673203941475917e-06, "loss": 0.8568, "step": 3956 }, { "epoch": 0.7114986963948575, "grad_norm": 1.4997425079345703, "learning_rate": 9.672996803856465e-06, "loss": 0.7996, "step": 3957 }, { "epoch": 0.7116785040007192, "grad_norm": 1.4648672342300415, "learning_rate": 9.672789602830579e-06, "loss": 0.7523, "step": 3958 }, { "epoch": 0.711858311606581, "grad_norm": 1.4203797578811646, "learning_rate": 9.672582338401067e-06, "loss": 0.8002, "step": 3959 }, { "epoch": 0.7120381192124426, "grad_norm": 1.553547978401184, "learning_rate": 9.672375010570745e-06, "loss": 0.8453, "step": 3960 }, { "epoch": 0.7122179268183044, "grad_norm": 1.0750408172607422, "learning_rate": 9.672167619342422e-06, "loss": 1.0368, "step": 3961 }, { "epoch": 0.7123977344241661, "grad_norm": 1.5276696681976318, "learning_rate": 9.671960164718918e-06, "loss": 0.7888, "step": 3962 }, { "epoch": 0.7125775420300279, "grad_norm": 1.4908136129379272, "learning_rate": 9.671752646703045e-06, "loss": 0.8231, "step": 3963 }, { "epoch": 0.7127573496358895, "grad_norm": 1.6784700155258179, "learning_rate": 9.671545065297618e-06, "loss": 0.7683, "step": 3964 }, { "epoch": 0.7129371572417513, "grad_norm": 1.4486477375030518, "learning_rate": 9.671337420505454e-06, "loss": 0.7719, "step": 3965 }, { "epoch": 0.713116964847613, "grad_norm": 1.1116424798965454, "learning_rate": 9.67112971232937e-06, "loss": 1.0278, "step": 3966 }, { "epoch": 0.7132967724534748, "grad_norm": 1.9403681755065918, "learning_rate": 9.670921940772186e-06, "loss": 0.8531, "step": 3967 }, { "epoch": 0.7134765800593366, "grad_norm": 1.4772230386734009, "learning_rate": 9.67071410583672e-06, "loss": 0.8681, "step": 3968 }, { "epoch": 0.7136563876651982, "grad_norm": 1.0437074899673462, "learning_rate": 9.67050620752579e-06, "loss": 1.0625, "step": 3969 }, { "epoch": 0.71383619527106, "grad_norm": 1.5701440572738647, "learning_rate": 9.670298245842222e-06, "loss": 0.7933, "step": 3970 }, { "epoch": 0.7140160028769217, "grad_norm": 1.5906176567077637, "learning_rate": 9.670090220788835e-06, "loss": 0.8323, "step": 3971 }, { "epoch": 0.7141958104827835, "grad_norm": 1.5524063110351562, "learning_rate": 9.669882132368449e-06, "loss": 0.8728, "step": 3972 }, { "epoch": 0.7143756180886451, "grad_norm": 1.1502883434295654, "learning_rate": 9.669673980583891e-06, "loss": 1.0187, "step": 3973 }, { "epoch": 0.7145554256945069, "grad_norm": 1.1816160678863525, "learning_rate": 9.669465765437986e-06, "loss": 1.0398, "step": 3974 }, { "epoch": 0.7147352333003686, "grad_norm": 1.0872254371643066, "learning_rate": 9.669257486933556e-06, "loss": 1.037, "step": 3975 }, { "epoch": 0.7149150409062304, "grad_norm": 1.5088263750076294, "learning_rate": 9.669049145073428e-06, "loss": 0.7646, "step": 3976 }, { "epoch": 0.715094848512092, "grad_norm": 1.5776103734970093, "learning_rate": 9.66884073986043e-06, "loss": 0.799, "step": 3977 }, { "epoch": 0.7152746561179538, "grad_norm": 0.9968087673187256, "learning_rate": 9.66863227129739e-06, "loss": 1.0489, "step": 3978 }, { "epoch": 0.7154544637238155, "grad_norm": 1.0617097616195679, "learning_rate": 9.668423739387137e-06, "loss": 1.0414, "step": 3979 }, { "epoch": 0.7156342713296773, "grad_norm": 1.6849948167800903, "learning_rate": 9.668215144132498e-06, "loss": 0.8293, "step": 3980 }, { "epoch": 0.7158140789355389, "grad_norm": 1.4508168697357178, "learning_rate": 9.668006485536305e-06, "loss": 0.8587, "step": 3981 }, { "epoch": 0.7159938865414007, "grad_norm": 1.0718752145767212, "learning_rate": 9.667797763601387e-06, "loss": 1.0427, "step": 3982 }, { "epoch": 0.7161736941472624, "grad_norm": 1.5352121591567993, "learning_rate": 9.667588978330582e-06, "loss": 0.7795, "step": 3983 }, { "epoch": 0.7163535017531242, "grad_norm": 1.4936171770095825, "learning_rate": 9.667380129726716e-06, "loss": 0.7891, "step": 3984 }, { "epoch": 0.7165333093589858, "grad_norm": 1.7454588413238525, "learning_rate": 9.667171217792628e-06, "loss": 0.8248, "step": 3985 }, { "epoch": 0.7167131169648476, "grad_norm": 1.6414941549301147, "learning_rate": 9.666962242531149e-06, "loss": 0.8278, "step": 3986 }, { "epoch": 0.7168929245707093, "grad_norm": 1.7028820514678955, "learning_rate": 9.666753203945117e-06, "loss": 0.8298, "step": 3987 }, { "epoch": 0.7170727321765711, "grad_norm": 1.4698710441589355, "learning_rate": 9.666544102037367e-06, "loss": 0.785, "step": 3988 }, { "epoch": 0.7172525397824328, "grad_norm": 1.491689920425415, "learning_rate": 9.666334936810737e-06, "loss": 0.7514, "step": 3989 }, { "epoch": 0.7174323473882945, "grad_norm": 1.072808027267456, "learning_rate": 9.666125708268063e-06, "loss": 0.9959, "step": 3990 }, { "epoch": 0.7176121549941562, "grad_norm": 1.4740791320800781, "learning_rate": 9.665916416412189e-06, "loss": 0.8141, "step": 3991 }, { "epoch": 0.717791962600018, "grad_norm": 1.0312591791152954, "learning_rate": 9.66570706124595e-06, "loss": 1.0823, "step": 3992 }, { "epoch": 0.7179717702058797, "grad_norm": 1.4776147603988647, "learning_rate": 9.665497642772188e-06, "loss": 0.7779, "step": 3993 }, { "epoch": 0.7181515778117414, "grad_norm": 1.5337508916854858, "learning_rate": 9.665288160993744e-06, "loss": 0.7763, "step": 3994 }, { "epoch": 0.7183313854176032, "grad_norm": 1.450807809829712, "learning_rate": 9.665078615913463e-06, "loss": 0.7101, "step": 3995 }, { "epoch": 0.7185111930234649, "grad_norm": 1.4487980604171753, "learning_rate": 9.664869007534185e-06, "loss": 0.7923, "step": 3996 }, { "epoch": 0.7186910006293267, "grad_norm": 1.596985936164856, "learning_rate": 9.664659335858755e-06, "loss": 0.8186, "step": 3997 }, { "epoch": 0.7188708082351883, "grad_norm": 1.5924168825149536, "learning_rate": 9.66444960089002e-06, "loss": 0.8845, "step": 3998 }, { "epoch": 0.7190506158410501, "grad_norm": 2.13118052482605, "learning_rate": 9.664239802630824e-06, "loss": 0.8207, "step": 3999 }, { "epoch": 0.7192304234469118, "grad_norm": 1.453985333442688, "learning_rate": 9.664029941084013e-06, "loss": 0.7987, "step": 4000 }, { "epoch": 0.7192304234469118, "eval_loss": 0.8341596126556396, "eval_runtime": 148.698, "eval_samples_per_second": 96.719, "eval_steps_per_second": 1.513, "step": 4000 }, { "epoch": 0.7194102310527736, "grad_norm": 1.2106252908706665, "learning_rate": 9.663820016252436e-06, "loss": 0.995, "step": 4001 }, { "epoch": 0.7195900386586352, "grad_norm": 1.6455057859420776, "learning_rate": 9.663610028138942e-06, "loss": 0.8428, "step": 4002 }, { "epoch": 0.719769846264497, "grad_norm": 1.5479121208190918, "learning_rate": 9.663399976746379e-06, "loss": 0.8896, "step": 4003 }, { "epoch": 0.7199496538703587, "grad_norm": 1.6265536546707153, "learning_rate": 9.663189862077595e-06, "loss": 0.9141, "step": 4004 }, { "epoch": 0.7201294614762205, "grad_norm": 0.99213707447052, "learning_rate": 9.662979684135447e-06, "loss": 1.0348, "step": 4005 }, { "epoch": 0.7203092690820821, "grad_norm": 1.472737431526184, "learning_rate": 9.66276944292278e-06, "loss": 0.8237, "step": 4006 }, { "epoch": 0.7204890766879439, "grad_norm": 1.530456304550171, "learning_rate": 9.66255913844245e-06, "loss": 0.8605, "step": 4007 }, { "epoch": 0.7206688842938056, "grad_norm": 1.5158419609069824, "learning_rate": 9.662348770697312e-06, "loss": 0.7893, "step": 4008 }, { "epoch": 0.7208486918996674, "grad_norm": 1.7519819736480713, "learning_rate": 9.66213833969022e-06, "loss": 0.819, "step": 4009 }, { "epoch": 0.721028499505529, "grad_norm": 1.491977572441101, "learning_rate": 9.661927845424025e-06, "loss": 0.8153, "step": 4010 }, { "epoch": 0.7212083071113908, "grad_norm": 1.422477126121521, "learning_rate": 9.661717287901587e-06, "loss": 0.7378, "step": 4011 }, { "epoch": 0.7213881147172525, "grad_norm": 1.5216100215911865, "learning_rate": 9.661506667125764e-06, "loss": 0.7814, "step": 4012 }, { "epoch": 0.7215679223231143, "grad_norm": 1.4926321506500244, "learning_rate": 9.66129598309941e-06, "loss": 0.8521, "step": 4013 }, { "epoch": 0.721747729928976, "grad_norm": 1.4402446746826172, "learning_rate": 9.661085235825387e-06, "loss": 0.8185, "step": 4014 }, { "epoch": 0.7219275375348377, "grad_norm": 1.5578829050064087, "learning_rate": 9.660874425306552e-06, "loss": 0.7174, "step": 4015 }, { "epoch": 0.7221073451406994, "grad_norm": 1.52943754196167, "learning_rate": 9.660663551545769e-06, "loss": 0.8965, "step": 4016 }, { "epoch": 0.7222871527465612, "grad_norm": 1.1890555620193481, "learning_rate": 9.660452614545895e-06, "loss": 1.0421, "step": 4017 }, { "epoch": 0.7224669603524229, "grad_norm": 1.1469296216964722, "learning_rate": 9.660241614309796e-06, "loss": 1.0497, "step": 4018 }, { "epoch": 0.7226467679582846, "grad_norm": 1.5717893838882446, "learning_rate": 9.660030550840331e-06, "loss": 0.8217, "step": 4019 }, { "epoch": 0.7228265755641463, "grad_norm": 1.4599698781967163, "learning_rate": 9.659819424140368e-06, "loss": 0.7857, "step": 4020 }, { "epoch": 0.7230063831700081, "grad_norm": 1.5323486328125, "learning_rate": 9.659608234212769e-06, "loss": 0.858, "step": 4021 }, { "epoch": 0.7231861907758698, "grad_norm": 1.519055962562561, "learning_rate": 9.659396981060399e-06, "loss": 0.8349, "step": 4022 }, { "epoch": 0.7233659983817315, "grad_norm": 1.5307501554489136, "learning_rate": 9.659185664686127e-06, "loss": 0.7867, "step": 4023 }, { "epoch": 0.7235458059875933, "grad_norm": 6.566014766693115, "learning_rate": 9.658974285092819e-06, "loss": 0.8018, "step": 4024 }, { "epoch": 0.723725613593455, "grad_norm": 1.450638771057129, "learning_rate": 9.658762842283343e-06, "loss": 0.7733, "step": 4025 }, { "epoch": 0.7239054211993168, "grad_norm": 1.4713114500045776, "learning_rate": 9.65855133626057e-06, "loss": 0.8371, "step": 4026 }, { "epoch": 0.7240852288051784, "grad_norm": 1.47627592086792, "learning_rate": 9.658339767027365e-06, "loss": 0.8385, "step": 4027 }, { "epoch": 0.7242650364110402, "grad_norm": 1.5781368017196655, "learning_rate": 9.658128134586601e-06, "loss": 0.8638, "step": 4028 }, { "epoch": 0.7244448440169019, "grad_norm": 1.5369395017623901, "learning_rate": 9.657916438941154e-06, "loss": 0.8097, "step": 4029 }, { "epoch": 0.7246246516227637, "grad_norm": 1.5505834817886353, "learning_rate": 9.657704680093892e-06, "loss": 0.8147, "step": 4030 }, { "epoch": 0.7248044592286254, "grad_norm": 1.5153943300247192, "learning_rate": 9.657492858047688e-06, "loss": 0.7761, "step": 4031 }, { "epoch": 0.7249842668344871, "grad_norm": 1.4609787464141846, "learning_rate": 9.657280972805416e-06, "loss": 0.7988, "step": 4032 }, { "epoch": 0.7251640744403488, "grad_norm": 1.4329712390899658, "learning_rate": 9.657069024369954e-06, "loss": 1.0881, "step": 4033 }, { "epoch": 0.7253438820462106, "grad_norm": 1.4245306253433228, "learning_rate": 9.656857012744175e-06, "loss": 0.7827, "step": 4034 }, { "epoch": 0.7255236896520723, "grad_norm": 1.5177295207977295, "learning_rate": 9.656644937930957e-06, "loss": 0.7607, "step": 4035 }, { "epoch": 0.725703497257934, "grad_norm": 2.9389455318450928, "learning_rate": 9.656432799933178e-06, "loss": 0.7981, "step": 4036 }, { "epoch": 0.7258833048637957, "grad_norm": 1.6119611263275146, "learning_rate": 9.656220598753717e-06, "loss": 0.7843, "step": 4037 }, { "epoch": 0.7260631124696575, "grad_norm": 1.6058622598648071, "learning_rate": 9.656008334395449e-06, "loss": 0.8754, "step": 4038 }, { "epoch": 0.7262429200755192, "grad_norm": 1.491408109664917, "learning_rate": 9.655796006861257e-06, "loss": 0.8273, "step": 4039 }, { "epoch": 0.7264227276813809, "grad_norm": 1.538915753364563, "learning_rate": 9.655583616154026e-06, "loss": 0.7195, "step": 4040 }, { "epoch": 0.7266025352872426, "grad_norm": 1.4869152307510376, "learning_rate": 9.655371162276632e-06, "loss": 0.6963, "step": 4041 }, { "epoch": 0.7267823428931044, "grad_norm": 1.4795942306518555, "learning_rate": 9.65515864523196e-06, "loss": 0.7435, "step": 4042 }, { "epoch": 0.7269621504989661, "grad_norm": 1.4847387075424194, "learning_rate": 9.654946065022891e-06, "loss": 0.7937, "step": 4043 }, { "epoch": 0.7271419581048278, "grad_norm": 1.560849905014038, "learning_rate": 9.654733421652316e-06, "loss": 0.7967, "step": 4044 }, { "epoch": 0.7273217657106895, "grad_norm": 1.696777582168579, "learning_rate": 9.654520715123114e-06, "loss": 0.7947, "step": 4045 }, { "epoch": 0.7275015733165513, "grad_norm": 1.5080698728561401, "learning_rate": 9.654307945438173e-06, "loss": 0.7903, "step": 4046 }, { "epoch": 0.727681380922413, "grad_norm": 1.4414411783218384, "learning_rate": 9.654095112600382e-06, "loss": 0.7932, "step": 4047 }, { "epoch": 0.7278611885282747, "grad_norm": 1.4512460231781006, "learning_rate": 9.653882216612625e-06, "loss": 0.8053, "step": 4048 }, { "epoch": 0.7280409961341364, "grad_norm": 1.4525885581970215, "learning_rate": 9.653669257477793e-06, "loss": 0.7722, "step": 4049 }, { "epoch": 0.7282208037399982, "grad_norm": 1.4344356060028076, "learning_rate": 9.653456235198775e-06, "loss": 0.7604, "step": 4050 }, { "epoch": 0.72840061134586, "grad_norm": 1.504127025604248, "learning_rate": 9.653243149778465e-06, "loss": 0.7696, "step": 4051 }, { "epoch": 0.7285804189517217, "grad_norm": 1.491154432296753, "learning_rate": 9.653030001219747e-06, "loss": 0.7579, "step": 4052 }, { "epoch": 0.7287602265575834, "grad_norm": 1.5170090198516846, "learning_rate": 9.652816789525521e-06, "loss": 0.7844, "step": 4053 }, { "epoch": 0.7289400341634451, "grad_norm": 1.3269082307815552, "learning_rate": 9.652603514698674e-06, "loss": 1.0025, "step": 4054 }, { "epoch": 0.7291198417693069, "grad_norm": 1.105644941329956, "learning_rate": 9.652390176742103e-06, "loss": 1.0509, "step": 4055 }, { "epoch": 0.7292996493751686, "grad_norm": 1.1928398609161377, "learning_rate": 9.652176775658702e-06, "loss": 1.0083, "step": 4056 }, { "epoch": 0.7294794569810303, "grad_norm": 1.1175203323364258, "learning_rate": 9.651963311451366e-06, "loss": 1.0472, "step": 4057 }, { "epoch": 0.729659264586892, "grad_norm": 1.6092276573181152, "learning_rate": 9.651749784122992e-06, "loss": 0.8357, "step": 4058 }, { "epoch": 0.7298390721927538, "grad_norm": 1.5317940711975098, "learning_rate": 9.651536193676476e-06, "loss": 0.8122, "step": 4059 }, { "epoch": 0.7300188797986155, "grad_norm": 1.5653654336929321, "learning_rate": 9.65132254011472e-06, "loss": 0.8341, "step": 4060 }, { "epoch": 0.7301986874044772, "grad_norm": 1.2852815389633179, "learning_rate": 9.651108823440618e-06, "loss": 1.0634, "step": 4061 }, { "epoch": 0.7303784950103389, "grad_norm": 1.5236188173294067, "learning_rate": 9.650895043657073e-06, "loss": 0.8798, "step": 4062 }, { "epoch": 0.7305583026162007, "grad_norm": 1.1129343509674072, "learning_rate": 9.650681200766985e-06, "loss": 1.0585, "step": 4063 }, { "epoch": 0.7307381102220624, "grad_norm": 1.4134286642074585, "learning_rate": 9.650467294773254e-06, "loss": 0.8062, "step": 4064 }, { "epoch": 0.7309179178279241, "grad_norm": 1.3556615114212036, "learning_rate": 9.650253325678787e-06, "loss": 0.8188, "step": 4065 }, { "epoch": 0.7310977254337858, "grad_norm": 1.4607295989990234, "learning_rate": 9.650039293486482e-06, "loss": 0.8469, "step": 4066 }, { "epoch": 0.7312775330396476, "grad_norm": 1.4880503416061401, "learning_rate": 9.649825198199245e-06, "loss": 0.8702, "step": 4067 }, { "epoch": 0.7314573406455093, "grad_norm": 1.5301486253738403, "learning_rate": 9.649611039819981e-06, "loss": 0.8001, "step": 4068 }, { "epoch": 0.731637148251371, "grad_norm": 1.5008455514907837, "learning_rate": 9.649396818351597e-06, "loss": 0.8229, "step": 4069 }, { "epoch": 0.7318169558572327, "grad_norm": 1.5240315198898315, "learning_rate": 9.649182533796999e-06, "loss": 0.7706, "step": 4070 }, { "epoch": 0.7319967634630945, "grad_norm": 1.2492055892944336, "learning_rate": 9.648968186159093e-06, "loss": 1.0298, "step": 4071 }, { "epoch": 0.7321765710689562, "grad_norm": 1.430833339691162, "learning_rate": 9.64875377544079e-06, "loss": 0.7788, "step": 4072 }, { "epoch": 0.732356378674818, "grad_norm": 1.3955460786819458, "learning_rate": 9.648539301645e-06, "loss": 0.7976, "step": 4073 }, { "epoch": 0.7325361862806796, "grad_norm": 1.5446271896362305, "learning_rate": 9.648324764774628e-06, "loss": 0.8078, "step": 4074 }, { "epoch": 0.7327159938865414, "grad_norm": 1.5750716924667358, "learning_rate": 9.648110164832589e-06, "loss": 0.7504, "step": 4075 }, { "epoch": 0.7328958014924031, "grad_norm": 1.374068021774292, "learning_rate": 9.647895501821796e-06, "loss": 0.7086, "step": 4076 }, { "epoch": 0.7330756090982649, "grad_norm": 1.5436629056930542, "learning_rate": 9.647680775745156e-06, "loss": 0.7936, "step": 4077 }, { "epoch": 0.7332554167041266, "grad_norm": 1.7080423831939697, "learning_rate": 9.64746598660559e-06, "loss": 0.8302, "step": 4078 }, { "epoch": 0.7334352243099883, "grad_norm": 1.6526975631713867, "learning_rate": 9.647251134406007e-06, "loss": 0.8527, "step": 4079 }, { "epoch": 0.7336150319158501, "grad_norm": 1.4773167371749878, "learning_rate": 9.647036219149324e-06, "loss": 0.7905, "step": 4080 }, { "epoch": 0.7337948395217118, "grad_norm": 1.480013132095337, "learning_rate": 9.646821240838455e-06, "loss": 0.8206, "step": 4081 }, { "epoch": 0.7339746471275735, "grad_norm": 1.4991215467453003, "learning_rate": 9.646606199476323e-06, "loss": 0.7837, "step": 4082 }, { "epoch": 0.7341544547334352, "grad_norm": 1.5717053413391113, "learning_rate": 9.646391095065838e-06, "loss": 0.7825, "step": 4083 }, { "epoch": 0.734334262339297, "grad_norm": 1.596285343170166, "learning_rate": 9.646175927609925e-06, "loss": 0.8455, "step": 4084 }, { "epoch": 0.7345140699451587, "grad_norm": 1.5163697004318237, "learning_rate": 9.6459606971115e-06, "loss": 0.8561, "step": 4085 }, { "epoch": 0.7346938775510204, "grad_norm": 1.5020040273666382, "learning_rate": 9.645745403573486e-06, "loss": 0.809, "step": 4086 }, { "epoch": 0.7348736851568821, "grad_norm": 1.4662147760391235, "learning_rate": 9.645530046998802e-06, "loss": 0.8662, "step": 4087 }, { "epoch": 0.7350534927627439, "grad_norm": 1.5046043395996094, "learning_rate": 9.645314627390369e-06, "loss": 0.8411, "step": 4088 }, { "epoch": 0.7352333003686056, "grad_norm": 1.4683512449264526, "learning_rate": 9.645099144751113e-06, "loss": 0.8265, "step": 4089 }, { "epoch": 0.7354131079744674, "grad_norm": 1.5052133798599243, "learning_rate": 9.644883599083959e-06, "loss": 0.7596, "step": 4090 }, { "epoch": 0.735592915580329, "grad_norm": 1.4326080083847046, "learning_rate": 9.644667990391826e-06, "loss": 0.8579, "step": 4091 }, { "epoch": 0.7357727231861908, "grad_norm": 1.5468063354492188, "learning_rate": 9.644452318677645e-06, "loss": 0.7995, "step": 4092 }, { "epoch": 0.7359525307920525, "grad_norm": 1.4759025573730469, "learning_rate": 9.64423658394434e-06, "loss": 0.7579, "step": 4093 }, { "epoch": 0.7361323383979143, "grad_norm": 1.5368037223815918, "learning_rate": 9.644020786194837e-06, "loss": 0.7747, "step": 4094 }, { "epoch": 0.7363121460037759, "grad_norm": 1.5523985624313354, "learning_rate": 9.643804925432065e-06, "loss": 0.8134, "step": 4095 }, { "epoch": 0.7364919536096377, "grad_norm": 1.6786314249038696, "learning_rate": 9.643589001658955e-06, "loss": 0.8363, "step": 4096 }, { "epoch": 0.7366717612154994, "grad_norm": 1.1057149171829224, "learning_rate": 9.643373014878435e-06, "loss": 1.0258, "step": 4097 }, { "epoch": 0.7368515688213612, "grad_norm": 1.630057454109192, "learning_rate": 9.643156965093435e-06, "loss": 0.8435, "step": 4098 }, { "epoch": 0.7370313764272228, "grad_norm": 1.7355445623397827, "learning_rate": 9.642940852306888e-06, "loss": 0.7268, "step": 4099 }, { "epoch": 0.7372111840330846, "grad_norm": 1.6549136638641357, "learning_rate": 9.642724676521726e-06, "loss": 0.8526, "step": 4100 }, { "epoch": 0.7373909916389463, "grad_norm": 1.5947575569152832, "learning_rate": 9.642508437740882e-06, "loss": 0.7759, "step": 4101 }, { "epoch": 0.7375707992448081, "grad_norm": 1.4004979133605957, "learning_rate": 9.642292135967291e-06, "loss": 0.7759, "step": 4102 }, { "epoch": 0.7377506068506697, "grad_norm": 1.5490065813064575, "learning_rate": 9.642075771203887e-06, "loss": 0.8918, "step": 4103 }, { "epoch": 0.7379304144565315, "grad_norm": 1.5774871110916138, "learning_rate": 9.641859343453603e-06, "loss": 0.8232, "step": 4104 }, { "epoch": 0.7381102220623932, "grad_norm": 1.5153369903564453, "learning_rate": 9.641642852719382e-06, "loss": 0.7925, "step": 4105 }, { "epoch": 0.738290029668255, "grad_norm": 1.1389232873916626, "learning_rate": 9.641426299004157e-06, "loss": 1.0196, "step": 4106 }, { "epoch": 0.7384698372741167, "grad_norm": 1.4875470399856567, "learning_rate": 9.641209682310866e-06, "loss": 0.7793, "step": 4107 }, { "epoch": 0.7386496448799784, "grad_norm": 1.6036182641983032, "learning_rate": 9.64099300264245e-06, "loss": 0.7721, "step": 4108 }, { "epoch": 0.7388294524858402, "grad_norm": 1.733665943145752, "learning_rate": 9.640776260001849e-06, "loss": 0.8287, "step": 4109 }, { "epoch": 0.7390092600917019, "grad_norm": 1.4802621603012085, "learning_rate": 9.640559454392004e-06, "loss": 0.7849, "step": 4110 }, { "epoch": 0.7391890676975637, "grad_norm": 1.101642370223999, "learning_rate": 9.640342585815855e-06, "loss": 0.9972, "step": 4111 }, { "epoch": 0.7393688753034253, "grad_norm": 1.3697232007980347, "learning_rate": 9.640125654276347e-06, "loss": 0.7846, "step": 4112 }, { "epoch": 0.7395486829092871, "grad_norm": 1.4928250312805176, "learning_rate": 9.639908659776422e-06, "loss": 0.6945, "step": 4113 }, { "epoch": 0.7397284905151488, "grad_norm": 1.5636272430419922, "learning_rate": 9.639691602319024e-06, "loss": 0.7825, "step": 4114 }, { "epoch": 0.7399082981210106, "grad_norm": 1.3946696519851685, "learning_rate": 9.639474481907098e-06, "loss": 0.7864, "step": 4115 }, { "epoch": 0.7400881057268722, "grad_norm": 1.7150615453720093, "learning_rate": 9.639257298543594e-06, "loss": 0.8111, "step": 4116 }, { "epoch": 0.740267913332734, "grad_norm": 1.4133435487747192, "learning_rate": 9.639040052231455e-06, "loss": 0.8356, "step": 4117 }, { "epoch": 0.7404477209385957, "grad_norm": 1.603104829788208, "learning_rate": 9.638822742973627e-06, "loss": 0.7534, "step": 4118 }, { "epoch": 0.7406275285444575, "grad_norm": 1.4052737951278687, "learning_rate": 9.638605370773062e-06, "loss": 0.739, "step": 4119 }, { "epoch": 0.7408073361503191, "grad_norm": 1.4870326519012451, "learning_rate": 9.63838793563271e-06, "loss": 0.7944, "step": 4120 }, { "epoch": 0.7409871437561809, "grad_norm": 1.604610562324524, "learning_rate": 9.63817043755552e-06, "loss": 0.772, "step": 4121 }, { "epoch": 0.7411669513620426, "grad_norm": 1.554479718208313, "learning_rate": 9.637952876544441e-06, "loss": 0.8644, "step": 4122 }, { "epoch": 0.7413467589679044, "grad_norm": 1.6092095375061035, "learning_rate": 9.63773525260243e-06, "loss": 0.8383, "step": 4123 }, { "epoch": 0.741526566573766, "grad_norm": 1.5520098209381104, "learning_rate": 9.637517565732435e-06, "loss": 0.8188, "step": 4124 }, { "epoch": 0.7417063741796278, "grad_norm": 1.462611436843872, "learning_rate": 9.637299815937411e-06, "loss": 0.7682, "step": 4125 }, { "epoch": 0.7418861817854895, "grad_norm": 1.446513295173645, "learning_rate": 9.637082003220315e-06, "loss": 0.7792, "step": 4126 }, { "epoch": 0.7420659893913513, "grad_norm": 1.1972051858901978, "learning_rate": 9.6368641275841e-06, "loss": 1.0386, "step": 4127 }, { "epoch": 0.7422457969972129, "grad_norm": 1.5824817419052124, "learning_rate": 9.636646189031724e-06, "loss": 0.8587, "step": 4128 }, { "epoch": 0.7424256046030747, "grad_norm": 1.0661964416503906, "learning_rate": 9.636428187566142e-06, "loss": 1.0852, "step": 4129 }, { "epoch": 0.7426054122089364, "grad_norm": 1.484782338142395, "learning_rate": 9.636210123190312e-06, "loss": 0.8584, "step": 4130 }, { "epoch": 0.7427852198147982, "grad_norm": 1.1140410900115967, "learning_rate": 9.635991995907196e-06, "loss": 1.0428, "step": 4131 }, { "epoch": 0.7429650274206598, "grad_norm": 1.5359846353530884, "learning_rate": 9.63577380571975e-06, "loss": 0.7622, "step": 4132 }, { "epoch": 0.7431448350265216, "grad_norm": 1.5800130367279053, "learning_rate": 9.635555552630937e-06, "loss": 0.7756, "step": 4133 }, { "epoch": 0.7433246426323834, "grad_norm": 1.3733339309692383, "learning_rate": 9.635337236643718e-06, "loss": 0.7908, "step": 4134 }, { "epoch": 0.7435044502382451, "grad_norm": 1.4264187812805176, "learning_rate": 9.635118857761056e-06, "loss": 0.83, "step": 4135 }, { "epoch": 0.7436842578441069, "grad_norm": 1.4992003440856934, "learning_rate": 9.63490041598591e-06, "loss": 0.7823, "step": 4136 }, { "epoch": 0.7438640654499685, "grad_norm": 1.5196750164031982, "learning_rate": 9.63468191132125e-06, "loss": 0.7252, "step": 4137 }, { "epoch": 0.7440438730558303, "grad_norm": 1.3886370658874512, "learning_rate": 9.634463343770037e-06, "loss": 0.7355, "step": 4138 }, { "epoch": 0.744223680661692, "grad_norm": 1.8243781328201294, "learning_rate": 9.634244713335236e-06, "loss": 0.7893, "step": 4139 }, { "epoch": 0.7444034882675538, "grad_norm": 1.4669818878173828, "learning_rate": 9.634026020019816e-06, "loss": 0.852, "step": 4140 }, { "epoch": 0.7445832958734154, "grad_norm": 1.4400746822357178, "learning_rate": 9.633807263826745e-06, "loss": 0.8792, "step": 4141 }, { "epoch": 0.7447631034792772, "grad_norm": 1.5781941413879395, "learning_rate": 9.633588444758987e-06, "loss": 0.841, "step": 4142 }, { "epoch": 0.7449429110851389, "grad_norm": 1.4785183668136597, "learning_rate": 9.633369562819514e-06, "loss": 0.8463, "step": 4143 }, { "epoch": 0.7451227186910007, "grad_norm": 2.7826478481292725, "learning_rate": 9.633150618011296e-06, "loss": 0.7654, "step": 4144 }, { "epoch": 0.7453025262968623, "grad_norm": 1.514572024345398, "learning_rate": 9.632931610337304e-06, "loss": 0.8076, "step": 4145 }, { "epoch": 0.7454823339027241, "grad_norm": 1.176352620124817, "learning_rate": 9.632712539800509e-06, "loss": 1.0616, "step": 4146 }, { "epoch": 0.7456621415085858, "grad_norm": 1.5656956434249878, "learning_rate": 9.632493406403883e-06, "loss": 0.8413, "step": 4147 }, { "epoch": 0.7458419491144476, "grad_norm": 1.8135008811950684, "learning_rate": 9.6322742101504e-06, "loss": 0.7742, "step": 4148 }, { "epoch": 0.7460217567203092, "grad_norm": 1.098755121231079, "learning_rate": 9.632054951043035e-06, "loss": 1.041, "step": 4149 }, { "epoch": 0.746201564326171, "grad_norm": 1.4106184244155884, "learning_rate": 9.631835629084762e-06, "loss": 0.8218, "step": 4150 }, { "epoch": 0.7463813719320327, "grad_norm": 1.4237171411514282, "learning_rate": 9.631616244278557e-06, "loss": 0.8568, "step": 4151 }, { "epoch": 0.7465611795378945, "grad_norm": 1.490195631980896, "learning_rate": 9.631396796627397e-06, "loss": 0.8126, "step": 4152 }, { "epoch": 0.7467409871437561, "grad_norm": 1.5436843633651733, "learning_rate": 9.631177286134259e-06, "loss": 0.8243, "step": 4153 }, { "epoch": 0.7469207947496179, "grad_norm": 1.6394391059875488, "learning_rate": 9.630957712802122e-06, "loss": 0.8218, "step": 4154 }, { "epoch": 0.7471006023554796, "grad_norm": 1.1877033710479736, "learning_rate": 9.630738076633966e-06, "loss": 1.0555, "step": 4155 }, { "epoch": 0.7472804099613414, "grad_norm": 1.5001486539840698, "learning_rate": 9.63051837763277e-06, "loss": 0.8092, "step": 4156 }, { "epoch": 0.747460217567203, "grad_norm": 1.6289559602737427, "learning_rate": 9.630298615801514e-06, "loss": 0.7738, "step": 4157 }, { "epoch": 0.7476400251730648, "grad_norm": 1.3605972528457642, "learning_rate": 9.630078791143182e-06, "loss": 0.7843, "step": 4158 }, { "epoch": 0.7478198327789265, "grad_norm": 1.6129345893859863, "learning_rate": 9.629858903660758e-06, "loss": 0.8051, "step": 4159 }, { "epoch": 0.7479996403847883, "grad_norm": 1.441138744354248, "learning_rate": 9.629638953357223e-06, "loss": 0.777, "step": 4160 }, { "epoch": 0.7481794479906501, "grad_norm": 1.695955514907837, "learning_rate": 9.629418940235563e-06, "loss": 0.7808, "step": 4161 }, { "epoch": 0.7483592555965117, "grad_norm": 1.4729702472686768, "learning_rate": 9.629198864298759e-06, "loss": 0.7711, "step": 4162 }, { "epoch": 0.7485390632023735, "grad_norm": 1.456132173538208, "learning_rate": 9.628978725549802e-06, "loss": 0.8343, "step": 4163 }, { "epoch": 0.7487188708082352, "grad_norm": 1.529088020324707, "learning_rate": 9.62875852399168e-06, "loss": 0.832, "step": 4164 }, { "epoch": 0.748898678414097, "grad_norm": 1.5371347665786743, "learning_rate": 9.628538259627375e-06, "loss": 0.8, "step": 4165 }, { "epoch": 0.7490784860199586, "grad_norm": 1.341748595237732, "learning_rate": 9.628317932459881e-06, "loss": 0.9136, "step": 4166 }, { "epoch": 0.7492582936258204, "grad_norm": 1.5409941673278809, "learning_rate": 9.628097542492185e-06, "loss": 0.7917, "step": 4167 }, { "epoch": 0.7494381012316821, "grad_norm": 1.484734296798706, "learning_rate": 9.62787708972728e-06, "loss": 0.8115, "step": 4168 }, { "epoch": 0.7496179088375439, "grad_norm": 1.4686784744262695, "learning_rate": 9.627656574168153e-06, "loss": 0.8277, "step": 4169 }, { "epoch": 0.7497977164434055, "grad_norm": 1.1871235370635986, "learning_rate": 9.627435995817799e-06, "loss": 1.0116, "step": 4170 }, { "epoch": 0.7499775240492673, "grad_norm": 1.4884207248687744, "learning_rate": 9.62721535467921e-06, "loss": 0.713, "step": 4171 }, { "epoch": 0.750157331655129, "grad_norm": 1.4423636198043823, "learning_rate": 9.62699465075538e-06, "loss": 0.7583, "step": 4172 }, { "epoch": 0.7503371392609908, "grad_norm": 1.1536445617675781, "learning_rate": 9.626773884049305e-06, "loss": 1.0145, "step": 4173 }, { "epoch": 0.7505169468668524, "grad_norm": 1.5158017873764038, "learning_rate": 9.626553054563979e-06, "loss": 0.7436, "step": 4174 }, { "epoch": 0.7506967544727142, "grad_norm": 1.06093430519104, "learning_rate": 9.6263321623024e-06, "loss": 1.0254, "step": 4175 }, { "epoch": 0.7508765620785759, "grad_norm": 1.581194519996643, "learning_rate": 9.62611120726756e-06, "loss": 0.8737, "step": 4176 }, { "epoch": 0.7510563696844377, "grad_norm": 1.4424879550933838, "learning_rate": 9.625890189462464e-06, "loss": 0.6517, "step": 4177 }, { "epoch": 0.7512361772902993, "grad_norm": 1.5785163640975952, "learning_rate": 9.625669108890107e-06, "loss": 0.822, "step": 4178 }, { "epoch": 0.7514159848961611, "grad_norm": 1.4453362226486206, "learning_rate": 9.62544796555349e-06, "loss": 0.7667, "step": 4179 }, { "epoch": 0.7515957925020228, "grad_norm": 1.4953080415725708, "learning_rate": 9.625226759455616e-06, "loss": 0.7917, "step": 4180 }, { "epoch": 0.7517756001078846, "grad_norm": 1.522438645362854, "learning_rate": 9.62500549059948e-06, "loss": 0.821, "step": 4181 }, { "epoch": 0.7519554077137462, "grad_norm": 1.4340665340423584, "learning_rate": 9.624784158988089e-06, "loss": 0.7752, "step": 4182 }, { "epoch": 0.752135215319608, "grad_norm": 1.599779486656189, "learning_rate": 9.624562764624445e-06, "loss": 0.8985, "step": 4183 }, { "epoch": 0.7523150229254697, "grad_norm": 1.501631259918213, "learning_rate": 9.624341307511553e-06, "loss": 0.8672, "step": 4184 }, { "epoch": 0.7524948305313315, "grad_norm": 1.5786445140838623, "learning_rate": 9.624119787652418e-06, "loss": 0.856, "step": 4185 }, { "epoch": 0.7526746381371932, "grad_norm": 1.4720044136047363, "learning_rate": 9.623898205050045e-06, "loss": 0.8421, "step": 4186 }, { "epoch": 0.7528544457430549, "grad_norm": 1.2575780153274536, "learning_rate": 9.623676559707439e-06, "loss": 1.0653, "step": 4187 }, { "epoch": 0.7530342533489166, "grad_norm": 1.5636850595474243, "learning_rate": 9.623454851627609e-06, "loss": 0.8381, "step": 4188 }, { "epoch": 0.7532140609547784, "grad_norm": 1.4955509901046753, "learning_rate": 9.623233080813563e-06, "loss": 0.7811, "step": 4189 }, { "epoch": 0.7533938685606402, "grad_norm": 1.593629002571106, "learning_rate": 9.623011247268312e-06, "loss": 0.8389, "step": 4190 }, { "epoch": 0.7535736761665018, "grad_norm": 1.4947543144226074, "learning_rate": 9.622789350994863e-06, "loss": 0.8168, "step": 4191 }, { "epoch": 0.7537534837723636, "grad_norm": 1.522971272468567, "learning_rate": 9.62256739199623e-06, "loss": 0.8273, "step": 4192 }, { "epoch": 0.7539332913782253, "grad_norm": 1.6545556783676147, "learning_rate": 9.622345370275422e-06, "loss": 0.7963, "step": 4193 }, { "epoch": 0.7541130989840871, "grad_norm": 1.407492756843567, "learning_rate": 9.622123285835453e-06, "loss": 0.792, "step": 4194 }, { "epoch": 0.7542929065899487, "grad_norm": 1.4791735410690308, "learning_rate": 9.621901138679336e-06, "loss": 0.8528, "step": 4195 }, { "epoch": 0.7544727141958105, "grad_norm": 1.4262908697128296, "learning_rate": 9.621678928810083e-06, "loss": 0.7833, "step": 4196 }, { "epoch": 0.7546525218016722, "grad_norm": 1.5933605432510376, "learning_rate": 9.621456656230713e-06, "loss": 0.9231, "step": 4197 }, { "epoch": 0.754832329407534, "grad_norm": 1.440361499786377, "learning_rate": 9.62123432094424e-06, "loss": 0.8675, "step": 4198 }, { "epoch": 0.7550121370133956, "grad_norm": 1.4115946292877197, "learning_rate": 9.621011922953681e-06, "loss": 0.7568, "step": 4199 }, { "epoch": 0.7551919446192574, "grad_norm": 1.5453343391418457, "learning_rate": 9.620789462262052e-06, "loss": 0.7931, "step": 4200 }, { "epoch": 0.7553717522251191, "grad_norm": 1.5059138536453247, "learning_rate": 9.620566938872375e-06, "loss": 0.8301, "step": 4201 }, { "epoch": 0.7555515598309809, "grad_norm": 1.6144421100616455, "learning_rate": 9.620344352787668e-06, "loss": 0.7488, "step": 4202 }, { "epoch": 0.7557313674368425, "grad_norm": 1.346560001373291, "learning_rate": 9.620121704010947e-06, "loss": 0.854, "step": 4203 }, { "epoch": 0.7559111750427043, "grad_norm": 1.4134944677352905, "learning_rate": 9.61989899254524e-06, "loss": 0.7304, "step": 4204 }, { "epoch": 0.756090982648566, "grad_norm": 1.5169237852096558, "learning_rate": 9.619676218393566e-06, "loss": 0.7908, "step": 4205 }, { "epoch": 0.7562707902544278, "grad_norm": 1.1512335538864136, "learning_rate": 9.619453381558945e-06, "loss": 0.972, "step": 4206 }, { "epoch": 0.7564505978602895, "grad_norm": 1.542114496231079, "learning_rate": 9.619230482044404e-06, "loss": 0.7811, "step": 4207 }, { "epoch": 0.7566304054661512, "grad_norm": 1.4975556135177612, "learning_rate": 9.619007519852968e-06, "loss": 0.8406, "step": 4208 }, { "epoch": 0.7568102130720129, "grad_norm": 1.4767282009124756, "learning_rate": 9.618784494987658e-06, "loss": 0.8577, "step": 4209 }, { "epoch": 0.7569900206778747, "grad_norm": 0.981299102306366, "learning_rate": 9.618561407451506e-06, "loss": 1.0508, "step": 4210 }, { "epoch": 0.7571698282837364, "grad_norm": 1.581968069076538, "learning_rate": 9.618338257247533e-06, "loss": 0.8922, "step": 4211 }, { "epoch": 0.7573496358895981, "grad_norm": 1.497195839881897, "learning_rate": 9.618115044378771e-06, "loss": 0.8172, "step": 4212 }, { "epoch": 0.7575294434954598, "grad_norm": 1.4751615524291992, "learning_rate": 9.617891768848247e-06, "loss": 0.8191, "step": 4213 }, { "epoch": 0.7577092511013216, "grad_norm": 1.6852179765701294, "learning_rate": 9.617668430658991e-06, "loss": 0.8129, "step": 4214 }, { "epoch": 0.7578890587071833, "grad_norm": 1.406842589378357, "learning_rate": 9.617445029814034e-06, "loss": 0.8786, "step": 4215 }, { "epoch": 0.758068866313045, "grad_norm": 1.5292760133743286, "learning_rate": 9.617221566316405e-06, "loss": 0.8212, "step": 4216 }, { "epoch": 0.7582486739189068, "grad_norm": 1.194075345993042, "learning_rate": 9.61699804016914e-06, "loss": 1.0244, "step": 4217 }, { "epoch": 0.7584284815247685, "grad_norm": 1.428310751914978, "learning_rate": 9.61677445137527e-06, "loss": 0.775, "step": 4218 }, { "epoch": 0.7586082891306303, "grad_norm": 1.4596424102783203, "learning_rate": 9.616550799937828e-06, "loss": 0.7669, "step": 4219 }, { "epoch": 0.758788096736492, "grad_norm": 1.767218828201294, "learning_rate": 9.616327085859847e-06, "loss": 0.7371, "step": 4220 }, { "epoch": 0.7589679043423537, "grad_norm": 1.4757704734802246, "learning_rate": 9.616103309144367e-06, "loss": 0.7111, "step": 4221 }, { "epoch": 0.7591477119482154, "grad_norm": 1.4545718431472778, "learning_rate": 9.61587946979442e-06, "loss": 0.8461, "step": 4222 }, { "epoch": 0.7593275195540772, "grad_norm": 1.4824074506759644, "learning_rate": 9.615655567813046e-06, "loss": 0.8412, "step": 4223 }, { "epoch": 0.7595073271599388, "grad_norm": 1.5120991468429565, "learning_rate": 9.615431603203284e-06, "loss": 0.7627, "step": 4224 }, { "epoch": 0.7596871347658006, "grad_norm": 1.5067535638809204, "learning_rate": 9.61520757596817e-06, "loss": 0.7787, "step": 4225 }, { "epoch": 0.7598669423716623, "grad_norm": 1.4538921117782593, "learning_rate": 9.614983486110745e-06, "loss": 0.8011, "step": 4226 }, { "epoch": 0.7600467499775241, "grad_norm": 1.5620859861373901, "learning_rate": 9.61475933363405e-06, "loss": 0.9064, "step": 4227 }, { "epoch": 0.7602265575833858, "grad_norm": 1.64467191696167, "learning_rate": 9.614535118541126e-06, "loss": 0.7782, "step": 4228 }, { "epoch": 0.7604063651892475, "grad_norm": 1.5050774812698364, "learning_rate": 9.614310840835015e-06, "loss": 0.8394, "step": 4229 }, { "epoch": 0.7605861727951092, "grad_norm": 1.5820568799972534, "learning_rate": 9.61408650051876e-06, "loss": 0.8528, "step": 4230 }, { "epoch": 0.760765980400971, "grad_norm": 1.7082109451293945, "learning_rate": 9.613862097595406e-06, "loss": 0.7759, "step": 4231 }, { "epoch": 0.7609457880068327, "grad_norm": 1.6254571676254272, "learning_rate": 9.613637632067998e-06, "loss": 0.8811, "step": 4232 }, { "epoch": 0.7611255956126944, "grad_norm": 1.5120846033096313, "learning_rate": 9.61341310393958e-06, "loss": 0.8328, "step": 4233 }, { "epoch": 0.7613054032185561, "grad_norm": 1.597158670425415, "learning_rate": 9.613188513213199e-06, "loss": 0.8627, "step": 4234 }, { "epoch": 0.7614852108244179, "grad_norm": 1.6326836347579956, "learning_rate": 9.612963859891905e-06, "loss": 0.8577, "step": 4235 }, { "epoch": 0.7616650184302796, "grad_norm": 1.5399583578109741, "learning_rate": 9.612739143978744e-06, "loss": 0.7973, "step": 4236 }, { "epoch": 0.7618448260361413, "grad_norm": 1.2062464952468872, "learning_rate": 9.612514365476765e-06, "loss": 1.0597, "step": 4237 }, { "epoch": 0.762024633642003, "grad_norm": 1.5330121517181396, "learning_rate": 9.612289524389017e-06, "loss": 0.7885, "step": 4238 }, { "epoch": 0.7622044412478648, "grad_norm": 1.50315260887146, "learning_rate": 9.612064620718553e-06, "loss": 0.8052, "step": 4239 }, { "epoch": 0.7623842488537265, "grad_norm": 1.0340938568115234, "learning_rate": 9.611839654468425e-06, "loss": 1.0573, "step": 4240 }, { "epoch": 0.7625640564595882, "grad_norm": 1.5084824562072754, "learning_rate": 9.61161462564168e-06, "loss": 0.8162, "step": 4241 }, { "epoch": 0.7627438640654499, "grad_norm": 1.7696598768234253, "learning_rate": 9.61138953424138e-06, "loss": 0.8837, "step": 4242 }, { "epoch": 0.7629236716713117, "grad_norm": 1.38753080368042, "learning_rate": 9.611164380270575e-06, "loss": 0.8046, "step": 4243 }, { "epoch": 0.7631034792771735, "grad_norm": 1.8255497217178345, "learning_rate": 9.610939163732317e-06, "loss": 0.7824, "step": 4244 }, { "epoch": 0.7632832868830352, "grad_norm": 1.5862418413162231, "learning_rate": 9.610713884629667e-06, "loss": 0.8062, "step": 4245 }, { "epoch": 0.7634630944888969, "grad_norm": 1.5548911094665527, "learning_rate": 9.610488542965678e-06, "loss": 0.8264, "step": 4246 }, { "epoch": 0.7636429020947586, "grad_norm": 1.5671809911727905, "learning_rate": 9.61026313874341e-06, "loss": 0.8741, "step": 4247 }, { "epoch": 0.7638227097006204, "grad_norm": 1.6045546531677246, "learning_rate": 9.61003767196592e-06, "loss": 0.8342, "step": 4248 }, { "epoch": 0.764002517306482, "grad_norm": 1.2238738536834717, "learning_rate": 9.609812142636268e-06, "loss": 1.0276, "step": 4249 }, { "epoch": 0.7641823249123438, "grad_norm": 1.4446173906326294, "learning_rate": 9.609586550757513e-06, "loss": 0.8033, "step": 4250 }, { "epoch": 0.7643621325182055, "grad_norm": 1.6392312049865723, "learning_rate": 9.609360896332718e-06, "loss": 0.7768, "step": 4251 }, { "epoch": 0.7645419401240673, "grad_norm": 1.5834301710128784, "learning_rate": 9.609135179364944e-06, "loss": 0.8114, "step": 4252 }, { "epoch": 0.764721747729929, "grad_norm": 1.5931719541549683, "learning_rate": 9.608909399857253e-06, "loss": 0.7198, "step": 4253 }, { "epoch": 0.7649015553357907, "grad_norm": 1.5330191850662231, "learning_rate": 9.608683557812707e-06, "loss": 0.7995, "step": 4254 }, { "epoch": 0.7650813629416524, "grad_norm": 1.4675334692001343, "learning_rate": 9.608457653234376e-06, "loss": 0.8175, "step": 4255 }, { "epoch": 0.7652611705475142, "grad_norm": 1.5021158456802368, "learning_rate": 9.60823168612532e-06, "loss": 0.7387, "step": 4256 }, { "epoch": 0.7654409781533759, "grad_norm": 1.509697675704956, "learning_rate": 9.608005656488605e-06, "loss": 0.7673, "step": 4257 }, { "epoch": 0.7656207857592376, "grad_norm": 1.6481263637542725, "learning_rate": 9.607779564327303e-06, "loss": 0.7597, "step": 4258 }, { "epoch": 0.7658005933650993, "grad_norm": 1.4607422351837158, "learning_rate": 9.607553409644475e-06, "loss": 0.7667, "step": 4259 }, { "epoch": 0.7659804009709611, "grad_norm": 1.4686989784240723, "learning_rate": 9.607327192443195e-06, "loss": 0.7956, "step": 4260 }, { "epoch": 0.7661602085768228, "grad_norm": 1.5962477922439575, "learning_rate": 9.607100912726529e-06, "loss": 0.8224, "step": 4261 }, { "epoch": 0.7663400161826845, "grad_norm": 1.5351063013076782, "learning_rate": 9.606874570497549e-06, "loss": 0.7676, "step": 4262 }, { "epoch": 0.7665198237885462, "grad_norm": 1.5339285135269165, "learning_rate": 9.606648165759327e-06, "loss": 0.8318, "step": 4263 }, { "epoch": 0.766699631394408, "grad_norm": 1.4462496042251587, "learning_rate": 9.606421698514933e-06, "loss": 0.8208, "step": 4264 }, { "epoch": 0.7668794390002697, "grad_norm": 1.5176657438278198, "learning_rate": 9.606195168767441e-06, "loss": 0.8305, "step": 4265 }, { "epoch": 0.7670592466061315, "grad_norm": 1.7987638711929321, "learning_rate": 9.605968576519924e-06, "loss": 0.9034, "step": 4266 }, { "epoch": 0.7672390542119931, "grad_norm": 1.1026372909545898, "learning_rate": 9.60574192177546e-06, "loss": 0.9998, "step": 4267 }, { "epoch": 0.7674188618178549, "grad_norm": 2.5572848320007324, "learning_rate": 9.605515204537119e-06, "loss": 0.8496, "step": 4268 }, { "epoch": 0.7675986694237166, "grad_norm": 1.5300910472869873, "learning_rate": 9.605288424807978e-06, "loss": 0.7763, "step": 4269 }, { "epoch": 0.7677784770295784, "grad_norm": 1.5492186546325684, "learning_rate": 9.60506158259112e-06, "loss": 0.7993, "step": 4270 }, { "epoch": 0.76795828463544, "grad_norm": 1.4037513732910156, "learning_rate": 9.604834677889617e-06, "loss": 0.7815, "step": 4271 }, { "epoch": 0.7681380922413018, "grad_norm": 1.1199030876159668, "learning_rate": 9.604607710706549e-06, "loss": 1.0202, "step": 4272 }, { "epoch": 0.7683178998471636, "grad_norm": 1.0858330726623535, "learning_rate": 9.604380681044996e-06, "loss": 1.0629, "step": 4273 }, { "epoch": 0.7684977074530253, "grad_norm": 2.0022988319396973, "learning_rate": 9.604153588908039e-06, "loss": 0.8154, "step": 4274 }, { "epoch": 0.768677515058887, "grad_norm": 1.5656222105026245, "learning_rate": 9.60392643429876e-06, "loss": 0.8203, "step": 4275 }, { "epoch": 0.7688573226647487, "grad_norm": 1.119743824005127, "learning_rate": 9.603699217220239e-06, "loss": 1.0359, "step": 4276 }, { "epoch": 0.7690371302706105, "grad_norm": 1.5483342409133911, "learning_rate": 9.60347193767556e-06, "loss": 0.834, "step": 4277 }, { "epoch": 0.7692169378764722, "grad_norm": 1.480387568473816, "learning_rate": 9.603244595667809e-06, "loss": 0.7799, "step": 4278 }, { "epoch": 0.7693967454823339, "grad_norm": 1.509504795074463, "learning_rate": 9.603017191200069e-06, "loss": 0.8346, "step": 4279 }, { "epoch": 0.7695765530881956, "grad_norm": 1.049828290939331, "learning_rate": 9.602789724275422e-06, "loss": 1.0383, "step": 4280 }, { "epoch": 0.7697563606940574, "grad_norm": 1.128085732460022, "learning_rate": 9.602562194896961e-06, "loss": 1.0832, "step": 4281 }, { "epoch": 0.7699361682999191, "grad_norm": 1.593090534210205, "learning_rate": 9.60233460306777e-06, "loss": 0.7921, "step": 4282 }, { "epoch": 0.7701159759057808, "grad_norm": 1.4405161142349243, "learning_rate": 9.602106948790937e-06, "loss": 0.76, "step": 4283 }, { "epoch": 0.7702957835116425, "grad_norm": 1.4634023904800415, "learning_rate": 9.601879232069551e-06, "loss": 0.7714, "step": 4284 }, { "epoch": 0.7704755911175043, "grad_norm": 1.4886044263839722, "learning_rate": 9.601651452906703e-06, "loss": 0.7682, "step": 4285 }, { "epoch": 0.770655398723366, "grad_norm": 1.559643268585205, "learning_rate": 9.601423611305481e-06, "loss": 0.8134, "step": 4286 }, { "epoch": 0.7708352063292278, "grad_norm": 1.5111004114151, "learning_rate": 9.60119570726898e-06, "loss": 0.7986, "step": 4287 }, { "epoch": 0.7710150139350894, "grad_norm": 2.0035436153411865, "learning_rate": 9.60096774080029e-06, "loss": 0.7898, "step": 4288 }, { "epoch": 0.7711948215409512, "grad_norm": 1.57594633102417, "learning_rate": 9.600739711902504e-06, "loss": 0.8831, "step": 4289 }, { "epoch": 0.7713746291468129, "grad_norm": 1.4216649532318115, "learning_rate": 9.600511620578718e-06, "loss": 0.7011, "step": 4290 }, { "epoch": 0.7715544367526747, "grad_norm": 1.4601365327835083, "learning_rate": 9.600283466832026e-06, "loss": 0.745, "step": 4291 }, { "epoch": 0.7717342443585363, "grad_norm": 1.485329031944275, "learning_rate": 9.600055250665523e-06, "loss": 0.7895, "step": 4292 }, { "epoch": 0.7719140519643981, "grad_norm": 1.6471024751663208, "learning_rate": 9.599826972082307e-06, "loss": 0.8151, "step": 4293 }, { "epoch": 0.7720938595702598, "grad_norm": 1.2236953973770142, "learning_rate": 9.599598631085473e-06, "loss": 1.0289, "step": 4294 }, { "epoch": 0.7722736671761216, "grad_norm": 1.5265893936157227, "learning_rate": 9.599370227678122e-06, "loss": 0.8532, "step": 4295 }, { "epoch": 0.7724534747819832, "grad_norm": 1.550748586654663, "learning_rate": 9.599141761863354e-06, "loss": 0.794, "step": 4296 }, { "epoch": 0.772633282387845, "grad_norm": 1.4657453298568726, "learning_rate": 9.598913233644263e-06, "loss": 0.8183, "step": 4297 }, { "epoch": 0.7728130899937067, "grad_norm": 1.7061262130737305, "learning_rate": 9.598684643023957e-06, "loss": 0.7969, "step": 4298 }, { "epoch": 0.7729928975995685, "grad_norm": 1.5448640584945679, "learning_rate": 9.598455990005532e-06, "loss": 0.7701, "step": 4299 }, { "epoch": 0.7731727052054302, "grad_norm": 1.513458490371704, "learning_rate": 9.598227274592094e-06, "loss": 0.8342, "step": 4300 }, { "epoch": 0.7733525128112919, "grad_norm": 1.465094804763794, "learning_rate": 9.597998496786746e-06, "loss": 0.7615, "step": 4301 }, { "epoch": 0.7735323204171537, "grad_norm": 1.5586386919021606, "learning_rate": 9.597769656592592e-06, "loss": 0.8636, "step": 4302 }, { "epoch": 0.7737121280230154, "grad_norm": 1.3988056182861328, "learning_rate": 9.597540754012735e-06, "loss": 0.8078, "step": 4303 }, { "epoch": 0.7738919356288771, "grad_norm": 1.4897031784057617, "learning_rate": 9.597311789050283e-06, "loss": 0.715, "step": 4304 }, { "epoch": 0.7740717432347388, "grad_norm": 1.5443840026855469, "learning_rate": 9.597082761708343e-06, "loss": 0.8374, "step": 4305 }, { "epoch": 0.7742515508406006, "grad_norm": 1.4762544631958008, "learning_rate": 9.596853671990022e-06, "loss": 0.8112, "step": 4306 }, { "epoch": 0.7744313584464623, "grad_norm": 1.5108698606491089, "learning_rate": 9.596624519898428e-06, "loss": 0.7771, "step": 4307 }, { "epoch": 0.774611166052324, "grad_norm": 1.5492534637451172, "learning_rate": 9.59639530543667e-06, "loss": 0.8596, "step": 4308 }, { "epoch": 0.7747909736581857, "grad_norm": 1.194333791732788, "learning_rate": 9.59616602860786e-06, "loss": 1.0606, "step": 4309 }, { "epoch": 0.7749707812640475, "grad_norm": 1.0881038904190063, "learning_rate": 9.595936689415107e-06, "loss": 1.0448, "step": 4310 }, { "epoch": 0.7751505888699092, "grad_norm": 1.55885910987854, "learning_rate": 9.595707287861524e-06, "loss": 0.7985, "step": 4311 }, { "epoch": 0.775330396475771, "grad_norm": 1.5583868026733398, "learning_rate": 9.595477823950224e-06, "loss": 0.788, "step": 4312 }, { "epoch": 0.7755102040816326, "grad_norm": 1.8210043907165527, "learning_rate": 9.595248297684319e-06, "loss": 0.7943, "step": 4313 }, { "epoch": 0.7756900116874944, "grad_norm": 1.4585955142974854, "learning_rate": 9.595018709066923e-06, "loss": 0.842, "step": 4314 }, { "epoch": 0.7758698192933561, "grad_norm": 1.4332501888275146, "learning_rate": 9.594789058101154e-06, "loss": 0.7552, "step": 4315 }, { "epoch": 0.7760496268992179, "grad_norm": 1.5402426719665527, "learning_rate": 9.594559344790127e-06, "loss": 0.8547, "step": 4316 }, { "epoch": 0.7762294345050795, "grad_norm": 1.4804909229278564, "learning_rate": 9.594329569136957e-06, "loss": 0.8246, "step": 4317 }, { "epoch": 0.7764092421109413, "grad_norm": 1.5859451293945312, "learning_rate": 9.594099731144763e-06, "loss": 0.8113, "step": 4318 }, { "epoch": 0.776589049716803, "grad_norm": 1.2685871124267578, "learning_rate": 9.593869830816664e-06, "loss": 1.0281, "step": 4319 }, { "epoch": 0.7767688573226648, "grad_norm": 1.5089824199676514, "learning_rate": 9.59363986815578e-06, "loss": 0.8012, "step": 4320 }, { "epoch": 0.7769486649285264, "grad_norm": 1.6784675121307373, "learning_rate": 9.59340984316523e-06, "loss": 0.7915, "step": 4321 }, { "epoch": 0.7771284725343882, "grad_norm": 1.6570619344711304, "learning_rate": 9.593179755848135e-06, "loss": 0.8407, "step": 4322 }, { "epoch": 0.7773082801402499, "grad_norm": 1.4959429502487183, "learning_rate": 9.59294960620762e-06, "loss": 0.8052, "step": 4323 }, { "epoch": 0.7774880877461117, "grad_norm": 1.4728662967681885, "learning_rate": 9.592719394246802e-06, "loss": 0.8128, "step": 4324 }, { "epoch": 0.7776678953519733, "grad_norm": 1.5053068399429321, "learning_rate": 9.59248911996881e-06, "loss": 0.81, "step": 4325 }, { "epoch": 0.7778477029578351, "grad_norm": 1.490870714187622, "learning_rate": 9.592258783376766e-06, "loss": 0.8034, "step": 4326 }, { "epoch": 0.7780275105636968, "grad_norm": 1.6873738765716553, "learning_rate": 9.592028384473797e-06, "loss": 0.8992, "step": 4327 }, { "epoch": 0.7782073181695586, "grad_norm": 1.4684163331985474, "learning_rate": 9.591797923263026e-06, "loss": 0.7825, "step": 4328 }, { "epoch": 0.7783871257754204, "grad_norm": 1.715948224067688, "learning_rate": 9.591567399747585e-06, "loss": 0.9, "step": 4329 }, { "epoch": 0.778566933381282, "grad_norm": 1.8024405241012573, "learning_rate": 9.591336813930599e-06, "loss": 0.8147, "step": 4330 }, { "epoch": 0.7787467409871438, "grad_norm": 1.4726810455322266, "learning_rate": 9.591106165815194e-06, "loss": 0.7845, "step": 4331 }, { "epoch": 0.7789265485930055, "grad_norm": 1.594380497932434, "learning_rate": 9.590875455404504e-06, "loss": 0.8561, "step": 4332 }, { "epoch": 0.7791063561988673, "grad_norm": 1.4686219692230225, "learning_rate": 9.590644682701659e-06, "loss": 0.7891, "step": 4333 }, { "epoch": 0.7792861638047289, "grad_norm": 1.5837855339050293, "learning_rate": 9.590413847709787e-06, "loss": 0.8114, "step": 4334 }, { "epoch": 0.7794659714105907, "grad_norm": 1.5322213172912598, "learning_rate": 9.590182950432025e-06, "loss": 0.8016, "step": 4335 }, { "epoch": 0.7796457790164524, "grad_norm": 1.565144658088684, "learning_rate": 9.589951990871502e-06, "loss": 0.7641, "step": 4336 }, { "epoch": 0.7798255866223142, "grad_norm": 1.4918359518051147, "learning_rate": 9.589720969031354e-06, "loss": 0.7574, "step": 4337 }, { "epoch": 0.7800053942281758, "grad_norm": 1.9939130544662476, "learning_rate": 9.589489884914714e-06, "loss": 0.7834, "step": 4338 }, { "epoch": 0.7801852018340376, "grad_norm": 1.5815043449401855, "learning_rate": 9.589258738524716e-06, "loss": 0.761, "step": 4339 }, { "epoch": 0.7803650094398993, "grad_norm": 1.588449478149414, "learning_rate": 9.589027529864502e-06, "loss": 0.8153, "step": 4340 }, { "epoch": 0.7805448170457611, "grad_norm": 1.5178545713424683, "learning_rate": 9.588796258937206e-06, "loss": 0.7989, "step": 4341 }, { "epoch": 0.7807246246516227, "grad_norm": 1.4253146648406982, "learning_rate": 9.588564925745964e-06, "loss": 0.7732, "step": 4342 }, { "epoch": 0.7809044322574845, "grad_norm": 1.446340799331665, "learning_rate": 9.588333530293918e-06, "loss": 0.827, "step": 4343 }, { "epoch": 0.7810842398633462, "grad_norm": 1.4760135412216187, "learning_rate": 9.588102072584204e-06, "loss": 0.8172, "step": 4344 }, { "epoch": 0.781264047469208, "grad_norm": 1.5678584575653076, "learning_rate": 9.58787055261997e-06, "loss": 0.8519, "step": 4345 }, { "epoch": 0.7814438550750696, "grad_norm": 1.684882640838623, "learning_rate": 9.587638970404346e-06, "loss": 0.8228, "step": 4346 }, { "epoch": 0.7816236626809314, "grad_norm": 1.4968843460083008, "learning_rate": 9.587407325940485e-06, "loss": 0.793, "step": 4347 }, { "epoch": 0.7818034702867931, "grad_norm": 1.1484839916229248, "learning_rate": 9.587175619231525e-06, "loss": 1.0374, "step": 4348 }, { "epoch": 0.7819832778926549, "grad_norm": 1.4655705690383911, "learning_rate": 9.586943850280613e-06, "loss": 0.7675, "step": 4349 }, { "epoch": 0.7821630854985165, "grad_norm": 1.4783720970153809, "learning_rate": 9.58671201909089e-06, "loss": 0.8375, "step": 4350 }, { "epoch": 0.7823428931043783, "grad_norm": 1.970597505569458, "learning_rate": 9.586480125665502e-06, "loss": 0.7698, "step": 4351 }, { "epoch": 0.78252270071024, "grad_norm": 1.6189435720443726, "learning_rate": 9.586248170007598e-06, "loss": 0.8359, "step": 4352 }, { "epoch": 0.7827025083161018, "grad_norm": 1.6610690355300903, "learning_rate": 9.586016152120324e-06, "loss": 0.768, "step": 4353 }, { "epoch": 0.7828823159219634, "grad_norm": 1.1050196886062622, "learning_rate": 9.585784072006827e-06, "loss": 1.0011, "step": 4354 }, { "epoch": 0.7830621235278252, "grad_norm": 1.0381600856781006, "learning_rate": 9.585551929670259e-06, "loss": 1.0217, "step": 4355 }, { "epoch": 0.783241931133687, "grad_norm": 1.0585277080535889, "learning_rate": 9.585319725113769e-06, "loss": 1.082, "step": 4356 }, { "epoch": 0.7834217387395487, "grad_norm": 1.4964712858200073, "learning_rate": 9.585087458340506e-06, "loss": 0.8731, "step": 4357 }, { "epoch": 0.7836015463454105, "grad_norm": 1.5509288311004639, "learning_rate": 9.58485512935362e-06, "loss": 0.8006, "step": 4358 }, { "epoch": 0.7837813539512721, "grad_norm": 1.5480905771255493, "learning_rate": 9.584622738156269e-06, "loss": 0.7495, "step": 4359 }, { "epoch": 0.7839611615571339, "grad_norm": 1.0861133337020874, "learning_rate": 9.584390284751601e-06, "loss": 1.0203, "step": 4360 }, { "epoch": 0.7841409691629956, "grad_norm": 1.5341219902038574, "learning_rate": 9.584157769142775e-06, "loss": 0.8012, "step": 4361 }, { "epoch": 0.7843207767688574, "grad_norm": 1.4952174425125122, "learning_rate": 9.58392519133294e-06, "loss": 0.814, "step": 4362 }, { "epoch": 0.784500584374719, "grad_norm": 1.4784988164901733, "learning_rate": 9.583692551325257e-06, "loss": 0.8291, "step": 4363 }, { "epoch": 0.7846803919805808, "grad_norm": 1.5379160642623901, "learning_rate": 9.58345984912288e-06, "loss": 0.8063, "step": 4364 }, { "epoch": 0.7848601995864425, "grad_norm": 1.4450147151947021, "learning_rate": 9.583227084728965e-06, "loss": 0.7925, "step": 4365 }, { "epoch": 0.7850400071923043, "grad_norm": 1.4935606718063354, "learning_rate": 9.582994258146674e-06, "loss": 0.8149, "step": 4366 }, { "epoch": 0.7852198147981659, "grad_norm": 1.1853028535842896, "learning_rate": 9.582761369379165e-06, "loss": 1.0488, "step": 4367 }, { "epoch": 0.7853996224040277, "grad_norm": 1.6158244609832764, "learning_rate": 9.582528418429597e-06, "loss": 0.8148, "step": 4368 }, { "epoch": 0.7855794300098894, "grad_norm": 1.561926245689392, "learning_rate": 9.582295405301131e-06, "loss": 0.8098, "step": 4369 }, { "epoch": 0.7857592376157512, "grad_norm": 1.67245352268219, "learning_rate": 9.582062329996928e-06, "loss": 0.8165, "step": 4370 }, { "epoch": 0.7859390452216128, "grad_norm": 1.5130667686462402, "learning_rate": 9.581829192520153e-06, "loss": 0.8259, "step": 4371 }, { "epoch": 0.7861188528274746, "grad_norm": 1.4992512464523315, "learning_rate": 9.581595992873968e-06, "loss": 0.7297, "step": 4372 }, { "epoch": 0.7862986604333363, "grad_norm": 1.4477449655532837, "learning_rate": 9.581362731061537e-06, "loss": 0.812, "step": 4373 }, { "epoch": 0.7864784680391981, "grad_norm": 1.5383840799331665, "learning_rate": 9.581129407086023e-06, "loss": 0.8791, "step": 4374 }, { "epoch": 0.7866582756450597, "grad_norm": 1.376471757888794, "learning_rate": 9.580896020950597e-06, "loss": 0.7532, "step": 4375 }, { "epoch": 0.7868380832509215, "grad_norm": 1.5982245206832886, "learning_rate": 9.58066257265842e-06, "loss": 0.7653, "step": 4376 }, { "epoch": 0.7870178908567832, "grad_norm": 1.5556920766830444, "learning_rate": 9.580429062212664e-06, "loss": 0.7939, "step": 4377 }, { "epoch": 0.787197698462645, "grad_norm": 1.4803444147109985, "learning_rate": 9.580195489616495e-06, "loss": 0.7757, "step": 4378 }, { "epoch": 0.7873775060685066, "grad_norm": 1.4559991359710693, "learning_rate": 9.579961854873084e-06, "loss": 0.7878, "step": 4379 }, { "epoch": 0.7875573136743684, "grad_norm": 1.3729779720306396, "learning_rate": 9.5797281579856e-06, "loss": 0.7608, "step": 4380 }, { "epoch": 0.7877371212802301, "grad_norm": 1.4757769107818604, "learning_rate": 9.579494398957213e-06, "loss": 0.8096, "step": 4381 }, { "epoch": 0.7879169288860919, "grad_norm": 1.609261155128479, "learning_rate": 9.579260577791096e-06, "loss": 0.8271, "step": 4382 }, { "epoch": 0.7880967364919537, "grad_norm": 1.6026029586791992, "learning_rate": 9.579026694490423e-06, "loss": 0.8074, "step": 4383 }, { "epoch": 0.7882765440978153, "grad_norm": 1.5453466176986694, "learning_rate": 9.578792749058366e-06, "loss": 0.802, "step": 4384 }, { "epoch": 0.7884563517036771, "grad_norm": 1.3812458515167236, "learning_rate": 9.578558741498099e-06, "loss": 0.7646, "step": 4385 }, { "epoch": 0.7886361593095388, "grad_norm": 1.700150966644287, "learning_rate": 9.578324671812796e-06, "loss": 0.8899, "step": 4386 }, { "epoch": 0.7888159669154006, "grad_norm": 1.557334303855896, "learning_rate": 9.578090540005635e-06, "loss": 0.7213, "step": 4387 }, { "epoch": 0.7889957745212622, "grad_norm": 1.1353870630264282, "learning_rate": 9.577856346079795e-06, "loss": 1.0045, "step": 4388 }, { "epoch": 0.789175582127124, "grad_norm": 1.5518451929092407, "learning_rate": 9.577622090038448e-06, "loss": 0.8847, "step": 4389 }, { "epoch": 0.7893553897329857, "grad_norm": 1.5567091703414917, "learning_rate": 9.577387771884779e-06, "loss": 0.7875, "step": 4390 }, { "epoch": 0.7895351973388475, "grad_norm": 1.4744032621383667, "learning_rate": 9.577153391621961e-06, "loss": 0.8299, "step": 4391 }, { "epoch": 0.7897150049447091, "grad_norm": 1.5154926776885986, "learning_rate": 9.576918949253179e-06, "loss": 0.7326, "step": 4392 }, { "epoch": 0.7898948125505709, "grad_norm": 1.573290467262268, "learning_rate": 9.576684444781612e-06, "loss": 0.8324, "step": 4393 }, { "epoch": 0.7900746201564326, "grad_norm": 1.1573326587677002, "learning_rate": 9.576449878210442e-06, "loss": 1.0466, "step": 4394 }, { "epoch": 0.7902544277622944, "grad_norm": 1.5557165145874023, "learning_rate": 9.576215249542853e-06, "loss": 0.8073, "step": 4395 }, { "epoch": 0.790434235368156, "grad_norm": 1.5286892652511597, "learning_rate": 9.575980558782028e-06, "loss": 0.7619, "step": 4396 }, { "epoch": 0.7906140429740178, "grad_norm": 1.947002649307251, "learning_rate": 9.57574580593115e-06, "loss": 0.7828, "step": 4397 }, { "epoch": 0.7907938505798795, "grad_norm": 1.5528501272201538, "learning_rate": 9.575510990993404e-06, "loss": 0.7777, "step": 4398 }, { "epoch": 0.7909736581857413, "grad_norm": 1.4600712060928345, "learning_rate": 9.57527611397198e-06, "loss": 0.8211, "step": 4399 }, { "epoch": 0.791153465791603, "grad_norm": 1.4790700674057007, "learning_rate": 9.575041174870062e-06, "loss": 0.8156, "step": 4400 }, { "epoch": 0.7913332733974647, "grad_norm": 1.160812258720398, "learning_rate": 9.574806173690838e-06, "loss": 1.0568, "step": 4401 }, { "epoch": 0.7915130810033264, "grad_norm": 1.5925400257110596, "learning_rate": 9.574571110437496e-06, "loss": 0.7563, "step": 4402 }, { "epoch": 0.7916928886091882, "grad_norm": 1.5165314674377441, "learning_rate": 9.574335985113228e-06, "loss": 0.7939, "step": 4403 }, { "epoch": 0.7918726962150499, "grad_norm": 1.6013230085372925, "learning_rate": 9.574100797721222e-06, "loss": 0.8552, "step": 4404 }, { "epoch": 0.7920525038209116, "grad_norm": 1.0993850231170654, "learning_rate": 9.573865548264671e-06, "loss": 1.0253, "step": 4405 }, { "epoch": 0.7922323114267733, "grad_norm": 1.5158733129501343, "learning_rate": 9.573630236746766e-06, "loss": 0.8518, "step": 4406 }, { "epoch": 0.7924121190326351, "grad_norm": 1.4281165599822998, "learning_rate": 9.5733948631707e-06, "loss": 0.7847, "step": 4407 }, { "epoch": 0.7925919266384968, "grad_norm": 1.0374815464019775, "learning_rate": 9.573159427539665e-06, "loss": 0.9892, "step": 4408 }, { "epoch": 0.7927717342443585, "grad_norm": 1.501980185508728, "learning_rate": 9.572923929856858e-06, "loss": 0.7832, "step": 4409 }, { "epoch": 0.7929515418502202, "grad_norm": 0.9969810247421265, "learning_rate": 9.572688370125474e-06, "loss": 1.0187, "step": 4410 }, { "epoch": 0.793131349456082, "grad_norm": 1.525997519493103, "learning_rate": 9.572452748348709e-06, "loss": 0.8304, "step": 4411 }, { "epoch": 0.7933111570619438, "grad_norm": 1.5136744976043701, "learning_rate": 9.572217064529758e-06, "loss": 0.9124, "step": 4412 }, { "epoch": 0.7934909646678054, "grad_norm": 1.4689534902572632, "learning_rate": 9.571981318671822e-06, "loss": 0.7487, "step": 4413 }, { "epoch": 0.7936707722736672, "grad_norm": 1.360543131828308, "learning_rate": 9.5717455107781e-06, "loss": 0.8154, "step": 4414 }, { "epoch": 0.7938505798795289, "grad_norm": 1.8344807624816895, "learning_rate": 9.571509640851788e-06, "loss": 0.8314, "step": 4415 }, { "epoch": 0.7940303874853907, "grad_norm": 1.4278265237808228, "learning_rate": 9.571273708896089e-06, "loss": 0.7296, "step": 4416 }, { "epoch": 0.7942101950912523, "grad_norm": 1.4883897304534912, "learning_rate": 9.571037714914205e-06, "loss": 0.8234, "step": 4417 }, { "epoch": 0.7943900026971141, "grad_norm": 1.544947624206543, "learning_rate": 9.570801658909336e-06, "loss": 0.8367, "step": 4418 }, { "epoch": 0.7945698103029758, "grad_norm": 1.503657341003418, "learning_rate": 9.570565540884686e-06, "loss": 0.7737, "step": 4419 }, { "epoch": 0.7947496179088376, "grad_norm": 1.4550039768218994, "learning_rate": 9.57032936084346e-06, "loss": 0.7387, "step": 4420 }, { "epoch": 0.7949294255146993, "grad_norm": 1.6309345960617065, "learning_rate": 9.570093118788862e-06, "loss": 0.831, "step": 4421 }, { "epoch": 0.795109233120561, "grad_norm": 1.2297289371490479, "learning_rate": 9.569856814724094e-06, "loss": 1.0405, "step": 4422 }, { "epoch": 0.7952890407264227, "grad_norm": 1.5764994621276855, "learning_rate": 9.569620448652368e-06, "loss": 0.829, "step": 4423 }, { "epoch": 0.7954688483322845, "grad_norm": 1.4322830438613892, "learning_rate": 9.569384020576886e-06, "loss": 0.7888, "step": 4424 }, { "epoch": 0.7956486559381462, "grad_norm": 1.4515600204467773, "learning_rate": 9.569147530500861e-06, "loss": 0.8484, "step": 4425 }, { "epoch": 0.7958284635440079, "grad_norm": 1.1392139196395874, "learning_rate": 9.5689109784275e-06, "loss": 1.01, "step": 4426 }, { "epoch": 0.7960082711498696, "grad_norm": 1.7537407875061035, "learning_rate": 9.568674364360009e-06, "loss": 0.8087, "step": 4427 }, { "epoch": 0.7961880787557314, "grad_norm": 1.6317660808563232, "learning_rate": 9.568437688301603e-06, "loss": 0.8962, "step": 4428 }, { "epoch": 0.7963678863615931, "grad_norm": 1.6023905277252197, "learning_rate": 9.568200950255493e-06, "loss": 0.8143, "step": 4429 }, { "epoch": 0.7965476939674548, "grad_norm": 1.521497368812561, "learning_rate": 9.567964150224888e-06, "loss": 0.7623, "step": 4430 }, { "epoch": 0.7967275015733165, "grad_norm": 1.6244255304336548, "learning_rate": 9.567727288213005e-06, "loss": 0.8142, "step": 4431 }, { "epoch": 0.7969073091791783, "grad_norm": 1.6677100658416748, "learning_rate": 9.567490364223055e-06, "loss": 0.765, "step": 4432 }, { "epoch": 0.79708711678504, "grad_norm": 1.700333833694458, "learning_rate": 9.567253378258255e-06, "loss": 0.8491, "step": 4433 }, { "epoch": 0.7972669243909017, "grad_norm": 1.455029010772705, "learning_rate": 9.56701633032182e-06, "loss": 0.8477, "step": 4434 }, { "epoch": 0.7974467319967634, "grad_norm": 1.517246961593628, "learning_rate": 9.566779220416964e-06, "loss": 0.8261, "step": 4435 }, { "epoch": 0.7976265396026252, "grad_norm": 1.3935534954071045, "learning_rate": 9.566542048546908e-06, "loss": 0.8358, "step": 4436 }, { "epoch": 0.7978063472084869, "grad_norm": 1.6033655405044556, "learning_rate": 9.566304814714869e-06, "loss": 0.8203, "step": 4437 }, { "epoch": 0.7979861548143486, "grad_norm": 1.4495971202850342, "learning_rate": 9.566067518924062e-06, "loss": 0.8566, "step": 4438 }, { "epoch": 0.7981659624202104, "grad_norm": 1.67296302318573, "learning_rate": 9.565830161177713e-06, "loss": 0.8059, "step": 4439 }, { "epoch": 0.7983457700260721, "grad_norm": 1.4113880395889282, "learning_rate": 9.565592741479039e-06, "loss": 0.8011, "step": 4440 }, { "epoch": 0.7985255776319339, "grad_norm": 1.4561039209365845, "learning_rate": 9.565355259831262e-06, "loss": 0.8206, "step": 4441 }, { "epoch": 0.7987053852377956, "grad_norm": 1.6860277652740479, "learning_rate": 9.565117716237603e-06, "loss": 0.8056, "step": 4442 }, { "epoch": 0.7988851928436573, "grad_norm": 1.5139074325561523, "learning_rate": 9.56488011070129e-06, "loss": 0.781, "step": 4443 }, { "epoch": 0.799065000449519, "grad_norm": 1.5417344570159912, "learning_rate": 9.564642443225541e-06, "loss": 0.8745, "step": 4444 }, { "epoch": 0.7992448080553808, "grad_norm": 1.1780281066894531, "learning_rate": 9.564404713813584e-06, "loss": 1.0337, "step": 4445 }, { "epoch": 0.7994246156612425, "grad_norm": 1.4822853803634644, "learning_rate": 9.564166922468644e-06, "loss": 0.8522, "step": 4446 }, { "epoch": 0.7996044232671042, "grad_norm": 1.4922704696655273, "learning_rate": 9.563929069193948e-06, "loss": 0.8343, "step": 4447 }, { "epoch": 0.7997842308729659, "grad_norm": 1.5587807893753052, "learning_rate": 9.563691153992723e-06, "loss": 0.6942, "step": 4448 }, { "epoch": 0.7999640384788277, "grad_norm": 1.550862431526184, "learning_rate": 9.563453176868196e-06, "loss": 0.789, "step": 4449 }, { "epoch": 0.8001438460846894, "grad_norm": 1.659963846206665, "learning_rate": 9.5632151378236e-06, "loss": 0.8659, "step": 4450 }, { "epoch": 0.8003236536905511, "grad_norm": 1.4419831037521362, "learning_rate": 9.562977036862159e-06, "loss": 0.8528, "step": 4451 }, { "epoch": 0.8005034612964128, "grad_norm": 1.6459672451019287, "learning_rate": 9.562738873987109e-06, "loss": 0.8143, "step": 4452 }, { "epoch": 0.8006832689022746, "grad_norm": 2.1532983779907227, "learning_rate": 9.562500649201679e-06, "loss": 0.7592, "step": 4453 }, { "epoch": 0.8008630765081363, "grad_norm": 1.4496604204177856, "learning_rate": 9.562262362509103e-06, "loss": 0.7347, "step": 4454 }, { "epoch": 0.801042884113998, "grad_norm": 1.482582449913025, "learning_rate": 9.562024013912611e-06, "loss": 0.7657, "step": 4455 }, { "epoch": 0.8012226917198597, "grad_norm": 1.4780603647232056, "learning_rate": 9.56178560341544e-06, "loss": 0.7263, "step": 4456 }, { "epoch": 0.8014024993257215, "grad_norm": 1.4147340059280396, "learning_rate": 9.561547131020823e-06, "loss": 0.7684, "step": 4457 }, { "epoch": 0.8015823069315832, "grad_norm": 1.459221363067627, "learning_rate": 9.561308596731999e-06, "loss": 0.8856, "step": 4458 }, { "epoch": 0.801762114537445, "grad_norm": 1.4452511072158813, "learning_rate": 9.561070000552201e-06, "loss": 0.7993, "step": 4459 }, { "epoch": 0.8019419221433066, "grad_norm": 1.5089004039764404, "learning_rate": 9.560831342484668e-06, "loss": 0.8139, "step": 4460 }, { "epoch": 0.8021217297491684, "grad_norm": 1.1539700031280518, "learning_rate": 9.560592622532639e-06, "loss": 1.0549, "step": 4461 }, { "epoch": 0.8023015373550301, "grad_norm": 1.5345532894134521, "learning_rate": 9.56035384069935e-06, "loss": 0.7491, "step": 4462 }, { "epoch": 0.8024813449608919, "grad_norm": 1.4485591650009155, "learning_rate": 9.560114996988045e-06, "loss": 0.8143, "step": 4463 }, { "epoch": 0.8026611525667535, "grad_norm": 1.9461658000946045, "learning_rate": 9.559876091401962e-06, "loss": 0.7789, "step": 4464 }, { "epoch": 0.8028409601726153, "grad_norm": 1.058570384979248, "learning_rate": 9.559637123944344e-06, "loss": 1.0459, "step": 4465 }, { "epoch": 0.8030207677784771, "grad_norm": 1.7363476753234863, "learning_rate": 9.559398094618434e-06, "loss": 0.814, "step": 4466 }, { "epoch": 0.8032005753843388, "grad_norm": 1.077480435371399, "learning_rate": 9.559159003427472e-06, "loss": 1.0471, "step": 4467 }, { "epoch": 0.8033803829902005, "grad_norm": 1.5242615938186646, "learning_rate": 9.558919850374707e-06, "loss": 0.785, "step": 4468 }, { "epoch": 0.8035601905960622, "grad_norm": 1.5156155824661255, "learning_rate": 9.558680635463381e-06, "loss": 0.7842, "step": 4469 }, { "epoch": 0.803739998201924, "grad_norm": 1.4711568355560303, "learning_rate": 9.558441358696739e-06, "loss": 0.7796, "step": 4470 }, { "epoch": 0.8039198058077857, "grad_norm": 1.2457702159881592, "learning_rate": 9.558202020078032e-06, "loss": 1.0253, "step": 4471 }, { "epoch": 0.8040996134136474, "grad_norm": 1.0661221742630005, "learning_rate": 9.557962619610503e-06, "loss": 1.014, "step": 4472 }, { "epoch": 0.8042794210195091, "grad_norm": 1.059015154838562, "learning_rate": 9.557723157297401e-06, "loss": 1.0232, "step": 4473 }, { "epoch": 0.8044592286253709, "grad_norm": 1.5257090330123901, "learning_rate": 9.557483633141978e-06, "loss": 0.7957, "step": 4474 }, { "epoch": 0.8046390362312326, "grad_norm": 1.0786125659942627, "learning_rate": 9.557244047147481e-06, "loss": 1.0076, "step": 4475 }, { "epoch": 0.8048188438370943, "grad_norm": 1.3961824178695679, "learning_rate": 9.55700439931716e-06, "loss": 0.7246, "step": 4476 }, { "epoch": 0.804998651442956, "grad_norm": 1.4950225353240967, "learning_rate": 9.556764689654273e-06, "loss": 0.7509, "step": 4477 }, { "epoch": 0.8051784590488178, "grad_norm": 1.4915845394134521, "learning_rate": 9.556524918162064e-06, "loss": 0.8039, "step": 4478 }, { "epoch": 0.8053582666546795, "grad_norm": 1.125159502029419, "learning_rate": 9.556285084843793e-06, "loss": 0.9977, "step": 4479 }, { "epoch": 0.8055380742605412, "grad_norm": 1.555785059928894, "learning_rate": 9.556045189702711e-06, "loss": 0.8002, "step": 4480 }, { "epoch": 0.8057178818664029, "grad_norm": 1.4611899852752686, "learning_rate": 9.555805232742075e-06, "loss": 0.7961, "step": 4481 }, { "epoch": 0.8058976894722647, "grad_norm": 1.6159716844558716, "learning_rate": 9.555565213965139e-06, "loss": 0.7723, "step": 4482 }, { "epoch": 0.8060774970781264, "grad_norm": 0.934523344039917, "learning_rate": 9.555325133375161e-06, "loss": 1.0631, "step": 4483 }, { "epoch": 0.8062573046839882, "grad_norm": 1.5661152601242065, "learning_rate": 9.555084990975398e-06, "loss": 0.8267, "step": 4484 }, { "epoch": 0.8064371122898498, "grad_norm": 1.8080092668533325, "learning_rate": 9.554844786769107e-06, "loss": 0.8225, "step": 4485 }, { "epoch": 0.8066169198957116, "grad_norm": 1.5494400262832642, "learning_rate": 9.554604520759552e-06, "loss": 0.8103, "step": 4486 }, { "epoch": 0.8067967275015733, "grad_norm": 1.5023106336593628, "learning_rate": 9.554364192949988e-06, "loss": 0.8317, "step": 4487 }, { "epoch": 0.8069765351074351, "grad_norm": 1.5091032981872559, "learning_rate": 9.554123803343677e-06, "loss": 0.8123, "step": 4488 }, { "epoch": 0.8071563427132967, "grad_norm": 1.493321180343628, "learning_rate": 9.553883351943882e-06, "loss": 0.7812, "step": 4489 }, { "epoch": 0.8073361503191585, "grad_norm": 1.5303349494934082, "learning_rate": 9.553642838753867e-06, "loss": 0.8226, "step": 4490 }, { "epoch": 0.8075159579250202, "grad_norm": 1.5402354001998901, "learning_rate": 9.553402263776891e-06, "loss": 0.8168, "step": 4491 }, { "epoch": 0.807695765530882, "grad_norm": 1.2248616218566895, "learning_rate": 9.553161627016224e-06, "loss": 1.0116, "step": 4492 }, { "epoch": 0.8078755731367436, "grad_norm": 1.4687979221343994, "learning_rate": 9.552920928475127e-06, "loss": 0.7796, "step": 4493 }, { "epoch": 0.8080553807426054, "grad_norm": 1.3073785305023193, "learning_rate": 9.552680168156866e-06, "loss": 0.7346, "step": 4494 }, { "epoch": 0.8082351883484672, "grad_norm": 1.5177652835845947, "learning_rate": 9.55243934606471e-06, "loss": 0.7355, "step": 4495 }, { "epoch": 0.8084149959543289, "grad_norm": 1.51943838596344, "learning_rate": 9.552198462201925e-06, "loss": 0.8334, "step": 4496 }, { "epoch": 0.8085948035601906, "grad_norm": 1.1110262870788574, "learning_rate": 9.551957516571781e-06, "loss": 1.0388, "step": 4497 }, { "epoch": 0.8087746111660523, "grad_norm": 1.2079333066940308, "learning_rate": 9.551716509177545e-06, "loss": 1.0606, "step": 4498 }, { "epoch": 0.8089544187719141, "grad_norm": 1.5222595930099487, "learning_rate": 9.551475440022488e-06, "loss": 0.8317, "step": 4499 }, { "epoch": 0.8091342263777758, "grad_norm": 1.722585916519165, "learning_rate": 9.551234309109882e-06, "loss": 0.8054, "step": 4500 }, { "epoch": 0.8091342263777758, "eval_loss": 0.827734112739563, "eval_runtime": 150.7227, "eval_samples_per_second": 95.42, "eval_steps_per_second": 1.493, "step": 4500 }, { "epoch": 0.8093140339836375, "grad_norm": 1.4639239311218262, "learning_rate": 9.550993116443e-06, "loss": 0.8428, "step": 4501 }, { "epoch": 0.8094938415894992, "grad_norm": 1.5354888439178467, "learning_rate": 9.550751862025111e-06, "loss": 0.7869, "step": 4502 }, { "epoch": 0.809673649195361, "grad_norm": 1.5860997438430786, "learning_rate": 9.55051054585949e-06, "loss": 0.7751, "step": 4503 }, { "epoch": 0.8098534568012227, "grad_norm": 1.4188752174377441, "learning_rate": 9.550269167949412e-06, "loss": 0.7405, "step": 4504 }, { "epoch": 0.8100332644070845, "grad_norm": 1.5652844905853271, "learning_rate": 9.550027728298153e-06, "loss": 0.7904, "step": 4505 }, { "epoch": 0.8102130720129461, "grad_norm": 1.068093180656433, "learning_rate": 9.549786226908988e-06, "loss": 1.0287, "step": 4506 }, { "epoch": 0.8103928796188079, "grad_norm": 1.129805326461792, "learning_rate": 9.549544663785193e-06, "loss": 1.0038, "step": 4507 }, { "epoch": 0.8105726872246696, "grad_norm": 1.5965512990951538, "learning_rate": 9.549303038930046e-06, "loss": 0.8005, "step": 4508 }, { "epoch": 0.8107524948305314, "grad_norm": 1.4687259197235107, "learning_rate": 9.549061352346829e-06, "loss": 0.8366, "step": 4509 }, { "epoch": 0.810932302436393, "grad_norm": 1.4450408220291138, "learning_rate": 9.548819604038816e-06, "loss": 0.7595, "step": 4510 }, { "epoch": 0.8111121100422548, "grad_norm": 1.040211796760559, "learning_rate": 9.54857779400929e-06, "loss": 1.0316, "step": 4511 }, { "epoch": 0.8112919176481165, "grad_norm": 1.4243395328521729, "learning_rate": 9.548335922261532e-06, "loss": 0.8541, "step": 4512 }, { "epoch": 0.8114717252539783, "grad_norm": 1.4715814590454102, "learning_rate": 9.548093988798824e-06, "loss": 0.7736, "step": 4513 }, { "epoch": 0.8116515328598399, "grad_norm": 1.430260419845581, "learning_rate": 9.547851993624447e-06, "loss": 0.7914, "step": 4514 }, { "epoch": 0.8118313404657017, "grad_norm": 1.6223515272140503, "learning_rate": 9.547609936741686e-06, "loss": 0.8183, "step": 4515 }, { "epoch": 0.8120111480715634, "grad_norm": 1.4264720678329468, "learning_rate": 9.547367818153826e-06, "loss": 0.7288, "step": 4516 }, { "epoch": 0.8121909556774252, "grad_norm": 1.5702128410339355, "learning_rate": 9.547125637864152e-06, "loss": 0.7751, "step": 4517 }, { "epoch": 0.8123707632832868, "grad_norm": 1.6783373355865479, "learning_rate": 9.546883395875947e-06, "loss": 0.8561, "step": 4518 }, { "epoch": 0.8125505708891486, "grad_norm": 1.5762377977371216, "learning_rate": 9.546641092192504e-06, "loss": 0.7726, "step": 4519 }, { "epoch": 0.8127303784950103, "grad_norm": 1.4973304271697998, "learning_rate": 9.546398726817105e-06, "loss": 0.7866, "step": 4520 }, { "epoch": 0.8129101861008721, "grad_norm": 1.4276846647262573, "learning_rate": 9.54615629975304e-06, "loss": 0.8195, "step": 4521 }, { "epoch": 0.8130899937067338, "grad_norm": 1.6577377319335938, "learning_rate": 9.545913811003601e-06, "loss": 0.7912, "step": 4522 }, { "epoch": 0.8132698013125955, "grad_norm": 1.5059071779251099, "learning_rate": 9.545671260572076e-06, "loss": 0.7815, "step": 4523 }, { "epoch": 0.8134496089184573, "grad_norm": 1.551714539527893, "learning_rate": 9.545428648461756e-06, "loss": 0.791, "step": 4524 }, { "epoch": 0.813629416524319, "grad_norm": 1.4594781398773193, "learning_rate": 9.545185974675934e-06, "loss": 0.7653, "step": 4525 }, { "epoch": 0.8138092241301808, "grad_norm": 1.6097028255462646, "learning_rate": 9.544943239217903e-06, "loss": 0.7647, "step": 4526 }, { "epoch": 0.8139890317360424, "grad_norm": 1.1973966360092163, "learning_rate": 9.544700442090954e-06, "loss": 1.0338, "step": 4527 }, { "epoch": 0.8141688393419042, "grad_norm": 1.232506275177002, "learning_rate": 9.544457583298384e-06, "loss": 1.0645, "step": 4528 }, { "epoch": 0.8143486469477659, "grad_norm": 1.5583733320236206, "learning_rate": 9.544214662843487e-06, "loss": 0.7718, "step": 4529 }, { "epoch": 0.8145284545536277, "grad_norm": 1.4382836818695068, "learning_rate": 9.54397168072956e-06, "loss": 0.7486, "step": 4530 }, { "epoch": 0.8147082621594893, "grad_norm": 1.6024200916290283, "learning_rate": 9.5437286369599e-06, "loss": 0.7588, "step": 4531 }, { "epoch": 0.8148880697653511, "grad_norm": 1.6033060550689697, "learning_rate": 9.543485531537806e-06, "loss": 0.7149, "step": 4532 }, { "epoch": 0.8150678773712128, "grad_norm": 1.5496948957443237, "learning_rate": 9.543242364466573e-06, "loss": 0.764, "step": 4533 }, { "epoch": 0.8152476849770746, "grad_norm": 1.5241200923919678, "learning_rate": 9.542999135749502e-06, "loss": 0.8212, "step": 4534 }, { "epoch": 0.8154274925829362, "grad_norm": 1.1465297937393188, "learning_rate": 9.542755845389895e-06, "loss": 1.0257, "step": 4535 }, { "epoch": 0.815607300188798, "grad_norm": 1.5261207818984985, "learning_rate": 9.542512493391052e-06, "loss": 0.8696, "step": 4536 }, { "epoch": 0.8157871077946597, "grad_norm": 1.421312689781189, "learning_rate": 9.542269079756274e-06, "loss": 0.8807, "step": 4537 }, { "epoch": 0.8159669154005215, "grad_norm": 1.1184051036834717, "learning_rate": 9.542025604488865e-06, "loss": 1.0355, "step": 4538 }, { "epoch": 0.8161467230063831, "grad_norm": 1.4069020748138428, "learning_rate": 9.54178206759213e-06, "loss": 0.7942, "step": 4539 }, { "epoch": 0.8163265306122449, "grad_norm": 1.5498729944229126, "learning_rate": 9.54153846906937e-06, "loss": 0.8288, "step": 4540 }, { "epoch": 0.8165063382181066, "grad_norm": 1.7223566770553589, "learning_rate": 9.541294808923891e-06, "loss": 0.8405, "step": 4541 }, { "epoch": 0.8166861458239684, "grad_norm": 1.6048188209533691, "learning_rate": 9.541051087159001e-06, "loss": 0.7976, "step": 4542 }, { "epoch": 0.81686595342983, "grad_norm": 1.5266600847244263, "learning_rate": 9.540807303778007e-06, "loss": 0.7773, "step": 4543 }, { "epoch": 0.8170457610356918, "grad_norm": 1.4939135313034058, "learning_rate": 9.540563458784215e-06, "loss": 0.7939, "step": 4544 }, { "epoch": 0.8172255686415535, "grad_norm": 1.6121000051498413, "learning_rate": 9.540319552180937e-06, "loss": 0.7793, "step": 4545 }, { "epoch": 0.8174053762474153, "grad_norm": 1.6794301271438599, "learning_rate": 9.540075583971477e-06, "loss": 0.7835, "step": 4546 }, { "epoch": 0.8175851838532769, "grad_norm": 1.5387239456176758, "learning_rate": 9.539831554159152e-06, "loss": 0.8011, "step": 4547 }, { "epoch": 0.8177649914591387, "grad_norm": 1.5807276964187622, "learning_rate": 9.539587462747266e-06, "loss": 0.8139, "step": 4548 }, { "epoch": 0.8179447990650004, "grad_norm": 1.4318275451660156, "learning_rate": 9.539343309739137e-06, "loss": 0.7365, "step": 4549 }, { "epoch": 0.8181246066708622, "grad_norm": 1.305558204650879, "learning_rate": 9.539099095138075e-06, "loss": 0.6795, "step": 4550 }, { "epoch": 0.818304414276724, "grad_norm": 1.3832557201385498, "learning_rate": 9.538854818947393e-06, "loss": 0.7985, "step": 4551 }, { "epoch": 0.8184842218825856, "grad_norm": 1.6546440124511719, "learning_rate": 9.53861048117041e-06, "loss": 0.8138, "step": 4552 }, { "epoch": 0.8186640294884474, "grad_norm": 1.3568905591964722, "learning_rate": 9.538366081810435e-06, "loss": 0.7033, "step": 4553 }, { "epoch": 0.8188438370943091, "grad_norm": 1.3987685441970825, "learning_rate": 9.538121620870788e-06, "loss": 0.8826, "step": 4554 }, { "epoch": 0.8190236447001709, "grad_norm": 1.6181285381317139, "learning_rate": 9.537877098354787e-06, "loss": 0.8566, "step": 4555 }, { "epoch": 0.8192034523060325, "grad_norm": 1.4289944171905518, "learning_rate": 9.537632514265746e-06, "loss": 0.7952, "step": 4556 }, { "epoch": 0.8193832599118943, "grad_norm": 1.4652900695800781, "learning_rate": 9.537387868606987e-06, "loss": 0.7986, "step": 4557 }, { "epoch": 0.819563067517756, "grad_norm": 1.5431394577026367, "learning_rate": 9.537143161381826e-06, "loss": 0.8399, "step": 4558 }, { "epoch": 0.8197428751236178, "grad_norm": 1.2543509006500244, "learning_rate": 9.536898392593587e-06, "loss": 1.0358, "step": 4559 }, { "epoch": 0.8199226827294794, "grad_norm": 1.4470915794372559, "learning_rate": 9.536653562245591e-06, "loss": 0.8004, "step": 4560 }, { "epoch": 0.8201024903353412, "grad_norm": 1.496783971786499, "learning_rate": 9.536408670341157e-06, "loss": 0.793, "step": 4561 }, { "epoch": 0.8202822979412029, "grad_norm": 1.0660513639450073, "learning_rate": 9.536163716883612e-06, "loss": 1.0345, "step": 4562 }, { "epoch": 0.8204621055470647, "grad_norm": 1.5748246908187866, "learning_rate": 9.535918701876276e-06, "loss": 0.8058, "step": 4563 }, { "epoch": 0.8206419131529263, "grad_norm": 1.4609767198562622, "learning_rate": 9.535673625322475e-06, "loss": 0.8007, "step": 4564 }, { "epoch": 0.8208217207587881, "grad_norm": 1.4257301092147827, "learning_rate": 9.535428487225533e-06, "loss": 0.7898, "step": 4565 }, { "epoch": 0.8210015283646498, "grad_norm": 1.447642207145691, "learning_rate": 9.53518328758878e-06, "loss": 0.8037, "step": 4566 }, { "epoch": 0.8211813359705116, "grad_norm": 1.4779698848724365, "learning_rate": 9.534938026415539e-06, "loss": 0.8515, "step": 4567 }, { "epoch": 0.8213611435763732, "grad_norm": 1.2419447898864746, "learning_rate": 9.53469270370914e-06, "loss": 1.047, "step": 4568 }, { "epoch": 0.821540951182235, "grad_norm": 1.1664235591888428, "learning_rate": 9.534447319472911e-06, "loss": 1.0762, "step": 4569 }, { "epoch": 0.8217207587880967, "grad_norm": 1.5008351802825928, "learning_rate": 9.534201873710183e-06, "loss": 0.7937, "step": 4570 }, { "epoch": 0.8219005663939585, "grad_norm": 1.6146990060806274, "learning_rate": 9.533956366424285e-06, "loss": 0.7877, "step": 4571 }, { "epoch": 0.8220803739998201, "grad_norm": 1.5095248222351074, "learning_rate": 9.533710797618545e-06, "loss": 0.8509, "step": 4572 }, { "epoch": 0.8222601816056819, "grad_norm": 1.5355072021484375, "learning_rate": 9.5334651672963e-06, "loss": 0.7714, "step": 4573 }, { "epoch": 0.8224399892115436, "grad_norm": 1.8603582382202148, "learning_rate": 9.533219475460882e-06, "loss": 0.8067, "step": 4574 }, { "epoch": 0.8226197968174054, "grad_norm": 1.484954595565796, "learning_rate": 9.532973722115624e-06, "loss": 0.8553, "step": 4575 }, { "epoch": 0.822799604423267, "grad_norm": 1.295440673828125, "learning_rate": 9.532727907263861e-06, "loss": 1.0243, "step": 4576 }, { "epoch": 0.8229794120291288, "grad_norm": 1.5458295345306396, "learning_rate": 9.532482030908927e-06, "loss": 0.7681, "step": 4577 }, { "epoch": 0.8231592196349906, "grad_norm": 1.5832511186599731, "learning_rate": 9.532236093054159e-06, "loss": 0.7675, "step": 4578 }, { "epoch": 0.8233390272408523, "grad_norm": 1.4838255643844604, "learning_rate": 9.531990093702893e-06, "loss": 0.8034, "step": 4579 }, { "epoch": 0.8235188348467141, "grad_norm": 1.581766963005066, "learning_rate": 9.53174403285847e-06, "loss": 0.8196, "step": 4580 }, { "epoch": 0.8236986424525757, "grad_norm": 1.5868351459503174, "learning_rate": 9.531497910524225e-06, "loss": 0.9017, "step": 4581 }, { "epoch": 0.8238784500584375, "grad_norm": 1.5604870319366455, "learning_rate": 9.531251726703502e-06, "loss": 0.8268, "step": 4582 }, { "epoch": 0.8240582576642992, "grad_norm": 1.5860410928726196, "learning_rate": 9.531005481399635e-06, "loss": 0.8274, "step": 4583 }, { "epoch": 0.824238065270161, "grad_norm": 1.5933278799057007, "learning_rate": 9.53075917461597e-06, "loss": 0.8077, "step": 4584 }, { "epoch": 0.8244178728760226, "grad_norm": 1.5410104990005493, "learning_rate": 9.53051280635585e-06, "loss": 0.835, "step": 4585 }, { "epoch": 0.8245976804818844, "grad_norm": 1.416272521018982, "learning_rate": 9.530266376622615e-06, "loss": 0.7625, "step": 4586 }, { "epoch": 0.8247774880877461, "grad_norm": 1.0805530548095703, "learning_rate": 9.530019885419609e-06, "loss": 1.0172, "step": 4587 }, { "epoch": 0.8249572956936079, "grad_norm": 1.5548336505889893, "learning_rate": 9.529773332750177e-06, "loss": 0.8273, "step": 4588 }, { "epoch": 0.8251371032994695, "grad_norm": 1.5280622243881226, "learning_rate": 9.529526718617665e-06, "loss": 0.7748, "step": 4589 }, { "epoch": 0.8253169109053313, "grad_norm": 1.524723768234253, "learning_rate": 9.529280043025419e-06, "loss": 0.7442, "step": 4590 }, { "epoch": 0.825496718511193, "grad_norm": 1.017289638519287, "learning_rate": 9.529033305976785e-06, "loss": 1.0406, "step": 4591 }, { "epoch": 0.8256765261170548, "grad_norm": 1.5242866277694702, "learning_rate": 9.528786507475112e-06, "loss": 0.7458, "step": 4592 }, { "epoch": 0.8258563337229164, "grad_norm": 1.5074979066848755, "learning_rate": 9.528539647523749e-06, "loss": 0.8165, "step": 4593 }, { "epoch": 0.8260361413287782, "grad_norm": 1.5541003942489624, "learning_rate": 9.528292726126044e-06, "loss": 0.8713, "step": 4594 }, { "epoch": 0.8262159489346399, "grad_norm": 1.5432974100112915, "learning_rate": 9.52804574328535e-06, "loss": 0.8527, "step": 4595 }, { "epoch": 0.8263957565405017, "grad_norm": 1.4784146547317505, "learning_rate": 9.527798699005017e-06, "loss": 0.8401, "step": 4596 }, { "epoch": 0.8265755641463634, "grad_norm": 1.5485517978668213, "learning_rate": 9.527551593288396e-06, "loss": 0.7766, "step": 4597 }, { "epoch": 0.8267553717522251, "grad_norm": 1.465700387954712, "learning_rate": 9.527304426138839e-06, "loss": 0.7931, "step": 4598 }, { "epoch": 0.8269351793580868, "grad_norm": 1.6669470071792603, "learning_rate": 9.527057197559704e-06, "loss": 0.8131, "step": 4599 }, { "epoch": 0.8271149869639486, "grad_norm": 1.6013599634170532, "learning_rate": 9.526809907554342e-06, "loss": 0.7729, "step": 4600 }, { "epoch": 0.8272947945698103, "grad_norm": 1.5066874027252197, "learning_rate": 9.52656255612611e-06, "loss": 0.8268, "step": 4601 }, { "epoch": 0.827474602175672, "grad_norm": 1.1768916845321655, "learning_rate": 9.52631514327836e-06, "loss": 1.0615, "step": 4602 }, { "epoch": 0.8276544097815337, "grad_norm": 1.6633349657058716, "learning_rate": 9.526067669014457e-06, "loss": 0.8554, "step": 4603 }, { "epoch": 0.8278342173873955, "grad_norm": 1.4322725534439087, "learning_rate": 9.525820133337752e-06, "loss": 0.8157, "step": 4604 }, { "epoch": 0.8280140249932573, "grad_norm": 1.4566580057144165, "learning_rate": 9.525572536251608e-06, "loss": 0.7936, "step": 4605 }, { "epoch": 0.8281938325991189, "grad_norm": 1.492184042930603, "learning_rate": 9.525324877759382e-06, "loss": 0.8434, "step": 4606 }, { "epoch": 0.8283736402049807, "grad_norm": 1.5739262104034424, "learning_rate": 9.525077157864434e-06, "loss": 0.8701, "step": 4607 }, { "epoch": 0.8285534478108424, "grad_norm": 1.559730887413025, "learning_rate": 9.524829376570128e-06, "loss": 0.7923, "step": 4608 }, { "epoch": 0.8287332554167042, "grad_norm": 1.5558615922927856, "learning_rate": 9.524581533879823e-06, "loss": 0.823, "step": 4609 }, { "epoch": 0.8289130630225658, "grad_norm": 1.442177414894104, "learning_rate": 9.524333629796886e-06, "loss": 0.7398, "step": 4610 }, { "epoch": 0.8290928706284276, "grad_norm": 1.5012450218200684, "learning_rate": 9.524085664324676e-06, "loss": 0.8303, "step": 4611 }, { "epoch": 0.8292726782342893, "grad_norm": 1.5087084770202637, "learning_rate": 9.52383763746656e-06, "loss": 0.7737, "step": 4612 }, { "epoch": 0.8294524858401511, "grad_norm": 1.3306589126586914, "learning_rate": 9.523589549225905e-06, "loss": 0.779, "step": 4613 }, { "epoch": 0.8296322934460127, "grad_norm": 1.5391734838485718, "learning_rate": 9.523341399606075e-06, "loss": 0.8213, "step": 4614 }, { "epoch": 0.8298121010518745, "grad_norm": 1.5132791996002197, "learning_rate": 9.523093188610435e-06, "loss": 0.8591, "step": 4615 }, { "epoch": 0.8299919086577362, "grad_norm": 1.42058265209198, "learning_rate": 9.522844916242358e-06, "loss": 0.7276, "step": 4616 }, { "epoch": 0.830171716263598, "grad_norm": 1.5608580112457275, "learning_rate": 9.522596582505208e-06, "loss": 0.7864, "step": 4617 }, { "epoch": 0.8303515238694597, "grad_norm": 1.5054469108581543, "learning_rate": 9.522348187402358e-06, "loss": 0.8586, "step": 4618 }, { "epoch": 0.8305313314753214, "grad_norm": 1.9800701141357422, "learning_rate": 9.522099730937177e-06, "loss": 0.8415, "step": 4619 }, { "epoch": 0.8307111390811831, "grad_norm": 1.562833309173584, "learning_rate": 9.521851213113036e-06, "loss": 0.7903, "step": 4620 }, { "epoch": 0.8308909466870449, "grad_norm": 1.415250301361084, "learning_rate": 9.521602633933306e-06, "loss": 0.7877, "step": 4621 }, { "epoch": 0.8310707542929066, "grad_norm": 1.64711332321167, "learning_rate": 9.521353993401363e-06, "loss": 0.7652, "step": 4622 }, { "epoch": 0.8312505618987683, "grad_norm": 1.6084498167037964, "learning_rate": 9.52110529152058e-06, "loss": 0.8046, "step": 4623 }, { "epoch": 0.83143036950463, "grad_norm": 1.4552913904190063, "learning_rate": 9.52085652829433e-06, "loss": 0.7319, "step": 4624 }, { "epoch": 0.8316101771104918, "grad_norm": 2.003741502761841, "learning_rate": 9.520607703725986e-06, "loss": 0.7555, "step": 4625 }, { "epoch": 0.8317899847163535, "grad_norm": 1.2042852640151978, "learning_rate": 9.52035881781893e-06, "loss": 1.0652, "step": 4626 }, { "epoch": 0.8319697923222152, "grad_norm": 1.0245556831359863, "learning_rate": 9.520109870576535e-06, "loss": 1.0275, "step": 4627 }, { "epoch": 0.8321495999280769, "grad_norm": 1.5531495809555054, "learning_rate": 9.51986086200218e-06, "loss": 0.8142, "step": 4628 }, { "epoch": 0.8323294075339387, "grad_norm": 1.5928889513015747, "learning_rate": 9.519611792099243e-06, "loss": 0.8435, "step": 4629 }, { "epoch": 0.8325092151398004, "grad_norm": 1.5009626150131226, "learning_rate": 9.519362660871106e-06, "loss": 0.8456, "step": 4630 }, { "epoch": 0.8326890227456621, "grad_norm": 1.4424359798431396, "learning_rate": 9.519113468321146e-06, "loss": 0.7866, "step": 4631 }, { "epoch": 0.8328688303515238, "grad_norm": 1.3306217193603516, "learning_rate": 9.518864214452748e-06, "loss": 0.7721, "step": 4632 }, { "epoch": 0.8330486379573856, "grad_norm": 1.4022245407104492, "learning_rate": 9.51861489926929e-06, "loss": 0.7842, "step": 4633 }, { "epoch": 0.8332284455632474, "grad_norm": 1.4518150091171265, "learning_rate": 9.518365522774157e-06, "loss": 0.7895, "step": 4634 }, { "epoch": 0.833408253169109, "grad_norm": 1.3432203531265259, "learning_rate": 9.518116084970734e-06, "loss": 0.7372, "step": 4635 }, { "epoch": 0.8335880607749708, "grad_norm": 1.267754077911377, "learning_rate": 9.517866585862404e-06, "loss": 1.066, "step": 4636 }, { "epoch": 0.8337678683808325, "grad_norm": 1.4921445846557617, "learning_rate": 9.517617025452552e-06, "loss": 0.8145, "step": 4637 }, { "epoch": 0.8339476759866943, "grad_norm": 1.356820821762085, "learning_rate": 9.517367403744566e-06, "loss": 0.7485, "step": 4638 }, { "epoch": 0.834127483592556, "grad_norm": 1.5452709197998047, "learning_rate": 9.517117720741828e-06, "loss": 0.8053, "step": 4639 }, { "epoch": 0.8343072911984177, "grad_norm": 1.6486698389053345, "learning_rate": 9.516867976447733e-06, "loss": 0.8053, "step": 4640 }, { "epoch": 0.8344870988042794, "grad_norm": 1.5844017267227173, "learning_rate": 9.516618170865665e-06, "loss": 0.8623, "step": 4641 }, { "epoch": 0.8346669064101412, "grad_norm": 1.4988126754760742, "learning_rate": 9.516368303999015e-06, "loss": 0.7781, "step": 4642 }, { "epoch": 0.8348467140160029, "grad_norm": 1.8310396671295166, "learning_rate": 9.516118375851173e-06, "loss": 0.8789, "step": 4643 }, { "epoch": 0.8350265216218646, "grad_norm": 1.1037012338638306, "learning_rate": 9.515868386425532e-06, "loss": 1.0105, "step": 4644 }, { "epoch": 0.8352063292277263, "grad_norm": 1.7147064208984375, "learning_rate": 9.515618335725481e-06, "loss": 0.7724, "step": 4645 }, { "epoch": 0.8353861368335881, "grad_norm": 1.6376233100891113, "learning_rate": 9.515368223754415e-06, "loss": 0.872, "step": 4646 }, { "epoch": 0.8355659444394498, "grad_norm": 1.5814369916915894, "learning_rate": 9.515118050515726e-06, "loss": 0.7911, "step": 4647 }, { "epoch": 0.8357457520453115, "grad_norm": 1.110367774963379, "learning_rate": 9.514867816012809e-06, "loss": 1.0425, "step": 4648 }, { "epoch": 0.8359255596511732, "grad_norm": 1.7514606714248657, "learning_rate": 9.514617520249061e-06, "loss": 0.8139, "step": 4649 }, { "epoch": 0.836105367257035, "grad_norm": 1.4810051918029785, "learning_rate": 9.514367163227878e-06, "loss": 0.8421, "step": 4650 }, { "epoch": 0.8362851748628967, "grad_norm": 1.4536575078964233, "learning_rate": 9.514116744952654e-06, "loss": 0.7515, "step": 4651 }, { "epoch": 0.8364649824687584, "grad_norm": 1.0904573202133179, "learning_rate": 9.51386626542679e-06, "loss": 1.0108, "step": 4652 }, { "epoch": 0.8366447900746201, "grad_norm": 1.4826643466949463, "learning_rate": 9.513615724653684e-06, "loss": 0.7834, "step": 4653 }, { "epoch": 0.8368245976804819, "grad_norm": 1.3502509593963623, "learning_rate": 9.513365122636734e-06, "loss": 0.7777, "step": 4654 }, { "epoch": 0.8370044052863436, "grad_norm": 1.4459819793701172, "learning_rate": 9.513114459379342e-06, "loss": 0.7794, "step": 4655 }, { "epoch": 0.8371842128922053, "grad_norm": 1.4585427045822144, "learning_rate": 9.51286373488491e-06, "loss": 0.8395, "step": 4656 }, { "epoch": 0.837364020498067, "grad_norm": 1.9804768562316895, "learning_rate": 9.512612949156837e-06, "loss": 0.7733, "step": 4657 }, { "epoch": 0.8375438281039288, "grad_norm": 1.5146135091781616, "learning_rate": 9.512362102198526e-06, "loss": 0.7817, "step": 4658 }, { "epoch": 0.8377236357097905, "grad_norm": 1.108905553817749, "learning_rate": 9.512111194013385e-06, "loss": 1.0161, "step": 4659 }, { "epoch": 0.8379034433156523, "grad_norm": 1.5891153812408447, "learning_rate": 9.511860224604815e-06, "loss": 0.8332, "step": 4660 }, { "epoch": 0.838083250921514, "grad_norm": 1.083514928817749, "learning_rate": 9.51160919397622e-06, "loss": 1.0298, "step": 4661 }, { "epoch": 0.8382630585273757, "grad_norm": 1.4765925407409668, "learning_rate": 9.51135810213101e-06, "loss": 0.8004, "step": 4662 }, { "epoch": 0.8384428661332375, "grad_norm": 1.539293646812439, "learning_rate": 9.511106949072588e-06, "loss": 0.812, "step": 4663 }, { "epoch": 0.8386226737390992, "grad_norm": 1.465014100074768, "learning_rate": 9.510855734804366e-06, "loss": 0.7558, "step": 4664 }, { "epoch": 0.8388024813449609, "grad_norm": 1.515457272529602, "learning_rate": 9.51060445932975e-06, "loss": 0.8524, "step": 4665 }, { "epoch": 0.8389822889508226, "grad_norm": 1.5354347229003906, "learning_rate": 9.510353122652149e-06, "loss": 0.834, "step": 4666 }, { "epoch": 0.8391620965566844, "grad_norm": 1.456783413887024, "learning_rate": 9.510101724774976e-06, "loss": 0.8027, "step": 4667 }, { "epoch": 0.8393419041625461, "grad_norm": 1.4883925914764404, "learning_rate": 9.509850265701639e-06, "loss": 0.7828, "step": 4668 }, { "epoch": 0.8395217117684078, "grad_norm": 1.4695441722869873, "learning_rate": 9.509598745435552e-06, "loss": 0.7865, "step": 4669 }, { "epoch": 0.8397015193742695, "grad_norm": 1.6204776763916016, "learning_rate": 9.509347163980128e-06, "loss": 0.8042, "step": 4670 }, { "epoch": 0.8398813269801313, "grad_norm": 1.512015700340271, "learning_rate": 9.509095521338779e-06, "loss": 0.7889, "step": 4671 }, { "epoch": 0.840061134585993, "grad_norm": 1.6088957786560059, "learning_rate": 9.508843817514922e-06, "loss": 0.7878, "step": 4672 }, { "epoch": 0.8402409421918547, "grad_norm": 1.5691156387329102, "learning_rate": 9.508592052511967e-06, "loss": 0.8163, "step": 4673 }, { "epoch": 0.8404207497977164, "grad_norm": 1.4683409929275513, "learning_rate": 9.508340226333337e-06, "loss": 0.8356, "step": 4674 }, { "epoch": 0.8406005574035782, "grad_norm": 1.6217235326766968, "learning_rate": 9.508088338982443e-06, "loss": 0.8419, "step": 4675 }, { "epoch": 0.8407803650094399, "grad_norm": 1.4347676038742065, "learning_rate": 9.507836390462708e-06, "loss": 0.7957, "step": 4676 }, { "epoch": 0.8409601726153016, "grad_norm": 1.5659313201904297, "learning_rate": 9.507584380777547e-06, "loss": 0.8569, "step": 4677 }, { "epoch": 0.8411399802211633, "grad_norm": 1.5148638486862183, "learning_rate": 9.50733230993038e-06, "loss": 0.8314, "step": 4678 }, { "epoch": 0.8413197878270251, "grad_norm": 1.4209426641464233, "learning_rate": 9.50708017792463e-06, "loss": 0.8309, "step": 4679 }, { "epoch": 0.8414995954328868, "grad_norm": 1.4204400777816772, "learning_rate": 9.506827984763714e-06, "loss": 0.7828, "step": 4680 }, { "epoch": 0.8416794030387486, "grad_norm": 1.5239940881729126, "learning_rate": 9.506575730451056e-06, "loss": 0.874, "step": 4681 }, { "epoch": 0.8418592106446102, "grad_norm": 1.5697383880615234, "learning_rate": 9.506323414990078e-06, "loss": 0.757, "step": 4682 }, { "epoch": 0.842039018250472, "grad_norm": 1.086768388748169, "learning_rate": 9.506071038384205e-06, "loss": 1.0398, "step": 4683 }, { "epoch": 0.8422188258563337, "grad_norm": 1.6521122455596924, "learning_rate": 9.50581860063686e-06, "loss": 0.8126, "step": 4684 }, { "epoch": 0.8423986334621955, "grad_norm": 1.476617455482483, "learning_rate": 9.50556610175147e-06, "loss": 0.7791, "step": 4685 }, { "epoch": 0.8425784410680571, "grad_norm": 1.629157304763794, "learning_rate": 9.505313541731459e-06, "loss": 0.8598, "step": 4686 }, { "epoch": 0.8427582486739189, "grad_norm": 1.5070154666900635, "learning_rate": 9.505060920580256e-06, "loss": 0.7956, "step": 4687 }, { "epoch": 0.8429380562797807, "grad_norm": 1.9150975942611694, "learning_rate": 9.504808238301286e-06, "loss": 0.7227, "step": 4688 }, { "epoch": 0.8431178638856424, "grad_norm": 1.181146502494812, "learning_rate": 9.50455549489798e-06, "loss": 1.0173, "step": 4689 }, { "epoch": 0.8432976714915041, "grad_norm": 1.6266101598739624, "learning_rate": 9.504302690373765e-06, "loss": 0.7718, "step": 4690 }, { "epoch": 0.8434774790973658, "grad_norm": 3.6279656887054443, "learning_rate": 9.504049824732076e-06, "loss": 0.831, "step": 4691 }, { "epoch": 0.8436572867032276, "grad_norm": 1.5487802028656006, "learning_rate": 9.503796897976339e-06, "loss": 0.7672, "step": 4692 }, { "epoch": 0.8438370943090893, "grad_norm": 1.4396841526031494, "learning_rate": 9.503543910109987e-06, "loss": 0.7995, "step": 4693 }, { "epoch": 0.844016901914951, "grad_norm": 1.4144043922424316, "learning_rate": 9.503290861136454e-06, "loss": 0.7351, "step": 4694 }, { "epoch": 0.8441967095208127, "grad_norm": 1.5692888498306274, "learning_rate": 9.503037751059173e-06, "loss": 0.838, "step": 4695 }, { "epoch": 0.8443765171266745, "grad_norm": 1.4566915035247803, "learning_rate": 9.502784579881576e-06, "loss": 0.839, "step": 4696 }, { "epoch": 0.8445563247325362, "grad_norm": 1.0450375080108643, "learning_rate": 9.502531347607104e-06, "loss": 1.0616, "step": 4697 }, { "epoch": 0.844736132338398, "grad_norm": 1.8645967245101929, "learning_rate": 9.502278054239188e-06, "loss": 0.867, "step": 4698 }, { "epoch": 0.8449159399442596, "grad_norm": 1.4465693235397339, "learning_rate": 9.502024699781267e-06, "loss": 0.7595, "step": 4699 }, { "epoch": 0.8450957475501214, "grad_norm": 1.5456550121307373, "learning_rate": 9.501771284236778e-06, "loss": 0.8139, "step": 4700 }, { "epoch": 0.8452755551559831, "grad_norm": 1.5064281225204468, "learning_rate": 9.50151780760916e-06, "loss": 0.7666, "step": 4701 }, { "epoch": 0.8454553627618449, "grad_norm": 2.027662754058838, "learning_rate": 9.501264269901851e-06, "loss": 0.6897, "step": 4702 }, { "epoch": 0.8456351703677065, "grad_norm": 1.5308784246444702, "learning_rate": 9.501010671118292e-06, "loss": 0.782, "step": 4703 }, { "epoch": 0.8458149779735683, "grad_norm": 1.019213318824768, "learning_rate": 9.500757011261924e-06, "loss": 1.0368, "step": 4704 }, { "epoch": 0.84599478557943, "grad_norm": 1.3638519048690796, "learning_rate": 9.500503290336189e-06, "loss": 0.8248, "step": 4705 }, { "epoch": 0.8461745931852918, "grad_norm": 1.5017685890197754, "learning_rate": 9.50024950834453e-06, "loss": 0.7952, "step": 4706 }, { "epoch": 0.8463544007911534, "grad_norm": 1.5809905529022217, "learning_rate": 9.499995665290392e-06, "loss": 0.807, "step": 4707 }, { "epoch": 0.8465342083970152, "grad_norm": 1.1145106554031372, "learning_rate": 9.499741761177215e-06, "loss": 0.9926, "step": 4708 }, { "epoch": 0.8467140160028769, "grad_norm": 1.428971767425537, "learning_rate": 9.499487796008447e-06, "loss": 0.8485, "step": 4709 }, { "epoch": 0.8468938236087387, "grad_norm": 1.4750932455062866, "learning_rate": 9.499233769787534e-06, "loss": 0.7848, "step": 4710 }, { "epoch": 0.8470736312146003, "grad_norm": 1.5464423894882202, "learning_rate": 9.498979682517921e-06, "loss": 0.7957, "step": 4711 }, { "epoch": 0.8472534388204621, "grad_norm": 1.4672311544418335, "learning_rate": 9.498725534203059e-06, "loss": 0.8085, "step": 4712 }, { "epoch": 0.8474332464263238, "grad_norm": 1.5195742845535278, "learning_rate": 9.498471324846395e-06, "loss": 0.7509, "step": 4713 }, { "epoch": 0.8476130540321856, "grad_norm": 1.4483758211135864, "learning_rate": 9.498217054451376e-06, "loss": 0.7601, "step": 4714 }, { "epoch": 0.8477928616380472, "grad_norm": 1.6731927394866943, "learning_rate": 9.497962723021454e-06, "loss": 0.7933, "step": 4715 }, { "epoch": 0.847972669243909, "grad_norm": 1.4304571151733398, "learning_rate": 9.497708330560079e-06, "loss": 0.809, "step": 4716 }, { "epoch": 0.8481524768497708, "grad_norm": 1.5072499513626099, "learning_rate": 9.497453877070706e-06, "loss": 0.8042, "step": 4717 }, { "epoch": 0.8483322844556325, "grad_norm": 1.4576445817947388, "learning_rate": 9.497199362556783e-06, "loss": 0.7257, "step": 4718 }, { "epoch": 0.8485120920614942, "grad_norm": 1.3607358932495117, "learning_rate": 9.496944787021767e-06, "loss": 0.7197, "step": 4719 }, { "epoch": 0.8486918996673559, "grad_norm": 1.4854357242584229, "learning_rate": 9.49669015046911e-06, "loss": 0.8159, "step": 4720 }, { "epoch": 0.8488717072732177, "grad_norm": 1.6965532302856445, "learning_rate": 9.496435452902268e-06, "loss": 0.8396, "step": 4721 }, { "epoch": 0.8490515148790794, "grad_norm": 1.9260140657424927, "learning_rate": 9.496180694324697e-06, "loss": 0.8785, "step": 4722 }, { "epoch": 0.8492313224849412, "grad_norm": 1.5316423177719116, "learning_rate": 9.495925874739852e-06, "loss": 0.7765, "step": 4723 }, { "epoch": 0.8494111300908028, "grad_norm": 1.4715147018432617, "learning_rate": 9.495670994151194e-06, "loss": 0.7923, "step": 4724 }, { "epoch": 0.8495909376966646, "grad_norm": 1.2317607402801514, "learning_rate": 9.49541605256218e-06, "loss": 1.0177, "step": 4725 }, { "epoch": 0.8497707453025263, "grad_norm": 1.444812536239624, "learning_rate": 9.495161049976267e-06, "loss": 0.8015, "step": 4726 }, { "epoch": 0.8499505529083881, "grad_norm": 1.1209044456481934, "learning_rate": 9.494905986396918e-06, "loss": 1.0322, "step": 4727 }, { "epoch": 0.8501303605142497, "grad_norm": 1.596050500869751, "learning_rate": 9.494650861827593e-06, "loss": 0.8098, "step": 4728 }, { "epoch": 0.8503101681201115, "grad_norm": 1.0965659618377686, "learning_rate": 9.494395676271752e-06, "loss": 1.0028, "step": 4729 }, { "epoch": 0.8504899757259732, "grad_norm": 1.4914584159851074, "learning_rate": 9.49414042973286e-06, "loss": 0.7253, "step": 4730 }, { "epoch": 0.850669783331835, "grad_norm": 1.4787499904632568, "learning_rate": 9.493885122214379e-06, "loss": 0.7269, "step": 4731 }, { "epoch": 0.8508495909376966, "grad_norm": 1.4800746440887451, "learning_rate": 9.493629753719774e-06, "loss": 0.8139, "step": 4732 }, { "epoch": 0.8510293985435584, "grad_norm": 1.4505867958068848, "learning_rate": 9.493374324252508e-06, "loss": 0.6965, "step": 4733 }, { "epoch": 0.8512092061494201, "grad_norm": 1.5658198595046997, "learning_rate": 9.49311883381605e-06, "loss": 0.8592, "step": 4734 }, { "epoch": 0.8513890137552819, "grad_norm": 1.4363031387329102, "learning_rate": 9.492863282413865e-06, "loss": 0.7614, "step": 4735 }, { "epoch": 0.8515688213611435, "grad_norm": 1.4705390930175781, "learning_rate": 9.49260767004942e-06, "loss": 0.867, "step": 4736 }, { "epoch": 0.8517486289670053, "grad_norm": 1.5581986904144287, "learning_rate": 9.492351996726183e-06, "loss": 0.8414, "step": 4737 }, { "epoch": 0.851928436572867, "grad_norm": 1.4512195587158203, "learning_rate": 9.492096262447625e-06, "loss": 0.8612, "step": 4738 }, { "epoch": 0.8521082441787288, "grad_norm": 1.4916208982467651, "learning_rate": 9.491840467217215e-06, "loss": 0.7965, "step": 4739 }, { "epoch": 0.8522880517845904, "grad_norm": 1.4837064743041992, "learning_rate": 9.491584611038423e-06, "loss": 0.8271, "step": 4740 }, { "epoch": 0.8524678593904522, "grad_norm": 1.5338810682296753, "learning_rate": 9.491328693914723e-06, "loss": 0.8513, "step": 4741 }, { "epoch": 0.8526476669963139, "grad_norm": 1.2592424154281616, "learning_rate": 9.491072715849585e-06, "loss": 0.9686, "step": 4742 }, { "epoch": 0.8528274746021757, "grad_norm": 1.543633222579956, "learning_rate": 9.490816676846482e-06, "loss": 0.8425, "step": 4743 }, { "epoch": 0.8530072822080375, "grad_norm": 1.0783456563949585, "learning_rate": 9.49056057690889e-06, "loss": 1.0509, "step": 4744 }, { "epoch": 0.8531870898138991, "grad_norm": 1.549782633781433, "learning_rate": 9.490304416040284e-06, "loss": 0.7943, "step": 4745 }, { "epoch": 0.8533668974197609, "grad_norm": 1.4342007637023926, "learning_rate": 9.490048194244139e-06, "loss": 0.7908, "step": 4746 }, { "epoch": 0.8535467050256226, "grad_norm": 1.4835509061813354, "learning_rate": 9.489791911523929e-06, "loss": 0.76, "step": 4747 }, { "epoch": 0.8537265126314844, "grad_norm": 1.6968473196029663, "learning_rate": 9.489535567883136e-06, "loss": 0.841, "step": 4748 }, { "epoch": 0.853906320237346, "grad_norm": 1.492108702659607, "learning_rate": 9.489279163325237e-06, "loss": 0.8145, "step": 4749 }, { "epoch": 0.8540861278432078, "grad_norm": 1.3759979009628296, "learning_rate": 9.48902269785371e-06, "loss": 0.726, "step": 4750 }, { "epoch": 0.8542659354490695, "grad_norm": 1.4331070184707642, "learning_rate": 9.488766171472034e-06, "loss": 0.7882, "step": 4751 }, { "epoch": 0.8544457430549313, "grad_norm": 1.4747772216796875, "learning_rate": 9.488509584183691e-06, "loss": 0.8513, "step": 4752 }, { "epoch": 0.8546255506607929, "grad_norm": 1.4280459880828857, "learning_rate": 9.488252935992163e-06, "loss": 0.8303, "step": 4753 }, { "epoch": 0.8548053582666547, "grad_norm": 1.4923015832901, "learning_rate": 9.487996226900931e-06, "loss": 0.7882, "step": 4754 }, { "epoch": 0.8549851658725164, "grad_norm": 1.5378597974777222, "learning_rate": 9.48773945691348e-06, "loss": 0.7361, "step": 4755 }, { "epoch": 0.8551649734783782, "grad_norm": 1.2966725826263428, "learning_rate": 9.487482626033294e-06, "loss": 1.0416, "step": 4756 }, { "epoch": 0.8553447810842398, "grad_norm": 1.6382213830947876, "learning_rate": 9.487225734263856e-06, "loss": 0.8149, "step": 4757 }, { "epoch": 0.8555245886901016, "grad_norm": 1.4501186609268188, "learning_rate": 9.486968781608653e-06, "loss": 0.7961, "step": 4758 }, { "epoch": 0.8557043962959633, "grad_norm": 1.6126421689987183, "learning_rate": 9.48671176807117e-06, "loss": 0.8019, "step": 4759 }, { "epoch": 0.8558842039018251, "grad_norm": 1.4601049423217773, "learning_rate": 9.486454693654897e-06, "loss": 0.8112, "step": 4760 }, { "epoch": 0.8560640115076867, "grad_norm": 1.412331223487854, "learning_rate": 9.486197558363318e-06, "loss": 0.7797, "step": 4761 }, { "epoch": 0.8562438191135485, "grad_norm": 1.4747018814086914, "learning_rate": 9.485940362199927e-06, "loss": 0.8257, "step": 4762 }, { "epoch": 0.8564236267194102, "grad_norm": 1.024909257888794, "learning_rate": 9.485683105168212e-06, "loss": 1.0602, "step": 4763 }, { "epoch": 0.856603434325272, "grad_norm": 1.3951696157455444, "learning_rate": 9.485425787271663e-06, "loss": 0.7714, "step": 4764 }, { "epoch": 0.8567832419311336, "grad_norm": 1.4494869709014893, "learning_rate": 9.48516840851377e-06, "loss": 0.8029, "step": 4765 }, { "epoch": 0.8569630495369954, "grad_norm": 1.1347081661224365, "learning_rate": 9.484910968898027e-06, "loss": 1.0182, "step": 4766 }, { "epoch": 0.8571428571428571, "grad_norm": 1.1101051568984985, "learning_rate": 9.484653468427926e-06, "loss": 1.0355, "step": 4767 }, { "epoch": 0.8573226647487189, "grad_norm": 1.5306233167648315, "learning_rate": 9.484395907106965e-06, "loss": 0.885, "step": 4768 }, { "epoch": 0.8575024723545805, "grad_norm": 1.739035725593567, "learning_rate": 9.484138284938633e-06, "loss": 0.7959, "step": 4769 }, { "epoch": 0.8576822799604423, "grad_norm": 1.5150938034057617, "learning_rate": 9.48388060192643e-06, "loss": 0.8156, "step": 4770 }, { "epoch": 0.8578620875663041, "grad_norm": 1.0650852918624878, "learning_rate": 9.483622858073851e-06, "loss": 1.0679, "step": 4771 }, { "epoch": 0.8580418951721658, "grad_norm": 1.168430209159851, "learning_rate": 9.483365053384391e-06, "loss": 0.9825, "step": 4772 }, { "epoch": 0.8582217027780276, "grad_norm": 1.3841443061828613, "learning_rate": 9.483107187861552e-06, "loss": 0.7948, "step": 4773 }, { "epoch": 0.8584015103838892, "grad_norm": 1.4784632921218872, "learning_rate": 9.482849261508828e-06, "loss": 0.8087, "step": 4774 }, { "epoch": 0.858581317989751, "grad_norm": 2.1182913780212402, "learning_rate": 9.482591274329724e-06, "loss": 0.7926, "step": 4775 }, { "epoch": 0.8587611255956127, "grad_norm": 1.4695041179656982, "learning_rate": 9.482333226327738e-06, "loss": 0.8285, "step": 4776 }, { "epoch": 0.8589409332014745, "grad_norm": 2.0132744312286377, "learning_rate": 9.48207511750637e-06, "loss": 0.8466, "step": 4777 }, { "epoch": 0.8591207408073361, "grad_norm": 1.4114259481430054, "learning_rate": 9.481816947869126e-06, "loss": 0.8216, "step": 4778 }, { "epoch": 0.8593005484131979, "grad_norm": 1.2015447616577148, "learning_rate": 9.481558717419506e-06, "loss": 1.0184, "step": 4779 }, { "epoch": 0.8594803560190596, "grad_norm": 1.5798442363739014, "learning_rate": 9.481300426161016e-06, "loss": 0.8252, "step": 4780 }, { "epoch": 0.8596601636249214, "grad_norm": 1.5662212371826172, "learning_rate": 9.481042074097156e-06, "loss": 0.7554, "step": 4781 }, { "epoch": 0.859839971230783, "grad_norm": 1.8148833513259888, "learning_rate": 9.480783661231436e-06, "loss": 0.769, "step": 4782 }, { "epoch": 0.8600197788366448, "grad_norm": 1.5574424266815186, "learning_rate": 9.480525187567362e-06, "loss": 0.839, "step": 4783 }, { "epoch": 0.8601995864425065, "grad_norm": 1.4488495588302612, "learning_rate": 9.48026665310844e-06, "loss": 0.8283, "step": 4784 }, { "epoch": 0.8603793940483683, "grad_norm": 1.5299148559570312, "learning_rate": 9.480008057858179e-06, "loss": 0.8288, "step": 4785 }, { "epoch": 0.8605592016542299, "grad_norm": 1.5322327613830566, "learning_rate": 9.479749401820085e-06, "loss": 0.8383, "step": 4786 }, { "epoch": 0.8607390092600917, "grad_norm": 1.4721593856811523, "learning_rate": 9.479490684997673e-06, "loss": 0.7967, "step": 4787 }, { "epoch": 0.8609188168659534, "grad_norm": 1.437270164489746, "learning_rate": 9.479231907394447e-06, "loss": 0.8085, "step": 4788 }, { "epoch": 0.8610986244718152, "grad_norm": 1.4771713018417358, "learning_rate": 9.478973069013922e-06, "loss": 0.7741, "step": 4789 }, { "epoch": 0.8612784320776768, "grad_norm": 1.4247263669967651, "learning_rate": 9.47871416985961e-06, "loss": 0.7105, "step": 4790 }, { "epoch": 0.8614582396835386, "grad_norm": 1.4071322679519653, "learning_rate": 9.478455209935023e-06, "loss": 0.8009, "step": 4791 }, { "epoch": 0.8616380472894003, "grad_norm": 1.538465976715088, "learning_rate": 9.478196189243675e-06, "loss": 0.7595, "step": 4792 }, { "epoch": 0.8618178548952621, "grad_norm": 1.46634840965271, "learning_rate": 9.477937107789082e-06, "loss": 0.8553, "step": 4793 }, { "epoch": 0.8619976625011238, "grad_norm": 1.5542404651641846, "learning_rate": 9.477677965574757e-06, "loss": 0.7495, "step": 4794 }, { "epoch": 0.8621774701069855, "grad_norm": 1.588809847831726, "learning_rate": 9.477418762604216e-06, "loss": 0.7992, "step": 4795 }, { "epoch": 0.8623572777128472, "grad_norm": 1.476873755455017, "learning_rate": 9.477159498880979e-06, "loss": 0.7907, "step": 4796 }, { "epoch": 0.862537085318709, "grad_norm": 1.505225658416748, "learning_rate": 9.476900174408562e-06, "loss": 0.7989, "step": 4797 }, { "epoch": 0.8627168929245707, "grad_norm": 1.1241689920425415, "learning_rate": 9.476640789190485e-06, "loss": 1.0443, "step": 4798 }, { "epoch": 0.8628967005304324, "grad_norm": 1.5493125915527344, "learning_rate": 9.476381343230265e-06, "loss": 0.821, "step": 4799 }, { "epoch": 0.8630765081362942, "grad_norm": 1.4636551141738892, "learning_rate": 9.476121836531424e-06, "loss": 0.7616, "step": 4800 }, { "epoch": 0.8632563157421559, "grad_norm": 1.5136648416519165, "learning_rate": 9.475862269097483e-06, "loss": 0.7648, "step": 4801 }, { "epoch": 0.8634361233480177, "grad_norm": 1.545785665512085, "learning_rate": 9.475602640931964e-06, "loss": 0.8719, "step": 4802 }, { "epoch": 0.8636159309538793, "grad_norm": 1.3699753284454346, "learning_rate": 9.47534295203839e-06, "loss": 0.7288, "step": 4803 }, { "epoch": 0.8637957385597411, "grad_norm": 1.532967209815979, "learning_rate": 9.475083202420285e-06, "loss": 0.813, "step": 4804 }, { "epoch": 0.8639755461656028, "grad_norm": 1.5547925233840942, "learning_rate": 9.47482339208117e-06, "loss": 0.7786, "step": 4805 }, { "epoch": 0.8641553537714646, "grad_norm": 1.4544427394866943, "learning_rate": 9.474563521024578e-06, "loss": 0.7806, "step": 4806 }, { "epoch": 0.8643351613773262, "grad_norm": 1.6555811166763306, "learning_rate": 9.474303589254026e-06, "loss": 0.7967, "step": 4807 }, { "epoch": 0.864514968983188, "grad_norm": 1.4523322582244873, "learning_rate": 9.474043596773048e-06, "loss": 0.8287, "step": 4808 }, { "epoch": 0.8646947765890497, "grad_norm": 1.4548453092575073, "learning_rate": 9.473783543585167e-06, "loss": 0.7465, "step": 4809 }, { "epoch": 0.8648745841949115, "grad_norm": 1.5515263080596924, "learning_rate": 9.473523429693915e-06, "loss": 0.814, "step": 4810 }, { "epoch": 0.8650543918007731, "grad_norm": 1.5015981197357178, "learning_rate": 9.473263255102819e-06, "loss": 0.8121, "step": 4811 }, { "epoch": 0.8652341994066349, "grad_norm": 1.5532110929489136, "learning_rate": 9.47300301981541e-06, "loss": 0.7762, "step": 4812 }, { "epoch": 0.8654140070124966, "grad_norm": 1.5990227460861206, "learning_rate": 9.47274272383522e-06, "loss": 0.8747, "step": 4813 }, { "epoch": 0.8655938146183584, "grad_norm": 1.58867347240448, "learning_rate": 9.47248236716578e-06, "loss": 0.8109, "step": 4814 }, { "epoch": 0.86577362222422, "grad_norm": 1.5321375131607056, "learning_rate": 9.472221949810622e-06, "loss": 0.781, "step": 4815 }, { "epoch": 0.8659534298300818, "grad_norm": 1.2000808715820312, "learning_rate": 9.47196147177328e-06, "loss": 1.0474, "step": 4816 }, { "epoch": 0.8661332374359435, "grad_norm": 1.3100295066833496, "learning_rate": 9.471700933057291e-06, "loss": 0.9949, "step": 4817 }, { "epoch": 0.8663130450418053, "grad_norm": 1.1335164308547974, "learning_rate": 9.471440333666185e-06, "loss": 0.9988, "step": 4818 }, { "epoch": 0.866492852647667, "grad_norm": 1.5723861455917358, "learning_rate": 9.471179673603503e-06, "loss": 0.8694, "step": 4819 }, { "epoch": 0.8666726602535287, "grad_norm": 1.4350836277008057, "learning_rate": 9.470918952872779e-06, "loss": 0.766, "step": 4820 }, { "epoch": 0.8668524678593904, "grad_norm": 1.5935750007629395, "learning_rate": 9.47065817147755e-06, "loss": 0.8498, "step": 4821 }, { "epoch": 0.8670322754652522, "grad_norm": 1.8240487575531006, "learning_rate": 9.470397329421357e-06, "loss": 0.7924, "step": 4822 }, { "epoch": 0.8672120830711139, "grad_norm": 1.6965103149414062, "learning_rate": 9.470136426707738e-06, "loss": 0.8024, "step": 4823 }, { "epoch": 0.8673918906769756, "grad_norm": 1.4187966585159302, "learning_rate": 9.469875463340233e-06, "loss": 0.7542, "step": 4824 }, { "epoch": 0.8675716982828373, "grad_norm": 1.5632860660552979, "learning_rate": 9.469614439322383e-06, "loss": 0.7786, "step": 4825 }, { "epoch": 0.8677515058886991, "grad_norm": 1.4717576503753662, "learning_rate": 9.469353354657729e-06, "loss": 0.7766, "step": 4826 }, { "epoch": 0.8679313134945609, "grad_norm": 1.5094801187515259, "learning_rate": 9.469092209349816e-06, "loss": 0.7998, "step": 4827 }, { "epoch": 0.8681111211004225, "grad_norm": 1.4798719882965088, "learning_rate": 9.468831003402184e-06, "loss": 0.779, "step": 4828 }, { "epoch": 0.8682909287062843, "grad_norm": 1.4589825868606567, "learning_rate": 9.46856973681838e-06, "loss": 0.751, "step": 4829 }, { "epoch": 0.868470736312146, "grad_norm": 1.5674482583999634, "learning_rate": 9.468308409601947e-06, "loss": 0.8204, "step": 4830 }, { "epoch": 0.8686505439180078, "grad_norm": 1.5424445867538452, "learning_rate": 9.468047021756433e-06, "loss": 0.868, "step": 4831 }, { "epoch": 0.8688303515238694, "grad_norm": 1.4777412414550781, "learning_rate": 9.467785573285383e-06, "loss": 0.8433, "step": 4832 }, { "epoch": 0.8690101591297312, "grad_norm": 1.555158257484436, "learning_rate": 9.467524064192346e-06, "loss": 1.0386, "step": 4833 }, { "epoch": 0.8691899667355929, "grad_norm": 1.360934853553772, "learning_rate": 9.46726249448087e-06, "loss": 1.0655, "step": 4834 }, { "epoch": 0.8693697743414547, "grad_norm": 1.5808839797973633, "learning_rate": 9.467000864154501e-06, "loss": 0.7788, "step": 4835 }, { "epoch": 0.8695495819473164, "grad_norm": 1.546069622039795, "learning_rate": 9.466739173216793e-06, "loss": 0.8423, "step": 4836 }, { "epoch": 0.8697293895531781, "grad_norm": 1.3851464986801147, "learning_rate": 9.466477421671296e-06, "loss": 0.8078, "step": 4837 }, { "epoch": 0.8699091971590398, "grad_norm": 1.5620698928833008, "learning_rate": 9.46621560952156e-06, "loss": 0.761, "step": 4838 }, { "epoch": 0.8700890047649016, "grad_norm": 1.4431191682815552, "learning_rate": 9.46595373677114e-06, "loss": 0.8221, "step": 4839 }, { "epoch": 0.8702688123707633, "grad_norm": 1.4492493867874146, "learning_rate": 9.465691803423587e-06, "loss": 0.7683, "step": 4840 }, { "epoch": 0.870448619976625, "grad_norm": 1.612499475479126, "learning_rate": 9.465429809482456e-06, "loss": 1.028, "step": 4841 }, { "epoch": 0.8706284275824867, "grad_norm": 1.5361970663070679, "learning_rate": 9.465167754951301e-06, "loss": 1.0216, "step": 4842 }, { "epoch": 0.8708082351883485, "grad_norm": 1.2858940362930298, "learning_rate": 9.464905639833677e-06, "loss": 1.0358, "step": 4843 }, { "epoch": 0.8709880427942102, "grad_norm": 2.1710877418518066, "learning_rate": 9.464643464133145e-06, "loss": 0.827, "step": 4844 }, { "epoch": 0.8711678504000719, "grad_norm": 1.4665095806121826, "learning_rate": 9.464381227853259e-06, "loss": 0.7925, "step": 4845 }, { "epoch": 0.8713476580059336, "grad_norm": 1.5063393115997314, "learning_rate": 9.464118930997577e-06, "loss": 1.0022, "step": 4846 }, { "epoch": 0.8715274656117954, "grad_norm": 1.6751232147216797, "learning_rate": 9.463856573569657e-06, "loss": 0.8348, "step": 4847 }, { "epoch": 0.8717072732176571, "grad_norm": 1.6980006694793701, "learning_rate": 9.463594155573063e-06, "loss": 0.8332, "step": 4848 }, { "epoch": 0.8718870808235188, "grad_norm": 1.5884907245635986, "learning_rate": 9.463331677011352e-06, "loss": 0.8614, "step": 4849 }, { "epoch": 0.8720668884293805, "grad_norm": 1.4665364027023315, "learning_rate": 9.463069137888086e-06, "loss": 0.7895, "step": 4850 }, { "epoch": 0.8722466960352423, "grad_norm": 1.5350004434585571, "learning_rate": 9.462806538206829e-06, "loss": 0.8727, "step": 4851 }, { "epoch": 0.872426503641104, "grad_norm": 1.526597499847412, "learning_rate": 9.462543877971143e-06, "loss": 0.8157, "step": 4852 }, { "epoch": 0.8726063112469657, "grad_norm": 1.4443378448486328, "learning_rate": 9.462281157184592e-06, "loss": 0.7737, "step": 4853 }, { "epoch": 0.8727861188528274, "grad_norm": 1.6446102857589722, "learning_rate": 9.462018375850741e-06, "loss": 0.8649, "step": 4854 }, { "epoch": 0.8729659264586892, "grad_norm": 1.5543646812438965, "learning_rate": 9.461755533973155e-06, "loss": 0.822, "step": 4855 }, { "epoch": 0.873145734064551, "grad_norm": 1.5335321426391602, "learning_rate": 9.4614926315554e-06, "loss": 0.7539, "step": 4856 }, { "epoch": 0.8733255416704127, "grad_norm": 1.6851465702056885, "learning_rate": 9.461229668601045e-06, "loss": 0.8131, "step": 4857 }, { "epoch": 0.8735053492762744, "grad_norm": 1.692925214767456, "learning_rate": 9.460966645113659e-06, "loss": 1.0045, "step": 4858 }, { "epoch": 0.8736851568821361, "grad_norm": 1.4875473976135254, "learning_rate": 9.460703561096805e-06, "loss": 0.7709, "step": 4859 }, { "epoch": 0.8738649644879979, "grad_norm": 1.558475136756897, "learning_rate": 9.46044041655406e-06, "loss": 0.8263, "step": 4860 }, { "epoch": 0.8740447720938596, "grad_norm": 1.4927070140838623, "learning_rate": 9.46017721148899e-06, "loss": 0.79, "step": 4861 }, { "epoch": 0.8742245796997213, "grad_norm": 1.5217283964157104, "learning_rate": 9.459913945905168e-06, "loss": 0.7747, "step": 4862 }, { "epoch": 0.874404387305583, "grad_norm": 1.227650761604309, "learning_rate": 9.459650619806164e-06, "loss": 0.9887, "step": 4863 }, { "epoch": 0.8745841949114448, "grad_norm": 1.4876627922058105, "learning_rate": 9.459387233195556e-06, "loss": 0.7519, "step": 4864 }, { "epoch": 0.8747640025173065, "grad_norm": 1.5357236862182617, "learning_rate": 9.459123786076911e-06, "loss": 0.743, "step": 4865 }, { "epoch": 0.8749438101231682, "grad_norm": 1.5553922653198242, "learning_rate": 9.45886027845381e-06, "loss": 0.7437, "step": 4866 }, { "epoch": 0.8751236177290299, "grad_norm": 1.8726946115493774, "learning_rate": 9.458596710329824e-06, "loss": 0.7742, "step": 4867 }, { "epoch": 0.8753034253348917, "grad_norm": 1.4099657535552979, "learning_rate": 9.458333081708533e-06, "loss": 0.742, "step": 4868 }, { "epoch": 0.8754832329407534, "grad_norm": 1.3752559423446655, "learning_rate": 9.45806939259351e-06, "loss": 0.7854, "step": 4869 }, { "epoch": 0.8756630405466151, "grad_norm": 1.689400553703308, "learning_rate": 9.457805642988336e-06, "loss": 0.809, "step": 4870 }, { "epoch": 0.8758428481524768, "grad_norm": 1.4976612329483032, "learning_rate": 9.457541832896588e-06, "loss": 0.764, "step": 4871 }, { "epoch": 0.8760226557583386, "grad_norm": 1.515519380569458, "learning_rate": 9.457277962321847e-06, "loss": 0.8643, "step": 4872 }, { "epoch": 0.8762024633642003, "grad_norm": 1.5723596811294556, "learning_rate": 9.457014031267692e-06, "loss": 0.8624, "step": 4873 }, { "epoch": 0.876382270970062, "grad_norm": 1.303154468536377, "learning_rate": 9.456750039737706e-06, "loss": 1.023, "step": 4874 }, { "epoch": 0.8765620785759237, "grad_norm": 1.7216514348983765, "learning_rate": 9.456485987735468e-06, "loss": 0.7906, "step": 4875 }, { "epoch": 0.8767418861817855, "grad_norm": 1.532609224319458, "learning_rate": 9.456221875264562e-06, "loss": 0.7607, "step": 4876 }, { "epoch": 0.8769216937876472, "grad_norm": 1.6174463033676147, "learning_rate": 9.455957702328576e-06, "loss": 0.8151, "step": 4877 }, { "epoch": 0.877101501393509, "grad_norm": 1.5245028734207153, "learning_rate": 9.455693468931086e-06, "loss": 0.8325, "step": 4878 }, { "epoch": 0.8772813089993706, "grad_norm": 1.4245812892913818, "learning_rate": 9.455429175075685e-06, "loss": 0.8131, "step": 4879 }, { "epoch": 0.8774611166052324, "grad_norm": 1.50757896900177, "learning_rate": 9.455164820765956e-06, "loss": 0.7041, "step": 4880 }, { "epoch": 0.8776409242110941, "grad_norm": 1.386338472366333, "learning_rate": 9.454900406005487e-06, "loss": 0.709, "step": 4881 }, { "epoch": 0.8778207318169559, "grad_norm": 2.3101065158843994, "learning_rate": 9.454635930797863e-06, "loss": 0.7664, "step": 4882 }, { "epoch": 0.8780005394228176, "grad_norm": 1.5211983919143677, "learning_rate": 9.454371395146677e-06, "loss": 0.7862, "step": 4883 }, { "epoch": 0.8781803470286793, "grad_norm": 1.5126399993896484, "learning_rate": 9.454106799055513e-06, "loss": 0.7689, "step": 4884 }, { "epoch": 0.8783601546345411, "grad_norm": 1.0781089067459106, "learning_rate": 9.453842142527966e-06, "loss": 0.9965, "step": 4885 }, { "epoch": 0.8785399622404028, "grad_norm": 1.0312448740005493, "learning_rate": 9.453577425567626e-06, "loss": 1.0544, "step": 4886 }, { "epoch": 0.8787197698462645, "grad_norm": 1.0195186138153076, "learning_rate": 9.453312648178081e-06, "loss": 1.0529, "step": 4887 }, { "epoch": 0.8788995774521262, "grad_norm": 0.9801411628723145, "learning_rate": 9.45304781036293e-06, "loss": 1.0345, "step": 4888 }, { "epoch": 0.879079385057988, "grad_norm": 1.598496913909912, "learning_rate": 9.45278291212576e-06, "loss": 0.8494, "step": 4889 }, { "epoch": 0.8792591926638497, "grad_norm": 1.6401399374008179, "learning_rate": 9.45251795347017e-06, "loss": 0.821, "step": 4890 }, { "epoch": 0.8794390002697114, "grad_norm": 1.533836007118225, "learning_rate": 9.452252934399755e-06, "loss": 0.8242, "step": 4891 }, { "epoch": 0.8796188078755731, "grad_norm": 1.2714773416519165, "learning_rate": 9.451987854918107e-06, "loss": 0.9859, "step": 4892 }, { "epoch": 0.8797986154814349, "grad_norm": 1.54738450050354, "learning_rate": 9.451722715028829e-06, "loss": 0.8025, "step": 4893 }, { "epoch": 0.8799784230872966, "grad_norm": 1.5204397439956665, "learning_rate": 9.451457514735513e-06, "loss": 0.7987, "step": 4894 }, { "epoch": 0.8801582306931583, "grad_norm": 1.1119036674499512, "learning_rate": 9.451192254041759e-06, "loss": 1.0621, "step": 4895 }, { "epoch": 0.88033803829902, "grad_norm": 1.469238519668579, "learning_rate": 9.450926932951166e-06, "loss": 0.7943, "step": 4896 }, { "epoch": 0.8805178459048818, "grad_norm": 1.4936633110046387, "learning_rate": 9.450661551467337e-06, "loss": 0.7664, "step": 4897 }, { "epoch": 0.8806976535107435, "grad_norm": 1.5451332330703735, "learning_rate": 9.450396109593869e-06, "loss": 0.7952, "step": 4898 }, { "epoch": 0.8808774611166053, "grad_norm": 1.5288515090942383, "learning_rate": 9.450130607334366e-06, "loss": 0.7617, "step": 4899 }, { "epoch": 0.8810572687224669, "grad_norm": 1.5904958248138428, "learning_rate": 9.44986504469243e-06, "loss": 0.813, "step": 4900 }, { "epoch": 0.8812370763283287, "grad_norm": 1.4625988006591797, "learning_rate": 9.449599421671664e-06, "loss": 0.7864, "step": 4901 }, { "epoch": 0.8814168839341904, "grad_norm": 1.4893211126327515, "learning_rate": 9.449333738275672e-06, "loss": 0.8041, "step": 4902 }, { "epoch": 0.8815966915400522, "grad_norm": 1.4213378429412842, "learning_rate": 9.449067994508058e-06, "loss": 0.8114, "step": 4903 }, { "epoch": 0.8817764991459138, "grad_norm": 1.4873967170715332, "learning_rate": 9.44880219037243e-06, "loss": 0.7848, "step": 4904 }, { "epoch": 0.8819563067517756, "grad_norm": 1.481550693511963, "learning_rate": 9.448536325872395e-06, "loss": 0.8612, "step": 4905 }, { "epoch": 0.8821361143576373, "grad_norm": 1.1166808605194092, "learning_rate": 9.448270401011559e-06, "loss": 1.0731, "step": 4906 }, { "epoch": 0.8823159219634991, "grad_norm": 1.380664348602295, "learning_rate": 9.44800441579353e-06, "loss": 0.714, "step": 4907 }, { "epoch": 0.8824957295693607, "grad_norm": 1.548128604888916, "learning_rate": 9.447738370221918e-06, "loss": 0.8393, "step": 4908 }, { "epoch": 0.8826755371752225, "grad_norm": 1.4302412271499634, "learning_rate": 9.44747226430033e-06, "loss": 0.7381, "step": 4909 }, { "epoch": 0.8828553447810843, "grad_norm": 1.5356941223144531, "learning_rate": 9.447206098032383e-06, "loss": 0.7432, "step": 4910 }, { "epoch": 0.883035152386946, "grad_norm": 1.4527058601379395, "learning_rate": 9.446939871421681e-06, "loss": 0.7691, "step": 4911 }, { "epoch": 0.8832149599928077, "grad_norm": 1.5258033275604248, "learning_rate": 9.446673584471841e-06, "loss": 0.7485, "step": 4912 }, { "epoch": 0.8833947675986694, "grad_norm": 1.5501693487167358, "learning_rate": 9.446407237186475e-06, "loss": 0.8388, "step": 4913 }, { "epoch": 0.8835745752045312, "grad_norm": 1.5017356872558594, "learning_rate": 9.446140829569198e-06, "loss": 0.8134, "step": 4914 }, { "epoch": 0.8837543828103929, "grad_norm": 1.4103807210922241, "learning_rate": 9.445874361623623e-06, "loss": 0.7881, "step": 4915 }, { "epoch": 0.8839341904162547, "grad_norm": 1.4725176095962524, "learning_rate": 9.445607833353368e-06, "loss": 0.8351, "step": 4916 }, { "epoch": 0.8841139980221163, "grad_norm": 1.6087779998779297, "learning_rate": 9.445341244762045e-06, "loss": 0.8337, "step": 4917 }, { "epoch": 0.8842938056279781, "grad_norm": 1.506746530532837, "learning_rate": 9.445074595853276e-06, "loss": 0.7488, "step": 4918 }, { "epoch": 0.8844736132338398, "grad_norm": 1.0505965948104858, "learning_rate": 9.444807886630678e-06, "loss": 1.1088, "step": 4919 }, { "epoch": 0.8846534208397016, "grad_norm": 1.0227065086364746, "learning_rate": 9.444541117097868e-06, "loss": 0.9988, "step": 4920 }, { "epoch": 0.8848332284455632, "grad_norm": 1.0770868062973022, "learning_rate": 9.444274287258469e-06, "loss": 1.0348, "step": 4921 }, { "epoch": 0.885013036051425, "grad_norm": 1.5776374340057373, "learning_rate": 9.444007397116095e-06, "loss": 0.8109, "step": 4922 }, { "epoch": 0.8851928436572867, "grad_norm": 1.5094382762908936, "learning_rate": 9.443740446674377e-06, "loss": 0.7891, "step": 4923 }, { "epoch": 0.8853726512631485, "grad_norm": 1.4603886604309082, "learning_rate": 9.44347343593693e-06, "loss": 0.8157, "step": 4924 }, { "epoch": 0.8855524588690101, "grad_norm": 1.4614754915237427, "learning_rate": 9.443206364907375e-06, "loss": 0.747, "step": 4925 }, { "epoch": 0.8857322664748719, "grad_norm": 1.50273597240448, "learning_rate": 9.442939233589346e-06, "loss": 0.8236, "step": 4926 }, { "epoch": 0.8859120740807336, "grad_norm": 1.211696982383728, "learning_rate": 9.442672041986456e-06, "loss": 1.0305, "step": 4927 }, { "epoch": 0.8860918816865954, "grad_norm": 1.419123888015747, "learning_rate": 9.44240479010234e-06, "loss": 0.7819, "step": 4928 }, { "epoch": 0.886271689292457, "grad_norm": 1.6028755903244019, "learning_rate": 9.442137477940617e-06, "loss": 0.7615, "step": 4929 }, { "epoch": 0.8864514968983188, "grad_norm": 1.5211751461029053, "learning_rate": 9.44187010550492e-06, "loss": 0.7966, "step": 4930 }, { "epoch": 0.8866313045041805, "grad_norm": 1.6033810377120972, "learning_rate": 9.441602672798871e-06, "loss": 0.9225, "step": 4931 }, { "epoch": 0.8868111121100423, "grad_norm": 1.4567378759384155, "learning_rate": 9.441335179826104e-06, "loss": 0.7326, "step": 4932 }, { "epoch": 0.8869909197159039, "grad_norm": 1.4947305917739868, "learning_rate": 9.441067626590244e-06, "loss": 1.0094, "step": 4933 }, { "epoch": 0.8871707273217657, "grad_norm": 1.5662206411361694, "learning_rate": 9.440800013094926e-06, "loss": 0.8007, "step": 4934 }, { "epoch": 0.8873505349276274, "grad_norm": 1.5403687953948975, "learning_rate": 9.440532339343777e-06, "loss": 0.7647, "step": 4935 }, { "epoch": 0.8875303425334892, "grad_norm": 1.1438363790512085, "learning_rate": 9.44026460534043e-06, "loss": 1.0233, "step": 4936 }, { "epoch": 0.8877101501393508, "grad_norm": 1.4619438648223877, "learning_rate": 9.439996811088522e-06, "loss": 0.856, "step": 4937 }, { "epoch": 0.8878899577452126, "grad_norm": 1.3628541231155396, "learning_rate": 9.43972895659168e-06, "loss": 0.7754, "step": 4938 }, { "epoch": 0.8880697653510744, "grad_norm": 1.4531751871109009, "learning_rate": 9.439461041853545e-06, "loss": 0.7997, "step": 4939 }, { "epoch": 0.8882495729569361, "grad_norm": 1.6807769536972046, "learning_rate": 9.439193066877746e-06, "loss": 0.7758, "step": 4940 }, { "epoch": 0.8884293805627979, "grad_norm": 1.547478199005127, "learning_rate": 9.43892503166792e-06, "loss": 0.7566, "step": 4941 }, { "epoch": 0.8886091881686595, "grad_norm": 1.5418468713760376, "learning_rate": 9.43865693622771e-06, "loss": 0.8414, "step": 4942 }, { "epoch": 0.8887889957745213, "grad_norm": 1.4266550540924072, "learning_rate": 9.438388780560747e-06, "loss": 0.808, "step": 4943 }, { "epoch": 0.888968803380383, "grad_norm": 1.3740637302398682, "learning_rate": 9.438120564670672e-06, "loss": 0.806, "step": 4944 }, { "epoch": 0.8891486109862448, "grad_norm": 1.5804153680801392, "learning_rate": 9.437852288561125e-06, "loss": 0.7656, "step": 4945 }, { "epoch": 0.8893284185921064, "grad_norm": 1.5152982473373413, "learning_rate": 9.437583952235747e-06, "loss": 0.8253, "step": 4946 }, { "epoch": 0.8895082261979682, "grad_norm": 1.9273169040679932, "learning_rate": 9.437315555698175e-06, "loss": 0.8394, "step": 4947 }, { "epoch": 0.8896880338038299, "grad_norm": 1.1930562257766724, "learning_rate": 9.437047098952054e-06, "loss": 1.0047, "step": 4948 }, { "epoch": 0.8898678414096917, "grad_norm": 1.4212086200714111, "learning_rate": 9.436778582001028e-06, "loss": 0.8391, "step": 4949 }, { "epoch": 0.8900476490155533, "grad_norm": 1.5743324756622314, "learning_rate": 9.436510004848736e-06, "loss": 0.8314, "step": 4950 }, { "epoch": 0.8902274566214151, "grad_norm": 1.4352772235870361, "learning_rate": 9.436241367498824e-06, "loss": 0.809, "step": 4951 }, { "epoch": 0.8904072642272768, "grad_norm": 1.0528550148010254, "learning_rate": 9.43597266995494e-06, "loss": 1.0741, "step": 4952 }, { "epoch": 0.8905870718331386, "grad_norm": 1.109467625617981, "learning_rate": 9.435703912220727e-06, "loss": 1.0047, "step": 4953 }, { "epoch": 0.8907668794390002, "grad_norm": 1.4717870950698853, "learning_rate": 9.43543509429983e-06, "loss": 0.7908, "step": 4954 }, { "epoch": 0.890946687044862, "grad_norm": 1.9859378337860107, "learning_rate": 9.4351662161959e-06, "loss": 0.7982, "step": 4955 }, { "epoch": 0.8911264946507237, "grad_norm": 1.4518814086914062, "learning_rate": 9.434897277912584e-06, "loss": 0.7617, "step": 4956 }, { "epoch": 0.8913063022565855, "grad_norm": 1.630049467086792, "learning_rate": 9.434628279453531e-06, "loss": 0.8251, "step": 4957 }, { "epoch": 0.8914861098624471, "grad_norm": 1.4946800470352173, "learning_rate": 9.43435922082239e-06, "loss": 0.7763, "step": 4958 }, { "epoch": 0.8916659174683089, "grad_norm": 1.2597384452819824, "learning_rate": 9.434090102022816e-06, "loss": 0.9962, "step": 4959 }, { "epoch": 0.8918457250741706, "grad_norm": 1.4550422430038452, "learning_rate": 9.433820923058455e-06, "loss": 0.8229, "step": 4960 }, { "epoch": 0.8920255326800324, "grad_norm": 3.5705783367156982, "learning_rate": 9.433551683932962e-06, "loss": 0.84, "step": 4961 }, { "epoch": 0.892205340285894, "grad_norm": 1.5047974586486816, "learning_rate": 9.433282384649991e-06, "loss": 0.7775, "step": 4962 }, { "epoch": 0.8923851478917558, "grad_norm": 1.5887138843536377, "learning_rate": 9.433013025213194e-06, "loss": 0.8024, "step": 4963 }, { "epoch": 0.8925649554976175, "grad_norm": 1.4463310241699219, "learning_rate": 9.432743605626228e-06, "loss": 0.8116, "step": 4964 }, { "epoch": 0.8927447631034793, "grad_norm": 1.414645791053772, "learning_rate": 9.432474125892747e-06, "loss": 0.7796, "step": 4965 }, { "epoch": 0.8929245707093411, "grad_norm": 1.257605791091919, "learning_rate": 9.432204586016407e-06, "loss": 1.0179, "step": 4966 }, { "epoch": 0.8931043783152027, "grad_norm": 1.0827717781066895, "learning_rate": 9.431934986000869e-06, "loss": 1.011, "step": 4967 }, { "epoch": 0.8932841859210645, "grad_norm": 1.6244893074035645, "learning_rate": 9.431665325849788e-06, "loss": 0.7707, "step": 4968 }, { "epoch": 0.8934639935269262, "grad_norm": 1.141173243522644, "learning_rate": 9.431395605566823e-06, "loss": 1.0076, "step": 4969 }, { "epoch": 0.893643801132788, "grad_norm": 1.486941933631897, "learning_rate": 9.431125825155633e-06, "loss": 0.883, "step": 4970 }, { "epoch": 0.8938236087386496, "grad_norm": 1.0566731691360474, "learning_rate": 9.43085598461988e-06, "loss": 1.0113, "step": 4971 }, { "epoch": 0.8940034163445114, "grad_norm": 1.5743610858917236, "learning_rate": 9.430586083963228e-06, "loss": 0.8649, "step": 4972 }, { "epoch": 0.8941832239503731, "grad_norm": 2.672884702682495, "learning_rate": 9.430316123189333e-06, "loss": 0.7681, "step": 4973 }, { "epoch": 0.8943630315562349, "grad_norm": 1.7700341939926147, "learning_rate": 9.430046102301861e-06, "loss": 0.787, "step": 4974 }, { "epoch": 0.8945428391620965, "grad_norm": 1.7103270292282104, "learning_rate": 9.42977602130448e-06, "loss": 0.7896, "step": 4975 }, { "epoch": 0.8947226467679583, "grad_norm": 1.4038941860198975, "learning_rate": 9.429505880200849e-06, "loss": 0.7629, "step": 4976 }, { "epoch": 0.89490245437382, "grad_norm": 1.4291133880615234, "learning_rate": 9.429235678994635e-06, "loss": 0.7907, "step": 4977 }, { "epoch": 0.8950822619796818, "grad_norm": 1.5724271535873413, "learning_rate": 9.428965417689504e-06, "loss": 0.7644, "step": 4978 }, { "epoch": 0.8952620695855434, "grad_norm": 1.4939767122268677, "learning_rate": 9.428695096289125e-06, "loss": 0.7341, "step": 4979 }, { "epoch": 0.8954418771914052, "grad_norm": 1.4309900999069214, "learning_rate": 9.428424714797164e-06, "loss": 0.7958, "step": 4980 }, { "epoch": 0.8956216847972669, "grad_norm": 1.484302043914795, "learning_rate": 9.428154273217289e-06, "loss": 0.8029, "step": 4981 }, { "epoch": 0.8958014924031287, "grad_norm": 1.5265486240386963, "learning_rate": 9.427883771553172e-06, "loss": 0.7528, "step": 4982 }, { "epoch": 0.8959813000089903, "grad_norm": 1.170527458190918, "learning_rate": 9.427613209808482e-06, "loss": 1.0244, "step": 4983 }, { "epoch": 0.8961611076148521, "grad_norm": 1.522199273109436, "learning_rate": 9.427342587986892e-06, "loss": 0.7798, "step": 4984 }, { "epoch": 0.8963409152207138, "grad_norm": 1.4527654647827148, "learning_rate": 9.427071906092071e-06, "loss": 0.7711, "step": 4985 }, { "epoch": 0.8965207228265756, "grad_norm": 1.0890135765075684, "learning_rate": 9.426801164127692e-06, "loss": 1.0314, "step": 4986 }, { "epoch": 0.8967005304324372, "grad_norm": 1.1439685821533203, "learning_rate": 9.426530362097433e-06, "loss": 1.0513, "step": 4987 }, { "epoch": 0.896880338038299, "grad_norm": 1.058232069015503, "learning_rate": 9.426259500004961e-06, "loss": 1.04, "step": 4988 }, { "epoch": 0.8970601456441607, "grad_norm": 1.4985085725784302, "learning_rate": 9.425988577853959e-06, "loss": 0.8063, "step": 4989 }, { "epoch": 0.8972399532500225, "grad_norm": 3.3908586502075195, "learning_rate": 9.425717595648099e-06, "loss": 0.8183, "step": 4990 }, { "epoch": 0.8974197608558842, "grad_norm": 1.6220691204071045, "learning_rate": 9.425446553391055e-06, "loss": 0.7778, "step": 4991 }, { "epoch": 0.8975995684617459, "grad_norm": 1.4926468133926392, "learning_rate": 9.425175451086513e-06, "loss": 0.8116, "step": 4992 }, { "epoch": 0.8977793760676077, "grad_norm": 1.5427669286727905, "learning_rate": 9.424904288738144e-06, "loss": 0.8945, "step": 4993 }, { "epoch": 0.8979591836734694, "grad_norm": 1.4080256223678589, "learning_rate": 9.424633066349629e-06, "loss": 0.7854, "step": 4994 }, { "epoch": 0.8981389912793312, "grad_norm": 1.4150761365890503, "learning_rate": 9.42436178392465e-06, "loss": 0.7965, "step": 4995 }, { "epoch": 0.8983187988851928, "grad_norm": 1.493156909942627, "learning_rate": 9.424090441466887e-06, "loss": 0.7907, "step": 4996 }, { "epoch": 0.8984986064910546, "grad_norm": 1.7350465059280396, "learning_rate": 9.423819038980022e-06, "loss": 0.8448, "step": 4997 }, { "epoch": 0.8986784140969163, "grad_norm": 1.4734959602355957, "learning_rate": 9.423547576467738e-06, "loss": 0.7874, "step": 4998 }, { "epoch": 0.8988582217027781, "grad_norm": 1.5065395832061768, "learning_rate": 9.423276053933716e-06, "loss": 0.7648, "step": 4999 }, { "epoch": 0.8990380293086397, "grad_norm": 1.4206337928771973, "learning_rate": 9.423004471381643e-06, "loss": 0.8236, "step": 5000 }, { "epoch": 0.8990380293086397, "eval_loss": 0.8222971558570862, "eval_runtime": 148.5664, "eval_samples_per_second": 96.805, "eval_steps_per_second": 1.514, "step": 5000 }, { "epoch": 0.8992178369145015, "grad_norm": 1.4380314350128174, "learning_rate": 9.422732828815203e-06, "loss": 0.7332, "step": 5001 }, { "epoch": 0.8993976445203632, "grad_norm": 1.5353572368621826, "learning_rate": 9.42246112623808e-06, "loss": 0.8145, "step": 5002 }, { "epoch": 0.899577452126225, "grad_norm": 1.5737299919128418, "learning_rate": 9.422189363653964e-06, "loss": 0.846, "step": 5003 }, { "epoch": 0.8997572597320866, "grad_norm": 1.514419436454773, "learning_rate": 9.421917541066539e-06, "loss": 0.7287, "step": 5004 }, { "epoch": 0.8999370673379484, "grad_norm": 1.5714675188064575, "learning_rate": 9.421645658479498e-06, "loss": 0.8387, "step": 5005 }, { "epoch": 0.9001168749438101, "grad_norm": 1.5765331983566284, "learning_rate": 9.421373715896527e-06, "loss": 0.86, "step": 5006 }, { "epoch": 0.9002966825496719, "grad_norm": 1.648154616355896, "learning_rate": 9.421101713321314e-06, "loss": 0.8313, "step": 5007 }, { "epoch": 0.9004764901555335, "grad_norm": 1.290747880935669, "learning_rate": 9.420829650757552e-06, "loss": 0.9963, "step": 5008 }, { "epoch": 0.9006562977613953, "grad_norm": 1.4874649047851562, "learning_rate": 9.420557528208933e-06, "loss": 0.7386, "step": 5009 }, { "epoch": 0.900836105367257, "grad_norm": 1.612047791481018, "learning_rate": 9.42028534567915e-06, "loss": 0.8118, "step": 5010 }, { "epoch": 0.9010159129731188, "grad_norm": 1.6266807317733765, "learning_rate": 9.420013103171893e-06, "loss": 0.7957, "step": 5011 }, { "epoch": 0.9011957205789805, "grad_norm": 1.5886543989181519, "learning_rate": 9.419740800690858e-06, "loss": 0.7549, "step": 5012 }, { "epoch": 0.9013755281848422, "grad_norm": 1.4109641313552856, "learning_rate": 9.41946843823974e-06, "loss": 0.8385, "step": 5013 }, { "epoch": 0.9015553357907039, "grad_norm": 1.4762169122695923, "learning_rate": 9.419196015822235e-06, "loss": 0.8492, "step": 5014 }, { "epoch": 0.9017351433965657, "grad_norm": 1.1062211990356445, "learning_rate": 9.418923533442038e-06, "loss": 1.0142, "step": 5015 }, { "epoch": 0.9019149510024274, "grad_norm": 1.460412621498108, "learning_rate": 9.418650991102847e-06, "loss": 0.7925, "step": 5016 }, { "epoch": 0.9020947586082891, "grad_norm": 1.5884506702423096, "learning_rate": 9.41837838880836e-06, "loss": 0.8401, "step": 5017 }, { "epoch": 0.9022745662141508, "grad_norm": 1.51798415184021, "learning_rate": 9.418105726562276e-06, "loss": 0.7458, "step": 5018 }, { "epoch": 0.9024543738200126, "grad_norm": 1.405206322669983, "learning_rate": 9.417833004368295e-06, "loss": 0.7389, "step": 5019 }, { "epoch": 0.9026341814258743, "grad_norm": 1.377221941947937, "learning_rate": 9.417560222230115e-06, "loss": 0.7534, "step": 5020 }, { "epoch": 0.902813989031736, "grad_norm": 1.4881842136383057, "learning_rate": 9.417287380151441e-06, "loss": 0.7981, "step": 5021 }, { "epoch": 0.9029937966375978, "grad_norm": 1.139966607093811, "learning_rate": 9.417014478135973e-06, "loss": 0.9737, "step": 5022 }, { "epoch": 0.9031736042434595, "grad_norm": 1.4195433855056763, "learning_rate": 9.416741516187414e-06, "loss": 0.8282, "step": 5023 }, { "epoch": 0.9033534118493213, "grad_norm": 1.5823339223861694, "learning_rate": 9.416468494309468e-06, "loss": 0.8622, "step": 5024 }, { "epoch": 0.903533219455183, "grad_norm": 1.080438256263733, "learning_rate": 9.41619541250584e-06, "loss": 1.0219, "step": 5025 }, { "epoch": 0.9037130270610447, "grad_norm": 1.5047681331634521, "learning_rate": 9.415922270780234e-06, "loss": 0.8048, "step": 5026 }, { "epoch": 0.9038928346669064, "grad_norm": 1.3862308263778687, "learning_rate": 9.415649069136356e-06, "loss": 0.7923, "step": 5027 }, { "epoch": 0.9040726422727682, "grad_norm": 1.4931395053863525, "learning_rate": 9.415375807577915e-06, "loss": 0.7903, "step": 5028 }, { "epoch": 0.9042524498786298, "grad_norm": 1.4487100839614868, "learning_rate": 9.41510248610862e-06, "loss": 0.7985, "step": 5029 }, { "epoch": 0.9044322574844916, "grad_norm": 1.0491769313812256, "learning_rate": 9.414829104732174e-06, "loss": 1.0474, "step": 5030 }, { "epoch": 0.9046120650903533, "grad_norm": 1.10770583152771, "learning_rate": 9.414555663452293e-06, "loss": 1.0472, "step": 5031 }, { "epoch": 0.9047918726962151, "grad_norm": 1.577890157699585, "learning_rate": 9.414282162272683e-06, "loss": 0.7533, "step": 5032 }, { "epoch": 0.9049716803020768, "grad_norm": 1.5551683902740479, "learning_rate": 9.414008601197056e-06, "loss": 0.7943, "step": 5033 }, { "epoch": 0.9051514879079385, "grad_norm": 1.430364727973938, "learning_rate": 9.413734980229123e-06, "loss": 0.9887, "step": 5034 }, { "epoch": 0.9053312955138002, "grad_norm": 1.444108247756958, "learning_rate": 9.4134612993726e-06, "loss": 0.7666, "step": 5035 }, { "epoch": 0.905511103119662, "grad_norm": 1.1056288480758667, "learning_rate": 9.413187558631198e-06, "loss": 1.0184, "step": 5036 }, { "epoch": 0.9056909107255237, "grad_norm": 1.5407027006149292, "learning_rate": 9.41291375800863e-06, "loss": 0.8607, "step": 5037 }, { "epoch": 0.9058707183313854, "grad_norm": 1.4209930896759033, "learning_rate": 9.412639897508613e-06, "loss": 0.8229, "step": 5038 }, { "epoch": 0.9060505259372471, "grad_norm": 1.5430734157562256, "learning_rate": 9.412365977134862e-06, "loss": 0.7554, "step": 5039 }, { "epoch": 0.9062303335431089, "grad_norm": 1.3901515007019043, "learning_rate": 9.412091996891097e-06, "loss": 0.7752, "step": 5040 }, { "epoch": 0.9064101411489706, "grad_norm": 1.4660435914993286, "learning_rate": 9.411817956781031e-06, "loss": 0.8073, "step": 5041 }, { "epoch": 0.9065899487548323, "grad_norm": 1.0854905843734741, "learning_rate": 9.411543856808384e-06, "loss": 0.9962, "step": 5042 }, { "epoch": 0.906769756360694, "grad_norm": 1.6543750762939453, "learning_rate": 9.411269696976876e-06, "loss": 0.7895, "step": 5043 }, { "epoch": 0.9069495639665558, "grad_norm": 1.5434178113937378, "learning_rate": 9.410995477290226e-06, "loss": 0.8061, "step": 5044 }, { "epoch": 0.9071293715724175, "grad_norm": 1.4061789512634277, "learning_rate": 9.410721197752154e-06, "loss": 0.7618, "step": 5045 }, { "epoch": 0.9073091791782792, "grad_norm": 1.513649821281433, "learning_rate": 9.410446858366385e-06, "loss": 0.728, "step": 5046 }, { "epoch": 0.9074889867841409, "grad_norm": 1.5337766408920288, "learning_rate": 9.410172459136639e-06, "loss": 0.7868, "step": 5047 }, { "epoch": 0.9076687943900027, "grad_norm": 1.4523080587387085, "learning_rate": 9.409898000066638e-06, "loss": 0.7943, "step": 5048 }, { "epoch": 0.9078486019958645, "grad_norm": 1.52362060546875, "learning_rate": 9.409623481160108e-06, "loss": 0.8439, "step": 5049 }, { "epoch": 0.9080284096017261, "grad_norm": 1.608469009399414, "learning_rate": 9.409348902420773e-06, "loss": 0.7984, "step": 5050 }, { "epoch": 0.9082082172075879, "grad_norm": 8.01246166229248, "learning_rate": 9.40907426385236e-06, "loss": 0.8223, "step": 5051 }, { "epoch": 0.9083880248134496, "grad_norm": 1.5544511079788208, "learning_rate": 9.408799565458595e-06, "loss": 0.8363, "step": 5052 }, { "epoch": 0.9085678324193114, "grad_norm": 1.5351781845092773, "learning_rate": 9.408524807243204e-06, "loss": 0.754, "step": 5053 }, { "epoch": 0.908747640025173, "grad_norm": 1.455367088317871, "learning_rate": 9.408249989209916e-06, "loss": 0.7828, "step": 5054 }, { "epoch": 0.9089274476310348, "grad_norm": 1.479666829109192, "learning_rate": 9.407975111362461e-06, "loss": 0.7943, "step": 5055 }, { "epoch": 0.9091072552368965, "grad_norm": 1.4296228885650635, "learning_rate": 9.407700173704566e-06, "loss": 0.7257, "step": 5056 }, { "epoch": 0.9092870628427583, "grad_norm": 1.5804457664489746, "learning_rate": 9.407425176239964e-06, "loss": 0.8257, "step": 5057 }, { "epoch": 0.90946687044862, "grad_norm": 1.104758381843567, "learning_rate": 9.407150118972386e-06, "loss": 1.0069, "step": 5058 }, { "epoch": 0.9096466780544817, "grad_norm": 1.451600193977356, "learning_rate": 9.406875001905563e-06, "loss": 0.8071, "step": 5059 }, { "epoch": 0.9098264856603434, "grad_norm": 1.5209804773330688, "learning_rate": 9.40659982504323e-06, "loss": 0.7799, "step": 5060 }, { "epoch": 0.9100062932662052, "grad_norm": 1.6221436262130737, "learning_rate": 9.406324588389117e-06, "loss": 0.7634, "step": 5061 }, { "epoch": 0.9101861008720669, "grad_norm": 1.5968841314315796, "learning_rate": 9.406049291946961e-06, "loss": 0.8846, "step": 5062 }, { "epoch": 0.9103659084779286, "grad_norm": 1.4584510326385498, "learning_rate": 9.405773935720499e-06, "loss": 0.7714, "step": 5063 }, { "epoch": 0.9105457160837903, "grad_norm": 1.5931679010391235, "learning_rate": 9.405498519713465e-06, "loss": 0.7886, "step": 5064 }, { "epoch": 0.9107255236896521, "grad_norm": 1.5196269750595093, "learning_rate": 9.405223043929597e-06, "loss": 0.7573, "step": 5065 }, { "epoch": 0.9109053312955138, "grad_norm": 1.4341652393341064, "learning_rate": 9.404947508372633e-06, "loss": 0.8125, "step": 5066 }, { "epoch": 0.9110851389013755, "grad_norm": 1.4431241750717163, "learning_rate": 9.40467191304631e-06, "loss": 0.7729, "step": 5067 }, { "epoch": 0.9112649465072372, "grad_norm": 1.473512053489685, "learning_rate": 9.40439625795437e-06, "loss": 0.769, "step": 5068 }, { "epoch": 0.911444754113099, "grad_norm": 1.0852359533309937, "learning_rate": 9.404120543100553e-06, "loss": 1.0707, "step": 5069 }, { "epoch": 0.9116245617189607, "grad_norm": 1.4767255783081055, "learning_rate": 9.403844768488595e-06, "loss": 0.7274, "step": 5070 }, { "epoch": 0.9118043693248225, "grad_norm": 1.7169439792633057, "learning_rate": 9.403568934122244e-06, "loss": 0.8075, "step": 5071 }, { "epoch": 0.9119841769306841, "grad_norm": 0.993739128112793, "learning_rate": 9.403293040005242e-06, "loss": 1.0358, "step": 5072 }, { "epoch": 0.9121639845365459, "grad_norm": 1.7921611070632935, "learning_rate": 9.40301708614133e-06, "loss": 0.81, "step": 5073 }, { "epoch": 0.9123437921424076, "grad_norm": 1.1698029041290283, "learning_rate": 9.402741072534253e-06, "loss": 1.0009, "step": 5074 }, { "epoch": 0.9125235997482694, "grad_norm": 1.573439359664917, "learning_rate": 9.402464999187758e-06, "loss": 0.8072, "step": 5075 }, { "epoch": 0.912703407354131, "grad_norm": 1.5506081581115723, "learning_rate": 9.402188866105588e-06, "loss": 0.7878, "step": 5076 }, { "epoch": 0.9128832149599928, "grad_norm": 0.965960681438446, "learning_rate": 9.401912673291494e-06, "loss": 1.0249, "step": 5077 }, { "epoch": 0.9130630225658546, "grad_norm": 1.5282543897628784, "learning_rate": 9.401636420749219e-06, "loss": 0.7619, "step": 5078 }, { "epoch": 0.9132428301717163, "grad_norm": 1.3597201108932495, "learning_rate": 9.401360108482513e-06, "loss": 0.7609, "step": 5079 }, { "epoch": 0.913422637777578, "grad_norm": 1.6446382999420166, "learning_rate": 9.401083736495125e-06, "loss": 0.7997, "step": 5080 }, { "epoch": 0.9136024453834397, "grad_norm": 1.6037688255310059, "learning_rate": 9.400807304790807e-06, "loss": 0.791, "step": 5081 }, { "epoch": 0.9137822529893015, "grad_norm": 1.5888334512710571, "learning_rate": 9.400530813373308e-06, "loss": 0.802, "step": 5082 }, { "epoch": 0.9139620605951632, "grad_norm": 1.4780842065811157, "learning_rate": 9.40025426224638e-06, "loss": 0.8766, "step": 5083 }, { "epoch": 0.9141418682010249, "grad_norm": 1.4775323867797852, "learning_rate": 9.399977651413775e-06, "loss": 0.7788, "step": 5084 }, { "epoch": 0.9143216758068866, "grad_norm": 1.577797293663025, "learning_rate": 9.399700980879246e-06, "loss": 0.7787, "step": 5085 }, { "epoch": 0.9145014834127484, "grad_norm": 1.4458776712417603, "learning_rate": 9.39942425064655e-06, "loss": 1.0326, "step": 5086 }, { "epoch": 0.9146812910186101, "grad_norm": 1.462699055671692, "learning_rate": 9.399147460719438e-06, "loss": 0.793, "step": 5087 }, { "epoch": 0.9148610986244718, "grad_norm": 1.5673062801361084, "learning_rate": 9.398870611101668e-06, "loss": 0.7746, "step": 5088 }, { "epoch": 0.9150409062303335, "grad_norm": 1.4571340084075928, "learning_rate": 9.398593701796993e-06, "loss": 0.7729, "step": 5089 }, { "epoch": 0.9152207138361953, "grad_norm": 1.0473713874816895, "learning_rate": 9.398316732809177e-06, "loss": 0.9937, "step": 5090 }, { "epoch": 0.915400521442057, "grad_norm": 1.4139474630355835, "learning_rate": 9.398039704141971e-06, "loss": 0.8695, "step": 5091 }, { "epoch": 0.9155803290479188, "grad_norm": 1.5905563831329346, "learning_rate": 9.397762615799137e-06, "loss": 0.7594, "step": 5092 }, { "epoch": 0.9157601366537804, "grad_norm": 1.0740511417388916, "learning_rate": 9.397485467784438e-06, "loss": 1.0154, "step": 5093 }, { "epoch": 0.9159399442596422, "grad_norm": 1.4100329875946045, "learning_rate": 9.397208260101628e-06, "loss": 0.756, "step": 5094 }, { "epoch": 0.9161197518655039, "grad_norm": 1.533599615097046, "learning_rate": 9.396930992754475e-06, "loss": 0.8089, "step": 5095 }, { "epoch": 0.9162995594713657, "grad_norm": 1.4230890274047852, "learning_rate": 9.396653665746733e-06, "loss": 0.7148, "step": 5096 }, { "epoch": 0.9164793670772273, "grad_norm": 1.138244867324829, "learning_rate": 9.396376279082174e-06, "loss": 1.0034, "step": 5097 }, { "epoch": 0.9166591746830891, "grad_norm": 1.8555201292037964, "learning_rate": 9.396098832764555e-06, "loss": 0.8057, "step": 5098 }, { "epoch": 0.9168389822889508, "grad_norm": 1.4099005460739136, "learning_rate": 9.395821326797645e-06, "loss": 0.7676, "step": 5099 }, { "epoch": 0.9170187898948126, "grad_norm": 1.057647943496704, "learning_rate": 9.395543761185207e-06, "loss": 0.9775, "step": 5100 }, { "epoch": 0.9171985975006742, "grad_norm": 1.511412501335144, "learning_rate": 9.395266135931007e-06, "loss": 0.8294, "step": 5101 }, { "epoch": 0.917378405106536, "grad_norm": 1.5202441215515137, "learning_rate": 9.394988451038813e-06, "loss": 0.7986, "step": 5102 }, { "epoch": 0.9175582127123977, "grad_norm": 1.0165436267852783, "learning_rate": 9.394710706512393e-06, "loss": 1.0001, "step": 5103 }, { "epoch": 0.9177380203182595, "grad_norm": 1.514156460762024, "learning_rate": 9.394432902355515e-06, "loss": 0.8203, "step": 5104 }, { "epoch": 0.9179178279241212, "grad_norm": 1.4850209951400757, "learning_rate": 9.394155038571948e-06, "loss": 0.835, "step": 5105 }, { "epoch": 0.9180976355299829, "grad_norm": 1.678871989250183, "learning_rate": 9.393877115165463e-06, "loss": 0.7559, "step": 5106 }, { "epoch": 0.9182774431358447, "grad_norm": 1.169964075088501, "learning_rate": 9.393599132139832e-06, "loss": 1.0073, "step": 5107 }, { "epoch": 0.9184572507417064, "grad_norm": 1.372173547744751, "learning_rate": 9.393321089498824e-06, "loss": 0.8006, "step": 5108 }, { "epoch": 0.9186370583475681, "grad_norm": 1.641574740409851, "learning_rate": 9.393042987246215e-06, "loss": 0.7861, "step": 5109 }, { "epoch": 0.9188168659534298, "grad_norm": 1.535106897354126, "learning_rate": 9.392764825385776e-06, "loss": 0.8148, "step": 5110 }, { "epoch": 0.9189966735592916, "grad_norm": 1.651363492012024, "learning_rate": 9.392486603921283e-06, "loss": 0.7513, "step": 5111 }, { "epoch": 0.9191764811651533, "grad_norm": 1.5642551183700562, "learning_rate": 9.392208322856508e-06, "loss": 0.8922, "step": 5112 }, { "epoch": 0.919356288771015, "grad_norm": 1.3872352838516235, "learning_rate": 9.391929982195233e-06, "loss": 0.8162, "step": 5113 }, { "epoch": 0.9195360963768767, "grad_norm": 1.5065405368804932, "learning_rate": 9.391651581941228e-06, "loss": 0.7925, "step": 5114 }, { "epoch": 0.9197159039827385, "grad_norm": 1.8298064470291138, "learning_rate": 9.391373122098275e-06, "loss": 0.8125, "step": 5115 }, { "epoch": 0.9198957115886002, "grad_norm": 1.541296362876892, "learning_rate": 9.39109460267015e-06, "loss": 0.7833, "step": 5116 }, { "epoch": 0.920075519194462, "grad_norm": 1.5341918468475342, "learning_rate": 9.390816023660634e-06, "loss": 0.7699, "step": 5117 }, { "epoch": 0.9202553268003236, "grad_norm": 1.518286108970642, "learning_rate": 9.390537385073506e-06, "loss": 0.8192, "step": 5118 }, { "epoch": 0.9204351344061854, "grad_norm": 1.4760112762451172, "learning_rate": 9.390258686912545e-06, "loss": 0.7989, "step": 5119 }, { "epoch": 0.9206149420120471, "grad_norm": 1.6888883113861084, "learning_rate": 9.389979929181535e-06, "loss": 0.7882, "step": 5120 }, { "epoch": 0.9207947496179089, "grad_norm": 1.3638006448745728, "learning_rate": 9.389701111884259e-06, "loss": 1.0521, "step": 5121 }, { "epoch": 0.9209745572237705, "grad_norm": 1.5115140676498413, "learning_rate": 9.389422235024498e-06, "loss": 0.753, "step": 5122 }, { "epoch": 0.9211543648296323, "grad_norm": 1.553532361984253, "learning_rate": 9.389143298606037e-06, "loss": 0.8068, "step": 5123 }, { "epoch": 0.921334172435494, "grad_norm": 1.4974693059921265, "learning_rate": 9.388864302632659e-06, "loss": 0.7521, "step": 5124 }, { "epoch": 0.9215139800413558, "grad_norm": 1.675368309020996, "learning_rate": 9.388585247108151e-06, "loss": 0.8993, "step": 5125 }, { "epoch": 0.9216937876472174, "grad_norm": 1.5580986738204956, "learning_rate": 9.388306132036301e-06, "loss": 0.7785, "step": 5126 }, { "epoch": 0.9218735952530792, "grad_norm": 1.4952630996704102, "learning_rate": 9.388026957420895e-06, "loss": 0.7663, "step": 5127 }, { "epoch": 0.9220534028589409, "grad_norm": 1.4079821109771729, "learning_rate": 9.387747723265721e-06, "loss": 0.7852, "step": 5128 }, { "epoch": 0.9222332104648027, "grad_norm": 1.429874062538147, "learning_rate": 9.387468429574567e-06, "loss": 0.7343, "step": 5129 }, { "epoch": 0.9224130180706643, "grad_norm": 1.41315495967865, "learning_rate": 9.387189076351223e-06, "loss": 0.7466, "step": 5130 }, { "epoch": 0.9225928256765261, "grad_norm": 1.4900848865509033, "learning_rate": 9.386909663599482e-06, "loss": 0.7851, "step": 5131 }, { "epoch": 0.9227726332823879, "grad_norm": 1.444315791130066, "learning_rate": 9.386630191323131e-06, "loss": 0.7312, "step": 5132 }, { "epoch": 0.9229524408882496, "grad_norm": 1.8127222061157227, "learning_rate": 9.386350659525965e-06, "loss": 0.8465, "step": 5133 }, { "epoch": 0.9231322484941114, "grad_norm": 1.6152628660202026, "learning_rate": 9.386071068211775e-06, "loss": 0.8112, "step": 5134 }, { "epoch": 0.923312056099973, "grad_norm": 1.7027703523635864, "learning_rate": 9.385791417384356e-06, "loss": 0.8468, "step": 5135 }, { "epoch": 0.9234918637058348, "grad_norm": 1.4115921258926392, "learning_rate": 9.385511707047504e-06, "loss": 0.7644, "step": 5136 }, { "epoch": 0.9236716713116965, "grad_norm": 1.4699656963348389, "learning_rate": 9.385231937205011e-06, "loss": 0.8127, "step": 5137 }, { "epoch": 0.9238514789175583, "grad_norm": 1.6054273843765259, "learning_rate": 9.384952107860674e-06, "loss": 0.7908, "step": 5138 }, { "epoch": 0.9240312865234199, "grad_norm": 1.5451942682266235, "learning_rate": 9.384672219018292e-06, "loss": 0.8621, "step": 5139 }, { "epoch": 0.9242110941292817, "grad_norm": 1.5346368551254272, "learning_rate": 9.384392270681661e-06, "loss": 0.8709, "step": 5140 }, { "epoch": 0.9243909017351434, "grad_norm": 1.0295500755310059, "learning_rate": 9.38411226285458e-06, "loss": 0.9883, "step": 5141 }, { "epoch": 0.9245707093410052, "grad_norm": 1.5380516052246094, "learning_rate": 9.383832195540848e-06, "loss": 0.8072, "step": 5142 }, { "epoch": 0.9247505169468668, "grad_norm": 1.4569404125213623, "learning_rate": 9.383552068744264e-06, "loss": 0.819, "step": 5143 }, { "epoch": 0.9249303245527286, "grad_norm": 1.5045782327651978, "learning_rate": 9.383271882468631e-06, "loss": 0.7507, "step": 5144 }, { "epoch": 0.9251101321585903, "grad_norm": 1.482135534286499, "learning_rate": 9.382991636717752e-06, "loss": 0.7318, "step": 5145 }, { "epoch": 0.9252899397644521, "grad_norm": 1.6250739097595215, "learning_rate": 9.382711331495425e-06, "loss": 0.8126, "step": 5146 }, { "epoch": 0.9254697473703137, "grad_norm": 1.7290821075439453, "learning_rate": 9.382430966805456e-06, "loss": 0.8144, "step": 5147 }, { "epoch": 0.9256495549761755, "grad_norm": 1.510159969329834, "learning_rate": 9.382150542651649e-06, "loss": 0.8455, "step": 5148 }, { "epoch": 0.9258293625820372, "grad_norm": 1.5095720291137695, "learning_rate": 9.38187005903781e-06, "loss": 0.7697, "step": 5149 }, { "epoch": 0.926009170187899, "grad_norm": 1.6004877090454102, "learning_rate": 9.381589515967745e-06, "loss": 0.8137, "step": 5150 }, { "epoch": 0.9261889777937606, "grad_norm": 1.4157841205596924, "learning_rate": 9.381308913445258e-06, "loss": 0.749, "step": 5151 }, { "epoch": 0.9263687853996224, "grad_norm": 1.5176798105239868, "learning_rate": 9.381028251474159e-06, "loss": 0.7901, "step": 5152 }, { "epoch": 0.9265485930054841, "grad_norm": 1.4975584745407104, "learning_rate": 9.380747530058255e-06, "loss": 0.7759, "step": 5153 }, { "epoch": 0.9267284006113459, "grad_norm": 1.5605871677398682, "learning_rate": 9.380466749201353e-06, "loss": 0.8394, "step": 5154 }, { "epoch": 0.9269082082172075, "grad_norm": 1.42843759059906, "learning_rate": 9.380185908907267e-06, "loss": 0.8069, "step": 5155 }, { "epoch": 0.9270880158230693, "grad_norm": 1.5015894174575806, "learning_rate": 9.379905009179804e-06, "loss": 0.8849, "step": 5156 }, { "epoch": 0.927267823428931, "grad_norm": 1.3675652742385864, "learning_rate": 9.379624050022779e-06, "loss": 0.7478, "step": 5157 }, { "epoch": 0.9274476310347928, "grad_norm": 1.59014892578125, "learning_rate": 9.37934303144e-06, "loss": 0.8313, "step": 5158 }, { "epoch": 0.9276274386406544, "grad_norm": 1.45221745967865, "learning_rate": 9.379061953435286e-06, "loss": 0.801, "step": 5159 }, { "epoch": 0.9278072462465162, "grad_norm": 1.5578912496566772, "learning_rate": 9.378780816012445e-06, "loss": 0.7755, "step": 5160 }, { "epoch": 0.927987053852378, "grad_norm": 1.4394546747207642, "learning_rate": 9.378499619175295e-06, "loss": 0.723, "step": 5161 }, { "epoch": 0.9281668614582397, "grad_norm": 1.6645104885101318, "learning_rate": 9.378218362927648e-06, "loss": 0.8065, "step": 5162 }, { "epoch": 0.9283466690641015, "grad_norm": 1.5515800714492798, "learning_rate": 9.377937047273324e-06, "loss": 0.8201, "step": 5163 }, { "epoch": 0.9285264766699631, "grad_norm": 1.5176299810409546, "learning_rate": 9.37765567221614e-06, "loss": 0.7998, "step": 5164 }, { "epoch": 0.9287062842758249, "grad_norm": 1.5752800703048706, "learning_rate": 9.37737423775991e-06, "loss": 0.7671, "step": 5165 }, { "epoch": 0.9288860918816866, "grad_norm": 1.1199350357055664, "learning_rate": 9.377092743908456e-06, "loss": 1.038, "step": 5166 }, { "epoch": 0.9290658994875484, "grad_norm": 1.0875550508499146, "learning_rate": 9.376811190665598e-06, "loss": 0.9781, "step": 5167 }, { "epoch": 0.92924570709341, "grad_norm": 1.5622622966766357, "learning_rate": 9.376529578035155e-06, "loss": 0.8457, "step": 5168 }, { "epoch": 0.9294255146992718, "grad_norm": 1.3372231721878052, "learning_rate": 9.376247906020947e-06, "loss": 0.7727, "step": 5169 }, { "epoch": 0.9296053223051335, "grad_norm": 1.451878547668457, "learning_rate": 9.375966174626798e-06, "loss": 0.7838, "step": 5170 }, { "epoch": 0.9297851299109953, "grad_norm": 1.539966344833374, "learning_rate": 9.37568438385653e-06, "loss": 0.8188, "step": 5171 }, { "epoch": 0.9299649375168569, "grad_norm": 1.4757055044174194, "learning_rate": 9.375402533713966e-06, "loss": 0.7826, "step": 5172 }, { "epoch": 0.9301447451227187, "grad_norm": 1.4115300178527832, "learning_rate": 9.375120624202932e-06, "loss": 0.8595, "step": 5173 }, { "epoch": 0.9303245527285804, "grad_norm": 1.5255554914474487, "learning_rate": 9.374838655327251e-06, "loss": 0.8307, "step": 5174 }, { "epoch": 0.9305043603344422, "grad_norm": 1.4835933446884155, "learning_rate": 9.374556627090749e-06, "loss": 1.0535, "step": 5175 }, { "epoch": 0.9306841679403038, "grad_norm": 1.3741183280944824, "learning_rate": 9.374274539497254e-06, "loss": 0.7523, "step": 5176 }, { "epoch": 0.9308639755461656, "grad_norm": 1.5328537225723267, "learning_rate": 9.373992392550594e-06, "loss": 0.7788, "step": 5177 }, { "epoch": 0.9310437831520273, "grad_norm": 1.4810459613800049, "learning_rate": 9.373710186254597e-06, "loss": 0.8223, "step": 5178 }, { "epoch": 0.9312235907578891, "grad_norm": 1.4368090629577637, "learning_rate": 9.37342792061309e-06, "loss": 0.7376, "step": 5179 }, { "epoch": 0.9314033983637507, "grad_norm": 1.5131661891937256, "learning_rate": 9.373145595629904e-06, "loss": 0.7715, "step": 5180 }, { "epoch": 0.9315832059696125, "grad_norm": 1.5576003789901733, "learning_rate": 9.372863211308872e-06, "loss": 0.8036, "step": 5181 }, { "epoch": 0.9317630135754742, "grad_norm": 1.7344937324523926, "learning_rate": 9.372580767653825e-06, "loss": 0.8304, "step": 5182 }, { "epoch": 0.931942821181336, "grad_norm": 1.5873453617095947, "learning_rate": 9.372298264668592e-06, "loss": 0.8259, "step": 5183 }, { "epoch": 0.9321226287871976, "grad_norm": 1.0506329536437988, "learning_rate": 9.37201570235701e-06, "loss": 1.0747, "step": 5184 }, { "epoch": 0.9323024363930594, "grad_norm": 1.6598986387252808, "learning_rate": 9.371733080722911e-06, "loss": 0.7816, "step": 5185 }, { "epoch": 0.9324822439989211, "grad_norm": 1.5138188600540161, "learning_rate": 9.371450399770132e-06, "loss": 0.7333, "step": 5186 }, { "epoch": 0.9326620516047829, "grad_norm": 1.1239498853683472, "learning_rate": 9.371167659502505e-06, "loss": 1.0261, "step": 5187 }, { "epoch": 0.9328418592106447, "grad_norm": 1.511448621749878, "learning_rate": 9.370884859923869e-06, "loss": 0.8598, "step": 5188 }, { "epoch": 0.9330216668165063, "grad_norm": 1.1077089309692383, "learning_rate": 9.370602001038061e-06, "loss": 0.9766, "step": 5189 }, { "epoch": 0.9332014744223681, "grad_norm": 1.7437348365783691, "learning_rate": 9.370319082848919e-06, "loss": 0.8232, "step": 5190 }, { "epoch": 0.9333812820282298, "grad_norm": 1.5395888090133667, "learning_rate": 9.37003610536028e-06, "loss": 0.7999, "step": 5191 }, { "epoch": 0.9335610896340916, "grad_norm": 1.53573739528656, "learning_rate": 9.369753068575987e-06, "loss": 0.7885, "step": 5192 }, { "epoch": 0.9337408972399532, "grad_norm": 1.0844756364822388, "learning_rate": 9.369469972499878e-06, "loss": 1.0178, "step": 5193 }, { "epoch": 0.933920704845815, "grad_norm": 1.4889503717422485, "learning_rate": 9.369186817135793e-06, "loss": 0.8318, "step": 5194 }, { "epoch": 0.9341005124516767, "grad_norm": 1.5172600746154785, "learning_rate": 9.368903602487577e-06, "loss": 0.8055, "step": 5195 }, { "epoch": 0.9342803200575385, "grad_norm": 1.447882890701294, "learning_rate": 9.368620328559073e-06, "loss": 0.8091, "step": 5196 }, { "epoch": 0.9344601276634001, "grad_norm": 1.4226703643798828, "learning_rate": 9.368336995354122e-06, "loss": 0.773, "step": 5197 }, { "epoch": 0.9346399352692619, "grad_norm": 1.0231056213378906, "learning_rate": 9.368053602876572e-06, "loss": 1.0244, "step": 5198 }, { "epoch": 0.9348197428751236, "grad_norm": 1.0906304121017456, "learning_rate": 9.367770151130263e-06, "loss": 1.0342, "step": 5199 }, { "epoch": 0.9349995504809854, "grad_norm": 1.5071245431900024, "learning_rate": 9.367486640119046e-06, "loss": 0.8251, "step": 5200 }, { "epoch": 0.935179358086847, "grad_norm": 1.5007017850875854, "learning_rate": 9.367203069846766e-06, "loss": 0.8425, "step": 5201 }, { "epoch": 0.9353591656927088, "grad_norm": 1.5017173290252686, "learning_rate": 9.366919440317271e-06, "loss": 0.8318, "step": 5202 }, { "epoch": 0.9355389732985705, "grad_norm": 1.450498104095459, "learning_rate": 9.366635751534408e-06, "loss": 0.8818, "step": 5203 }, { "epoch": 0.9357187809044323, "grad_norm": 1.5326751470565796, "learning_rate": 9.366352003502027e-06, "loss": 0.7932, "step": 5204 }, { "epoch": 0.935898588510294, "grad_norm": 1.5017732381820679, "learning_rate": 9.36606819622398e-06, "loss": 0.7952, "step": 5205 }, { "epoch": 0.9360783961161557, "grad_norm": 1.4448060989379883, "learning_rate": 9.365784329704114e-06, "loss": 0.8387, "step": 5206 }, { "epoch": 0.9362582037220174, "grad_norm": 1.1071051359176636, "learning_rate": 9.365500403946286e-06, "loss": 0.9962, "step": 5207 }, { "epoch": 0.9364380113278792, "grad_norm": 1.6143099069595337, "learning_rate": 9.365216418954346e-06, "loss": 0.7727, "step": 5208 }, { "epoch": 0.9366178189337409, "grad_norm": 1.5214605331420898, "learning_rate": 9.364932374732145e-06, "loss": 0.8053, "step": 5209 }, { "epoch": 0.9367976265396026, "grad_norm": 1.557895541191101, "learning_rate": 9.364648271283541e-06, "loss": 0.7825, "step": 5210 }, { "epoch": 0.9369774341454643, "grad_norm": 1.531866192817688, "learning_rate": 9.364364108612385e-06, "loss": 0.815, "step": 5211 }, { "epoch": 0.9371572417513261, "grad_norm": 1.4270401000976562, "learning_rate": 9.364079886722534e-06, "loss": 0.7603, "step": 5212 }, { "epoch": 0.9373370493571878, "grad_norm": 1.4183332920074463, "learning_rate": 9.363795605617849e-06, "loss": 0.7614, "step": 5213 }, { "epoch": 0.9375168569630495, "grad_norm": 1.4485455751419067, "learning_rate": 9.36351126530218e-06, "loss": 0.7603, "step": 5214 }, { "epoch": 0.9376966645689113, "grad_norm": 1.6348893642425537, "learning_rate": 9.36322686577939e-06, "loss": 0.8165, "step": 5215 }, { "epoch": 0.937876472174773, "grad_norm": 1.3773916959762573, "learning_rate": 9.362942407053338e-06, "loss": 0.7576, "step": 5216 }, { "epoch": 0.9380562797806348, "grad_norm": 1.4523686170578003, "learning_rate": 9.36265788912788e-06, "loss": 0.7666, "step": 5217 }, { "epoch": 0.9382360873864964, "grad_norm": 1.603916883468628, "learning_rate": 9.362373312006878e-06, "loss": 0.7427, "step": 5218 }, { "epoch": 0.9384158949923582, "grad_norm": 1.5702747106552124, "learning_rate": 9.362088675694196e-06, "loss": 0.7326, "step": 5219 }, { "epoch": 0.9385957025982199, "grad_norm": 1.5149431228637695, "learning_rate": 9.361803980193695e-06, "loss": 0.7501, "step": 5220 }, { "epoch": 0.9387755102040817, "grad_norm": 1.631328821182251, "learning_rate": 9.361519225509236e-06, "loss": 0.8129, "step": 5221 }, { "epoch": 0.9389553178099433, "grad_norm": 1.523064374923706, "learning_rate": 9.361234411644684e-06, "loss": 0.7989, "step": 5222 }, { "epoch": 0.9391351254158051, "grad_norm": 1.6382553577423096, "learning_rate": 9.360949538603904e-06, "loss": 0.7695, "step": 5223 }, { "epoch": 0.9393149330216668, "grad_norm": 1.5065926313400269, "learning_rate": 9.360664606390761e-06, "loss": 0.7298, "step": 5224 }, { "epoch": 0.9394947406275286, "grad_norm": 1.0466110706329346, "learning_rate": 9.36037961500912e-06, "loss": 1.0198, "step": 5225 }, { "epoch": 0.9396745482333902, "grad_norm": 1.4240554571151733, "learning_rate": 9.360094564462852e-06, "loss": 0.773, "step": 5226 }, { "epoch": 0.939854355839252, "grad_norm": 1.509992003440857, "learning_rate": 9.359809454755819e-06, "loss": 0.814, "step": 5227 }, { "epoch": 0.9400341634451137, "grad_norm": 1.5848439931869507, "learning_rate": 9.359524285891892e-06, "loss": 0.7346, "step": 5228 }, { "epoch": 0.9402139710509755, "grad_norm": 1.7206693887710571, "learning_rate": 9.359239057874942e-06, "loss": 0.7992, "step": 5229 }, { "epoch": 0.9403937786568372, "grad_norm": 1.5984501838684082, "learning_rate": 9.358953770708839e-06, "loss": 0.8356, "step": 5230 }, { "epoch": 0.9405735862626989, "grad_norm": 1.1617834568023682, "learning_rate": 9.35866842439745e-06, "loss": 1.0274, "step": 5231 }, { "epoch": 0.9407533938685606, "grad_norm": 1.396662712097168, "learning_rate": 9.358383018944653e-06, "loss": 0.8047, "step": 5232 }, { "epoch": 0.9409332014744224, "grad_norm": 1.0851625204086304, "learning_rate": 9.358097554354315e-06, "loss": 1.0359, "step": 5233 }, { "epoch": 0.9411130090802841, "grad_norm": 1.6325346231460571, "learning_rate": 9.357812030630312e-06, "loss": 0.719, "step": 5234 }, { "epoch": 0.9412928166861458, "grad_norm": 1.489789366722107, "learning_rate": 9.357526447776516e-06, "loss": 0.7958, "step": 5235 }, { "epoch": 0.9414726242920075, "grad_norm": 1.4787901639938354, "learning_rate": 9.357240805796809e-06, "loss": 0.8275, "step": 5236 }, { "epoch": 0.9416524318978693, "grad_norm": 1.539143443107605, "learning_rate": 9.356955104695057e-06, "loss": 0.8109, "step": 5237 }, { "epoch": 0.941832239503731, "grad_norm": 1.7752374410629272, "learning_rate": 9.356669344475142e-06, "loss": 0.7933, "step": 5238 }, { "epoch": 0.9420120471095927, "grad_norm": 1.5250688791275024, "learning_rate": 9.356383525140941e-06, "loss": 0.8002, "step": 5239 }, { "epoch": 0.9421918547154544, "grad_norm": 1.5817461013793945, "learning_rate": 9.356097646696332e-06, "loss": 0.7249, "step": 5240 }, { "epoch": 0.9423716623213162, "grad_norm": 1.4836690425872803, "learning_rate": 9.355811709145194e-06, "loss": 0.7689, "step": 5241 }, { "epoch": 0.9425514699271779, "grad_norm": 1.4850928783416748, "learning_rate": 9.355525712491405e-06, "loss": 0.7922, "step": 5242 }, { "epoch": 0.9427312775330396, "grad_norm": 1.383021593093872, "learning_rate": 9.355239656738849e-06, "loss": 0.9966, "step": 5243 }, { "epoch": 0.9429110851389014, "grad_norm": 1.5061609745025635, "learning_rate": 9.354953541891404e-06, "loss": 0.7912, "step": 5244 }, { "epoch": 0.9430908927447631, "grad_norm": 1.5026694536209106, "learning_rate": 9.354667367952954e-06, "loss": 0.7393, "step": 5245 }, { "epoch": 0.9432707003506249, "grad_norm": 0.9898255467414856, "learning_rate": 9.354381134927381e-06, "loss": 1.0423, "step": 5246 }, { "epoch": 0.9434505079564866, "grad_norm": 1.4336121082305908, "learning_rate": 9.354094842818571e-06, "loss": 0.7754, "step": 5247 }, { "epoch": 0.9436303155623483, "grad_norm": 1.434730887413025, "learning_rate": 9.353808491630407e-06, "loss": 0.6996, "step": 5248 }, { "epoch": 0.94381012316821, "grad_norm": 1.5161542892456055, "learning_rate": 9.353522081366776e-06, "loss": 0.8019, "step": 5249 }, { "epoch": 0.9439899307740718, "grad_norm": 1.0739808082580566, "learning_rate": 9.35323561203156e-06, "loss": 1.0041, "step": 5250 }, { "epoch": 0.9441697383799335, "grad_norm": 1.1094928979873657, "learning_rate": 9.352949083628651e-06, "loss": 1.0258, "step": 5251 }, { "epoch": 0.9443495459857952, "grad_norm": 1.615899682044983, "learning_rate": 9.352662496161933e-06, "loss": 0.8426, "step": 5252 }, { "epoch": 0.9445293535916569, "grad_norm": 1.5832793712615967, "learning_rate": 9.352375849635295e-06, "loss": 0.8264, "step": 5253 }, { "epoch": 0.9447091611975187, "grad_norm": 1.6178362369537354, "learning_rate": 9.35208914405263e-06, "loss": 0.8612, "step": 5254 }, { "epoch": 0.9448889688033804, "grad_norm": 1.5072529315948486, "learning_rate": 9.351802379417826e-06, "loss": 0.8461, "step": 5255 }, { "epoch": 0.9450687764092421, "grad_norm": 1.72722327709198, "learning_rate": 9.351515555734772e-06, "loss": 0.7861, "step": 5256 }, { "epoch": 0.9452485840151038, "grad_norm": 1.4307855367660522, "learning_rate": 9.351228673007363e-06, "loss": 0.771, "step": 5257 }, { "epoch": 0.9454283916209656, "grad_norm": 1.7000453472137451, "learning_rate": 9.35094173123949e-06, "loss": 0.7892, "step": 5258 }, { "epoch": 0.9456081992268273, "grad_norm": 1.4224562644958496, "learning_rate": 9.350654730435046e-06, "loss": 0.7276, "step": 5259 }, { "epoch": 0.945788006832689, "grad_norm": 1.5280685424804688, "learning_rate": 9.350367670597928e-06, "loss": 0.738, "step": 5260 }, { "epoch": 0.9459678144385507, "grad_norm": 1.4413679838180542, "learning_rate": 9.350080551732028e-06, "loss": 0.7716, "step": 5261 }, { "epoch": 0.9461476220444125, "grad_norm": 1.6095905303955078, "learning_rate": 9.349793373841243e-06, "loss": 0.8677, "step": 5262 }, { "epoch": 0.9463274296502742, "grad_norm": 1.5149976015090942, "learning_rate": 9.349506136929468e-06, "loss": 0.8081, "step": 5263 }, { "epoch": 0.946507237256136, "grad_norm": 1.5143392086029053, "learning_rate": 9.349218841000602e-06, "loss": 0.7821, "step": 5264 }, { "epoch": 0.9466870448619976, "grad_norm": 1.195287823677063, "learning_rate": 9.348931486058545e-06, "loss": 0.9843, "step": 5265 }, { "epoch": 0.9468668524678594, "grad_norm": 1.5323030948638916, "learning_rate": 9.348644072107194e-06, "loss": 0.7147, "step": 5266 }, { "epoch": 0.9470466600737211, "grad_norm": 1.483736276626587, "learning_rate": 9.348356599150447e-06, "loss": 0.7385, "step": 5267 }, { "epoch": 0.9472264676795829, "grad_norm": 1.4876688718795776, "learning_rate": 9.348069067192206e-06, "loss": 0.8318, "step": 5268 }, { "epoch": 0.9474062752854445, "grad_norm": 1.4813998937606812, "learning_rate": 9.347781476236375e-06, "loss": 0.7758, "step": 5269 }, { "epoch": 0.9475860828913063, "grad_norm": 1.0566375255584717, "learning_rate": 9.347493826286855e-06, "loss": 1.0777, "step": 5270 }, { "epoch": 0.9477658904971681, "grad_norm": 1.4400498867034912, "learning_rate": 9.347206117347547e-06, "loss": 0.8231, "step": 5271 }, { "epoch": 0.9479456981030298, "grad_norm": 1.4309947490692139, "learning_rate": 9.346918349422356e-06, "loss": 0.787, "step": 5272 }, { "epoch": 0.9481255057088915, "grad_norm": 1.59282648563385, "learning_rate": 9.346630522515187e-06, "loss": 0.7492, "step": 5273 }, { "epoch": 0.9483053133147532, "grad_norm": 1.5894156694412231, "learning_rate": 9.346342636629947e-06, "loss": 0.8461, "step": 5274 }, { "epoch": 0.948485120920615, "grad_norm": 1.4803394079208374, "learning_rate": 9.346054691770537e-06, "loss": 0.7806, "step": 5275 }, { "epoch": 0.9486649285264767, "grad_norm": 1.481806993484497, "learning_rate": 9.34576668794087e-06, "loss": 0.8191, "step": 5276 }, { "epoch": 0.9488447361323384, "grad_norm": 1.547501564025879, "learning_rate": 9.34547862514485e-06, "loss": 0.8045, "step": 5277 }, { "epoch": 0.9490245437382001, "grad_norm": 1.4938063621520996, "learning_rate": 9.345190503386387e-06, "loss": 0.7774, "step": 5278 }, { "epoch": 0.9492043513440619, "grad_norm": 1.5421675443649292, "learning_rate": 9.344902322669391e-06, "loss": 0.7857, "step": 5279 }, { "epoch": 0.9493841589499236, "grad_norm": 1.4150820970535278, "learning_rate": 9.344614082997772e-06, "loss": 0.7981, "step": 5280 }, { "epoch": 0.9495639665557853, "grad_norm": 1.4298380613327026, "learning_rate": 9.344325784375438e-06, "loss": 0.8023, "step": 5281 }, { "epoch": 0.949743774161647, "grad_norm": 1.080183982849121, "learning_rate": 9.344037426806306e-06, "loss": 0.9955, "step": 5282 }, { "epoch": 0.9499235817675088, "grad_norm": 1.393722414970398, "learning_rate": 9.343749010294285e-06, "loss": 0.7538, "step": 5283 }, { "epoch": 0.9501033893733705, "grad_norm": 1.4228429794311523, "learning_rate": 9.34346053484329e-06, "loss": 0.8573, "step": 5284 }, { "epoch": 0.9502831969792322, "grad_norm": 1.622624397277832, "learning_rate": 9.343172000457234e-06, "loss": 0.6965, "step": 5285 }, { "epoch": 0.9504630045850939, "grad_norm": 1.7303560972213745, "learning_rate": 9.342883407140034e-06, "loss": 0.8536, "step": 5286 }, { "epoch": 0.9506428121909557, "grad_norm": 1.5451059341430664, "learning_rate": 9.342594754895605e-06, "loss": 0.8016, "step": 5287 }, { "epoch": 0.9508226197968174, "grad_norm": 1.5463403463363647, "learning_rate": 9.342306043727863e-06, "loss": 0.8366, "step": 5288 }, { "epoch": 0.9510024274026792, "grad_norm": 1.09312105178833, "learning_rate": 9.342017273640724e-06, "loss": 1.0061, "step": 5289 }, { "epoch": 0.9511822350085408, "grad_norm": 1.5325839519500732, "learning_rate": 9.341728444638108e-06, "loss": 0.8222, "step": 5290 }, { "epoch": 0.9513620426144026, "grad_norm": 1.5466417074203491, "learning_rate": 9.341439556723936e-06, "loss": 0.8248, "step": 5291 }, { "epoch": 0.9515418502202643, "grad_norm": 1.4468868970870972, "learning_rate": 9.341150609902124e-06, "loss": 0.8578, "step": 5292 }, { "epoch": 0.951721657826126, "grad_norm": 1.4639235734939575, "learning_rate": 9.340861604176596e-06, "loss": 0.8066, "step": 5293 }, { "epoch": 0.9519014654319877, "grad_norm": 1.4494351148605347, "learning_rate": 9.34057253955127e-06, "loss": 0.7365, "step": 5294 }, { "epoch": 0.9520812730378495, "grad_norm": 1.6020569801330566, "learning_rate": 9.34028341603007e-06, "loss": 0.7857, "step": 5295 }, { "epoch": 0.9522610806437112, "grad_norm": 1.40376615524292, "learning_rate": 9.33999423361692e-06, "loss": 0.7844, "step": 5296 }, { "epoch": 0.952440888249573, "grad_norm": 1.4915663003921509, "learning_rate": 9.339704992315744e-06, "loss": 0.7763, "step": 5297 }, { "epoch": 0.9526206958554347, "grad_norm": 1.4122449159622192, "learning_rate": 9.339415692130464e-06, "loss": 0.8177, "step": 5298 }, { "epoch": 0.9528005034612964, "grad_norm": 1.4631431102752686, "learning_rate": 9.339126333065008e-06, "loss": 0.7817, "step": 5299 }, { "epoch": 0.9529803110671582, "grad_norm": 1.469172477722168, "learning_rate": 9.3388369151233e-06, "loss": 0.7698, "step": 5300 }, { "epoch": 0.9531601186730199, "grad_norm": 1.6992908716201782, "learning_rate": 9.33854743830927e-06, "loss": 0.7822, "step": 5301 }, { "epoch": 0.9533399262788816, "grad_norm": 1.4825961589813232, "learning_rate": 9.33825790262684e-06, "loss": 0.779, "step": 5302 }, { "epoch": 0.9535197338847433, "grad_norm": 1.547013282775879, "learning_rate": 9.337968308079947e-06, "loss": 0.8234, "step": 5303 }, { "epoch": 0.9536995414906051, "grad_norm": 1.413761019706726, "learning_rate": 9.337678654672516e-06, "loss": 0.7715, "step": 5304 }, { "epoch": 0.9538793490964668, "grad_norm": 1.4244701862335205, "learning_rate": 9.337388942408476e-06, "loss": 0.7721, "step": 5305 }, { "epoch": 0.9540591567023285, "grad_norm": 1.4411699771881104, "learning_rate": 9.33709917129176e-06, "loss": 0.729, "step": 5306 }, { "epoch": 0.9542389643081902, "grad_norm": 1.5166343450546265, "learning_rate": 9.336809341326297e-06, "loss": 0.8535, "step": 5307 }, { "epoch": 0.954418771914052, "grad_norm": 1.4965696334838867, "learning_rate": 9.336519452516024e-06, "loss": 0.7941, "step": 5308 }, { "epoch": 0.9545985795199137, "grad_norm": 1.4492243528366089, "learning_rate": 9.33622950486487e-06, "loss": 0.8096, "step": 5309 }, { "epoch": 0.9547783871257755, "grad_norm": 1.5608545541763306, "learning_rate": 9.335939498376773e-06, "loss": 0.7784, "step": 5310 }, { "epoch": 0.9549581947316371, "grad_norm": 1.3728559017181396, "learning_rate": 9.335649433055665e-06, "loss": 0.7523, "step": 5311 }, { "epoch": 0.9551380023374989, "grad_norm": 1.4503026008605957, "learning_rate": 9.335359308905486e-06, "loss": 0.7891, "step": 5312 }, { "epoch": 0.9553178099433606, "grad_norm": 1.4493162631988525, "learning_rate": 9.335069125930167e-06, "loss": 0.7991, "step": 5313 }, { "epoch": 0.9554976175492224, "grad_norm": 1.4611551761627197, "learning_rate": 9.334778884133648e-06, "loss": 0.8188, "step": 5314 }, { "epoch": 0.955677425155084, "grad_norm": 1.4378705024719238, "learning_rate": 9.334488583519868e-06, "loss": 0.8193, "step": 5315 }, { "epoch": 0.9558572327609458, "grad_norm": 1.264024257659912, "learning_rate": 9.334198224092765e-06, "loss": 0.9816, "step": 5316 }, { "epoch": 0.9560370403668075, "grad_norm": 1.4694515466690063, "learning_rate": 9.33390780585628e-06, "loss": 0.7859, "step": 5317 }, { "epoch": 0.9562168479726693, "grad_norm": 1.4149690866470337, "learning_rate": 9.333617328814353e-06, "loss": 0.7806, "step": 5318 }, { "epoch": 0.9563966555785309, "grad_norm": 1.5417561531066895, "learning_rate": 9.333326792970924e-06, "loss": 0.817, "step": 5319 }, { "epoch": 0.9565764631843927, "grad_norm": 1.6132651567459106, "learning_rate": 9.33303619832994e-06, "loss": 0.81, "step": 5320 }, { "epoch": 0.9567562707902544, "grad_norm": 1.499921441078186, "learning_rate": 9.332745544895335e-06, "loss": 0.7702, "step": 5321 }, { "epoch": 0.9569360783961162, "grad_norm": 1.4168899059295654, "learning_rate": 9.332454832671061e-06, "loss": 0.789, "step": 5322 }, { "epoch": 0.9571158860019778, "grad_norm": 1.4243887662887573, "learning_rate": 9.33216406166106e-06, "loss": 0.7934, "step": 5323 }, { "epoch": 0.9572956936078396, "grad_norm": 1.5834763050079346, "learning_rate": 9.331873231869275e-06, "loss": 0.7392, "step": 5324 }, { "epoch": 0.9574755012137013, "grad_norm": 1.4254204034805298, "learning_rate": 9.331582343299656e-06, "loss": 0.6779, "step": 5325 }, { "epoch": 0.9576553088195631, "grad_norm": 1.4345442056655884, "learning_rate": 9.331291395956148e-06, "loss": 0.7342, "step": 5326 }, { "epoch": 0.9578351164254248, "grad_norm": 1.5784755945205688, "learning_rate": 9.331000389842698e-06, "loss": 0.8168, "step": 5327 }, { "epoch": 0.9580149240312865, "grad_norm": 1.1757761240005493, "learning_rate": 9.330709324963257e-06, "loss": 1.0005, "step": 5328 }, { "epoch": 0.9581947316371483, "grad_norm": 1.3641247749328613, "learning_rate": 9.330418201321772e-06, "loss": 0.769, "step": 5329 }, { "epoch": 0.95837453924301, "grad_norm": 1.4622457027435303, "learning_rate": 9.330127018922195e-06, "loss": 0.8455, "step": 5330 }, { "epoch": 0.9585543468488718, "grad_norm": 1.548407793045044, "learning_rate": 9.329835777768474e-06, "loss": 0.7624, "step": 5331 }, { "epoch": 0.9587341544547334, "grad_norm": 1.038811445236206, "learning_rate": 9.329544477864565e-06, "loss": 1.0436, "step": 5332 }, { "epoch": 0.9589139620605952, "grad_norm": 1.5345159769058228, "learning_rate": 9.329253119214418e-06, "loss": 0.7613, "step": 5333 }, { "epoch": 0.9590937696664569, "grad_norm": 1.5582995414733887, "learning_rate": 9.328961701821986e-06, "loss": 0.8996, "step": 5334 }, { "epoch": 0.9592735772723187, "grad_norm": 1.5028146505355835, "learning_rate": 9.328670225691225e-06, "loss": 0.7484, "step": 5335 }, { "epoch": 0.9594533848781803, "grad_norm": 1.4076032638549805, "learning_rate": 9.328378690826087e-06, "loss": 0.7356, "step": 5336 }, { "epoch": 0.9596331924840421, "grad_norm": 1.1161925792694092, "learning_rate": 9.328087097230532e-06, "loss": 0.9822, "step": 5337 }, { "epoch": 0.9598130000899038, "grad_norm": 2.1264994144439697, "learning_rate": 9.327795444908511e-06, "loss": 0.8253, "step": 5338 }, { "epoch": 0.9599928076957656, "grad_norm": 1.5115877389907837, "learning_rate": 9.327503733863987e-06, "loss": 0.7746, "step": 5339 }, { "epoch": 0.9601726153016272, "grad_norm": 1.6231133937835693, "learning_rate": 9.327211964100915e-06, "loss": 0.7694, "step": 5340 }, { "epoch": 0.960352422907489, "grad_norm": 1.4675205945968628, "learning_rate": 9.326920135623255e-06, "loss": 0.7044, "step": 5341 }, { "epoch": 0.9605322305133507, "grad_norm": 1.4872199296951294, "learning_rate": 9.326628248434966e-06, "loss": 0.8418, "step": 5342 }, { "epoch": 0.9607120381192125, "grad_norm": 1.2436611652374268, "learning_rate": 9.326336302540007e-06, "loss": 0.9846, "step": 5343 }, { "epoch": 0.9608918457250741, "grad_norm": 1.5318292379379272, "learning_rate": 9.326044297942342e-06, "loss": 0.8465, "step": 5344 }, { "epoch": 0.9610716533309359, "grad_norm": 1.5153331756591797, "learning_rate": 9.325752234645934e-06, "loss": 0.8303, "step": 5345 }, { "epoch": 0.9612514609367976, "grad_norm": 1.7095106840133667, "learning_rate": 9.325460112654743e-06, "loss": 0.8375, "step": 5346 }, { "epoch": 0.9614312685426594, "grad_norm": 1.5435380935668945, "learning_rate": 9.325167931972733e-06, "loss": 0.7399, "step": 5347 }, { "epoch": 0.961611076148521, "grad_norm": 1.674791693687439, "learning_rate": 9.324875692603872e-06, "loss": 0.8375, "step": 5348 }, { "epoch": 0.9617908837543828, "grad_norm": 1.4937186241149902, "learning_rate": 9.32458339455212e-06, "loss": 0.7454, "step": 5349 }, { "epoch": 0.9619706913602445, "grad_norm": 1.4653829336166382, "learning_rate": 9.324291037821446e-06, "loss": 0.7786, "step": 5350 }, { "epoch": 0.9621504989661063, "grad_norm": 1.4734662771224976, "learning_rate": 9.323998622415819e-06, "loss": 0.7845, "step": 5351 }, { "epoch": 0.9623303065719679, "grad_norm": 1.4705153703689575, "learning_rate": 9.323706148339203e-06, "loss": 0.8193, "step": 5352 }, { "epoch": 0.9625101141778297, "grad_norm": 1.3755403757095337, "learning_rate": 9.323413615595567e-06, "loss": 0.7291, "step": 5353 }, { "epoch": 0.9626899217836915, "grad_norm": 1.437629222869873, "learning_rate": 9.323121024188882e-06, "loss": 0.782, "step": 5354 }, { "epoch": 0.9628697293895532, "grad_norm": 1.489606499671936, "learning_rate": 9.322828374123116e-06, "loss": 0.7386, "step": 5355 }, { "epoch": 0.963049536995415, "grad_norm": 1.5932585000991821, "learning_rate": 9.322535665402243e-06, "loss": 0.8644, "step": 5356 }, { "epoch": 0.9632293446012766, "grad_norm": 1.5727351903915405, "learning_rate": 9.322242898030231e-06, "loss": 0.7936, "step": 5357 }, { "epoch": 0.9634091522071384, "grad_norm": 1.4817239046096802, "learning_rate": 9.321950072011056e-06, "loss": 0.6832, "step": 5358 }, { "epoch": 0.9635889598130001, "grad_norm": 1.4865175485610962, "learning_rate": 9.321657187348689e-06, "loss": 0.7894, "step": 5359 }, { "epoch": 0.9637687674188619, "grad_norm": 1.5399329662322998, "learning_rate": 9.321364244047104e-06, "loss": 0.7856, "step": 5360 }, { "epoch": 0.9639485750247235, "grad_norm": 1.4305647611618042, "learning_rate": 9.321071242110275e-06, "loss": 0.8004, "step": 5361 }, { "epoch": 0.9641283826305853, "grad_norm": 1.6025422811508179, "learning_rate": 9.320778181542183e-06, "loss": 0.853, "step": 5362 }, { "epoch": 0.964308190236447, "grad_norm": 1.484844446182251, "learning_rate": 9.320485062346798e-06, "loss": 0.8626, "step": 5363 }, { "epoch": 0.9644879978423088, "grad_norm": 1.5053356885910034, "learning_rate": 9.3201918845281e-06, "loss": 0.8226, "step": 5364 }, { "epoch": 0.9646678054481704, "grad_norm": 1.5162628889083862, "learning_rate": 9.319898648090066e-06, "loss": 0.8652, "step": 5365 }, { "epoch": 0.9648476130540322, "grad_norm": 1.5409398078918457, "learning_rate": 9.319605353036676e-06, "loss": 0.8572, "step": 5366 }, { "epoch": 0.9650274206598939, "grad_norm": 1.5218430757522583, "learning_rate": 9.31931199937191e-06, "loss": 0.7942, "step": 5367 }, { "epoch": 0.9652072282657557, "grad_norm": 1.5351556539535522, "learning_rate": 9.319018587099748e-06, "loss": 0.7737, "step": 5368 }, { "epoch": 0.9653870358716173, "grad_norm": 1.4587377309799194, "learning_rate": 9.31872511622417e-06, "loss": 0.8449, "step": 5369 }, { "epoch": 0.9655668434774791, "grad_norm": 1.4708789587020874, "learning_rate": 9.318431586749159e-06, "loss": 0.7481, "step": 5370 }, { "epoch": 0.9657466510833408, "grad_norm": 1.412964940071106, "learning_rate": 9.318137998678698e-06, "loss": 0.7682, "step": 5371 }, { "epoch": 0.9659264586892026, "grad_norm": 1.4654090404510498, "learning_rate": 9.317844352016772e-06, "loss": 0.7646, "step": 5372 }, { "epoch": 0.9661062662950642, "grad_norm": 1.346605896949768, "learning_rate": 9.317550646767362e-06, "loss": 1.02, "step": 5373 }, { "epoch": 0.966286073900926, "grad_norm": 1.4703826904296875, "learning_rate": 9.317256882934455e-06, "loss": 0.8188, "step": 5374 }, { "epoch": 0.9664658815067877, "grad_norm": 1.0980297327041626, "learning_rate": 9.316963060522037e-06, "loss": 0.9982, "step": 5375 }, { "epoch": 0.9666456891126495, "grad_norm": 1.5479439496994019, "learning_rate": 9.316669179534095e-06, "loss": 0.7711, "step": 5376 }, { "epoch": 0.9668254967185111, "grad_norm": 1.4587174654006958, "learning_rate": 9.316375239974615e-06, "loss": 0.8211, "step": 5377 }, { "epoch": 0.9670053043243729, "grad_norm": 1.548818588256836, "learning_rate": 9.316081241847588e-06, "loss": 0.7944, "step": 5378 }, { "epoch": 0.9671851119302346, "grad_norm": 1.5021331310272217, "learning_rate": 9.315787185157002e-06, "loss": 0.8227, "step": 5379 }, { "epoch": 0.9673649195360964, "grad_norm": 1.5700757503509521, "learning_rate": 9.315493069906845e-06, "loss": 0.7286, "step": 5380 }, { "epoch": 0.967544727141958, "grad_norm": 1.5296121835708618, "learning_rate": 9.315198896101112e-06, "loss": 0.7918, "step": 5381 }, { "epoch": 0.9677245347478198, "grad_norm": 1.483571171760559, "learning_rate": 9.314904663743792e-06, "loss": 0.9849, "step": 5382 }, { "epoch": 0.9679043423536816, "grad_norm": 1.52244234085083, "learning_rate": 9.314610372838875e-06, "loss": 0.8038, "step": 5383 }, { "epoch": 0.9680841499595433, "grad_norm": 1.463749885559082, "learning_rate": 9.314316023390358e-06, "loss": 0.8218, "step": 5384 }, { "epoch": 0.9682639575654051, "grad_norm": 1.9869366884231567, "learning_rate": 9.314021615402233e-06, "loss": 0.7639, "step": 5385 }, { "epoch": 0.9684437651712667, "grad_norm": 1.62091863155365, "learning_rate": 9.313727148878496e-06, "loss": 0.8726, "step": 5386 }, { "epoch": 0.9686235727771285, "grad_norm": 1.0162721872329712, "learning_rate": 9.313432623823142e-06, "loss": 1.05, "step": 5387 }, { "epoch": 0.9688033803829902, "grad_norm": 1.479587435722351, "learning_rate": 9.313138040240167e-06, "loss": 0.8079, "step": 5388 }, { "epoch": 0.968983187988852, "grad_norm": 1.5217266082763672, "learning_rate": 9.312843398133567e-06, "loss": 0.7712, "step": 5389 }, { "epoch": 0.9691629955947136, "grad_norm": 1.513573408126831, "learning_rate": 9.312548697507342e-06, "loss": 0.7646, "step": 5390 }, { "epoch": 0.9693428032005754, "grad_norm": 1.6987406015396118, "learning_rate": 9.31225393836549e-06, "loss": 0.7488, "step": 5391 }, { "epoch": 0.9695226108064371, "grad_norm": 1.6042656898498535, "learning_rate": 9.311959120712012e-06, "loss": 0.8508, "step": 5392 }, { "epoch": 0.9697024184122989, "grad_norm": 1.647255778312683, "learning_rate": 9.311664244550905e-06, "loss": 0.7696, "step": 5393 }, { "epoch": 0.9698822260181605, "grad_norm": 1.465809941291809, "learning_rate": 9.31136930988617e-06, "loss": 0.8053, "step": 5394 }, { "epoch": 0.9700620336240223, "grad_norm": 1.5714547634124756, "learning_rate": 9.311074316721813e-06, "loss": 0.7488, "step": 5395 }, { "epoch": 0.970241841229884, "grad_norm": 1.1784247159957886, "learning_rate": 9.310779265061834e-06, "loss": 1.0331, "step": 5396 }, { "epoch": 0.9704216488357458, "grad_norm": 1.5450834035873413, "learning_rate": 9.310484154910235e-06, "loss": 0.7903, "step": 5397 }, { "epoch": 0.9706014564416074, "grad_norm": 1.5093809366226196, "learning_rate": 9.310188986271023e-06, "loss": 0.7835, "step": 5398 }, { "epoch": 0.9707812640474692, "grad_norm": 1.4473463296890259, "learning_rate": 9.309893759148201e-06, "loss": 0.7434, "step": 5399 }, { "epoch": 0.9709610716533309, "grad_norm": 1.388708472251892, "learning_rate": 9.309598473545778e-06, "loss": 0.7995, "step": 5400 }, { "epoch": 0.9711408792591927, "grad_norm": 1.5517940521240234, "learning_rate": 9.309303129467757e-06, "loss": 0.8058, "step": 5401 }, { "epoch": 0.9713206868650544, "grad_norm": 1.0349252223968506, "learning_rate": 9.309007726918147e-06, "loss": 1.0314, "step": 5402 }, { "epoch": 0.9715004944709161, "grad_norm": 1.4306038618087769, "learning_rate": 9.308712265900956e-06, "loss": 0.8113, "step": 5403 }, { "epoch": 0.9716803020767778, "grad_norm": 1.522476077079773, "learning_rate": 9.308416746420194e-06, "loss": 0.8462, "step": 5404 }, { "epoch": 0.9718601096826396, "grad_norm": 1.5032306909561157, "learning_rate": 9.30812116847987e-06, "loss": 0.7845, "step": 5405 }, { "epoch": 0.9720399172885013, "grad_norm": 1.058918833732605, "learning_rate": 9.307825532083994e-06, "loss": 0.999, "step": 5406 }, { "epoch": 0.972219724894363, "grad_norm": 1.0271835327148438, "learning_rate": 9.307529837236577e-06, "loss": 1.0416, "step": 5407 }, { "epoch": 0.9723995325002247, "grad_norm": 1.4679399728775024, "learning_rate": 9.307234083941633e-06, "loss": 0.8032, "step": 5408 }, { "epoch": 0.9725793401060865, "grad_norm": 1.512485384941101, "learning_rate": 9.306938272203177e-06, "loss": 0.8628, "step": 5409 }, { "epoch": 0.9727591477119483, "grad_norm": 1.4785813093185425, "learning_rate": 9.306642402025216e-06, "loss": 0.8207, "step": 5410 }, { "epoch": 0.9729389553178099, "grad_norm": 1.56483793258667, "learning_rate": 9.306346473411771e-06, "loss": 0.7627, "step": 5411 }, { "epoch": 0.9731187629236717, "grad_norm": 1.6150126457214355, "learning_rate": 9.306050486366854e-06, "loss": 0.8506, "step": 5412 }, { "epoch": 0.9732985705295334, "grad_norm": 1.2594351768493652, "learning_rate": 9.305754440894482e-06, "loss": 0.9919, "step": 5413 }, { "epoch": 0.9734783781353952, "grad_norm": 1.627441167831421, "learning_rate": 9.305458336998671e-06, "loss": 0.8635, "step": 5414 }, { "epoch": 0.9736581857412568, "grad_norm": 1.7656501531600952, "learning_rate": 9.30516217468344e-06, "loss": 0.9141, "step": 5415 }, { "epoch": 0.9738379933471186, "grad_norm": 1.5120015144348145, "learning_rate": 9.304865953952809e-06, "loss": 0.7728, "step": 5416 }, { "epoch": 0.9740178009529803, "grad_norm": 1.4368507862091064, "learning_rate": 9.304569674810794e-06, "loss": 0.764, "step": 5417 }, { "epoch": 0.9741976085588421, "grad_norm": 1.5388818979263306, "learning_rate": 9.304273337261417e-06, "loss": 0.8441, "step": 5418 }, { "epoch": 0.9743774161647037, "grad_norm": 1.4983805418014526, "learning_rate": 9.303976941308699e-06, "loss": 0.7815, "step": 5419 }, { "epoch": 0.9745572237705655, "grad_norm": 1.4587311744689941, "learning_rate": 9.30368048695666e-06, "loss": 0.782, "step": 5420 }, { "epoch": 0.9747370313764272, "grad_norm": 1.4565938711166382, "learning_rate": 9.303383974209325e-06, "loss": 0.6858, "step": 5421 }, { "epoch": 0.974916838982289, "grad_norm": 1.473008632659912, "learning_rate": 9.303087403070716e-06, "loss": 0.7298, "step": 5422 }, { "epoch": 0.9750966465881507, "grad_norm": 1.4002560377120972, "learning_rate": 9.302790773544858e-06, "loss": 0.8225, "step": 5423 }, { "epoch": 0.9752764541940124, "grad_norm": 1.6636377573013306, "learning_rate": 9.302494085635774e-06, "loss": 0.778, "step": 5424 }, { "epoch": 0.9754562617998741, "grad_norm": 1.684274673461914, "learning_rate": 9.30219733934749e-06, "loss": 0.8411, "step": 5425 }, { "epoch": 0.9756360694057359, "grad_norm": 1.4624388217926025, "learning_rate": 9.301900534684034e-06, "loss": 0.7985, "step": 5426 }, { "epoch": 0.9758158770115976, "grad_norm": 1.4639497995376587, "learning_rate": 9.301603671649433e-06, "loss": 0.7916, "step": 5427 }, { "epoch": 0.9759956846174593, "grad_norm": 1.4433799982070923, "learning_rate": 9.301306750247713e-06, "loss": 0.811, "step": 5428 }, { "epoch": 0.976175492223321, "grad_norm": 1.2522255182266235, "learning_rate": 9.301009770482905e-06, "loss": 1.011, "step": 5429 }, { "epoch": 0.9763552998291828, "grad_norm": 1.4322302341461182, "learning_rate": 9.300712732359038e-06, "loss": 0.7994, "step": 5430 }, { "epoch": 0.9765351074350445, "grad_norm": 1.3760467767715454, "learning_rate": 9.300415635880143e-06, "loss": 0.7307, "step": 5431 }, { "epoch": 0.9767149150409062, "grad_norm": 1.5933094024658203, "learning_rate": 9.300118481050252e-06, "loss": 0.8226, "step": 5432 }, { "epoch": 0.9768947226467679, "grad_norm": 1.4822863340377808, "learning_rate": 9.299821267873393e-06, "loss": 0.7755, "step": 5433 }, { "epoch": 0.9770745302526297, "grad_norm": 1.9530855417251587, "learning_rate": 9.299523996353601e-06, "loss": 0.8199, "step": 5434 }, { "epoch": 0.9772543378584914, "grad_norm": 1.147768259048462, "learning_rate": 9.299226666494912e-06, "loss": 0.9714, "step": 5435 }, { "epoch": 0.9774341454643531, "grad_norm": 1.425732970237732, "learning_rate": 9.298929278301356e-06, "loss": 0.7752, "step": 5436 }, { "epoch": 0.9776139530702149, "grad_norm": 1.1426788568496704, "learning_rate": 9.298631831776972e-06, "loss": 1.0058, "step": 5437 }, { "epoch": 0.9777937606760766, "grad_norm": 1.5827614068984985, "learning_rate": 9.298334326925793e-06, "loss": 0.8197, "step": 5438 }, { "epoch": 0.9779735682819384, "grad_norm": 1.3622502088546753, "learning_rate": 9.298036763751858e-06, "loss": 0.7612, "step": 5439 }, { "epoch": 0.9781533758878, "grad_norm": 1.4501160383224487, "learning_rate": 9.297739142259206e-06, "loss": 0.8451, "step": 5440 }, { "epoch": 0.9783331834936618, "grad_norm": 1.2581967115402222, "learning_rate": 9.29744146245187e-06, "loss": 1.0099, "step": 5441 }, { "epoch": 0.9785129910995235, "grad_norm": 1.3958653211593628, "learning_rate": 9.297143724333893e-06, "loss": 0.7152, "step": 5442 }, { "epoch": 0.9786927987053853, "grad_norm": 1.523084282875061, "learning_rate": 9.296845927909315e-06, "loss": 0.8598, "step": 5443 }, { "epoch": 0.978872606311247, "grad_norm": 1.1347382068634033, "learning_rate": 9.296548073182174e-06, "loss": 1.014, "step": 5444 }, { "epoch": 0.9790524139171087, "grad_norm": 1.648667335510254, "learning_rate": 9.296250160156515e-06, "loss": 0.8845, "step": 5445 }, { "epoch": 0.9792322215229704, "grad_norm": 1.5957751274108887, "learning_rate": 9.29595218883638e-06, "loss": 0.7842, "step": 5446 }, { "epoch": 0.9794120291288322, "grad_norm": 1.6001306772232056, "learning_rate": 9.295654159225806e-06, "loss": 0.7884, "step": 5447 }, { "epoch": 0.9795918367346939, "grad_norm": 1.5073755979537964, "learning_rate": 9.295356071328845e-06, "loss": 0.7972, "step": 5448 }, { "epoch": 0.9797716443405556, "grad_norm": 1.3683955669403076, "learning_rate": 9.295057925149538e-06, "loss": 0.8139, "step": 5449 }, { "epoch": 0.9799514519464173, "grad_norm": 1.401698350906372, "learning_rate": 9.294759720691931e-06, "loss": 0.7783, "step": 5450 }, { "epoch": 0.9801312595522791, "grad_norm": 1.4063252210617065, "learning_rate": 9.294461457960068e-06, "loss": 0.7948, "step": 5451 }, { "epoch": 0.9803110671581408, "grad_norm": 1.2237882614135742, "learning_rate": 9.294163136958e-06, "loss": 1.0835, "step": 5452 }, { "epoch": 0.9804908747640025, "grad_norm": 1.5501402616500854, "learning_rate": 9.29386475768977e-06, "loss": 0.7899, "step": 5453 }, { "epoch": 0.9806706823698642, "grad_norm": 1.5790632963180542, "learning_rate": 9.293566320159432e-06, "loss": 0.7597, "step": 5454 }, { "epoch": 0.980850489975726, "grad_norm": 1.5094808340072632, "learning_rate": 9.293267824371032e-06, "loss": 0.8482, "step": 5455 }, { "epoch": 0.9810302975815877, "grad_norm": 1.4987690448760986, "learning_rate": 9.29296927032862e-06, "loss": 0.8114, "step": 5456 }, { "epoch": 0.9812101051874494, "grad_norm": 1.5169519186019897, "learning_rate": 9.292670658036249e-06, "loss": 0.793, "step": 5457 }, { "epoch": 0.9813899127933111, "grad_norm": 1.5114814043045044, "learning_rate": 9.29237198749797e-06, "loss": 0.8084, "step": 5458 }, { "epoch": 0.9815697203991729, "grad_norm": 1.4740804433822632, "learning_rate": 9.292073258717835e-06, "loss": 0.8012, "step": 5459 }, { "epoch": 0.9817495280050346, "grad_norm": 1.7284692525863647, "learning_rate": 9.291774471699897e-06, "loss": 0.8076, "step": 5460 }, { "epoch": 0.9819293356108963, "grad_norm": 1.4983668327331543, "learning_rate": 9.291475626448213e-06, "loss": 0.7856, "step": 5461 }, { "epoch": 0.982109143216758, "grad_norm": 1.4115239381790161, "learning_rate": 9.291176722966833e-06, "loss": 0.8684, "step": 5462 }, { "epoch": 0.9822889508226198, "grad_norm": 1.5455524921417236, "learning_rate": 9.290877761259816e-06, "loss": 0.8517, "step": 5463 }, { "epoch": 0.9824687584284815, "grad_norm": 1.205739140510559, "learning_rate": 9.290578741331218e-06, "loss": 0.9881, "step": 5464 }, { "epoch": 0.9826485660343433, "grad_norm": 1.0467307567596436, "learning_rate": 9.290279663185097e-06, "loss": 1.0439, "step": 5465 }, { "epoch": 0.982828373640205, "grad_norm": 1.5340728759765625, "learning_rate": 9.28998052682551e-06, "loss": 0.7425, "step": 5466 }, { "epoch": 0.9830081812460667, "grad_norm": 1.5775870084762573, "learning_rate": 9.289681332256517e-06, "loss": 0.8235, "step": 5467 }, { "epoch": 0.9831879888519285, "grad_norm": 1.0208961963653564, "learning_rate": 9.289382079482177e-06, "loss": 1.0105, "step": 5468 }, { "epoch": 0.9833677964577902, "grad_norm": 1.5162442922592163, "learning_rate": 9.28908276850655e-06, "loss": 0.7823, "step": 5469 }, { "epoch": 0.9835476040636519, "grad_norm": 1.459712028503418, "learning_rate": 9.288783399333698e-06, "loss": 0.8142, "step": 5470 }, { "epoch": 0.9837274116695136, "grad_norm": 1.3358063697814941, "learning_rate": 9.288483971967682e-06, "loss": 0.7452, "step": 5471 }, { "epoch": 0.9839072192753754, "grad_norm": 1.549817442893982, "learning_rate": 9.288184486412566e-06, "loss": 0.7695, "step": 5472 }, { "epoch": 0.9840870268812371, "grad_norm": 1.6080976724624634, "learning_rate": 9.287884942672414e-06, "loss": 0.7744, "step": 5473 }, { "epoch": 0.9842668344870988, "grad_norm": 1.4644978046417236, "learning_rate": 9.287585340751288e-06, "loss": 0.7602, "step": 5474 }, { "epoch": 0.9844466420929605, "grad_norm": 1.4487100839614868, "learning_rate": 9.287285680653254e-06, "loss": 0.7216, "step": 5475 }, { "epoch": 0.9846264496988223, "grad_norm": 1.606967806816101, "learning_rate": 9.286985962382382e-06, "loss": 0.8548, "step": 5476 }, { "epoch": 0.984806257304684, "grad_norm": 1.6158809661865234, "learning_rate": 9.286686185942735e-06, "loss": 0.7621, "step": 5477 }, { "epoch": 0.9849860649105457, "grad_norm": 1.2722147703170776, "learning_rate": 9.286386351338379e-06, "loss": 1.0643, "step": 5478 }, { "epoch": 0.9851658725164074, "grad_norm": 1.5669379234313965, "learning_rate": 9.286086458573386e-06, "loss": 0.81, "step": 5479 }, { "epoch": 0.9853456801222692, "grad_norm": 1.5059822797775269, "learning_rate": 9.285786507651824e-06, "loss": 0.7277, "step": 5480 }, { "epoch": 0.9855254877281309, "grad_norm": 1.47649347782135, "learning_rate": 9.285486498577761e-06, "loss": 0.854, "step": 5481 }, { "epoch": 0.9857052953339926, "grad_norm": 1.5173324346542358, "learning_rate": 9.285186431355271e-06, "loss": 0.7377, "step": 5482 }, { "epoch": 0.9858851029398543, "grad_norm": 1.5662028789520264, "learning_rate": 9.284886305988423e-06, "loss": 0.7889, "step": 5483 }, { "epoch": 0.9860649105457161, "grad_norm": 1.5211464166641235, "learning_rate": 9.284586122481292e-06, "loss": 0.7804, "step": 5484 }, { "epoch": 0.9862447181515778, "grad_norm": 1.3174076080322266, "learning_rate": 9.284285880837947e-06, "loss": 1.0419, "step": 5485 }, { "epoch": 0.9864245257574396, "grad_norm": 1.3925230503082275, "learning_rate": 9.283985581062464e-06, "loss": 0.7215, "step": 5486 }, { "epoch": 0.9866043333633012, "grad_norm": 1.117020606994629, "learning_rate": 9.28368522315892e-06, "loss": 0.9938, "step": 5487 }, { "epoch": 0.986784140969163, "grad_norm": 1.4601472616195679, "learning_rate": 9.283384807131386e-06, "loss": 0.8009, "step": 5488 }, { "epoch": 0.9869639485750247, "grad_norm": 1.4012113809585571, "learning_rate": 9.283084332983943e-06, "loss": 0.7699, "step": 5489 }, { "epoch": 0.9871437561808865, "grad_norm": 1.4374969005584717, "learning_rate": 9.282783800720664e-06, "loss": 0.687, "step": 5490 }, { "epoch": 0.9873235637867481, "grad_norm": 1.2981061935424805, "learning_rate": 9.282483210345628e-06, "loss": 0.9609, "step": 5491 }, { "epoch": 0.9875033713926099, "grad_norm": 1.3732237815856934, "learning_rate": 9.282182561862915e-06, "loss": 0.8296, "step": 5492 }, { "epoch": 0.9876831789984717, "grad_norm": 1.5118861198425293, "learning_rate": 9.281881855276604e-06, "loss": 0.7689, "step": 5493 }, { "epoch": 0.9878629866043334, "grad_norm": 1.5144509077072144, "learning_rate": 9.281581090590772e-06, "loss": 0.8069, "step": 5494 }, { "epoch": 0.9880427942101951, "grad_norm": 2.8057398796081543, "learning_rate": 9.281280267809504e-06, "loss": 0.8454, "step": 5495 }, { "epoch": 0.9882226018160568, "grad_norm": 1.1115798950195312, "learning_rate": 9.280979386936882e-06, "loss": 0.997, "step": 5496 }, { "epoch": 0.9884024094219186, "grad_norm": 1.502284049987793, "learning_rate": 9.280678447976987e-06, "loss": 0.8608, "step": 5497 }, { "epoch": 0.9885822170277803, "grad_norm": 1.5356335639953613, "learning_rate": 9.280377450933899e-06, "loss": 0.8345, "step": 5498 }, { "epoch": 0.988762024633642, "grad_norm": 1.4988148212432861, "learning_rate": 9.280076395811709e-06, "loss": 0.8257, "step": 5499 }, { "epoch": 0.9889418322395037, "grad_norm": 1.5024759769439697, "learning_rate": 9.279775282614497e-06, "loss": 0.8335, "step": 5500 }, { "epoch": 0.9889418322395037, "eval_loss": 0.8166871070861816, "eval_runtime": 148.4888, "eval_samples_per_second": 96.856, "eval_steps_per_second": 1.515, "step": 5500 }, { "epoch": 0.9891216398453655, "grad_norm": 1.4754029512405396, "learning_rate": 9.279474111346349e-06, "loss": 0.7862, "step": 5501 }, { "epoch": 0.9893014474512272, "grad_norm": 1.5630217790603638, "learning_rate": 9.279172882011353e-06, "loss": 0.7605, "step": 5502 }, { "epoch": 0.989481255057089, "grad_norm": 1.4986640214920044, "learning_rate": 9.278871594613596e-06, "loss": 0.7802, "step": 5503 }, { "epoch": 0.9896610626629506, "grad_norm": 1.0876049995422363, "learning_rate": 9.278570249157166e-06, "loss": 0.9859, "step": 5504 }, { "epoch": 0.9898408702688124, "grad_norm": 1.5936278104782104, "learning_rate": 9.278268845646152e-06, "loss": 0.8379, "step": 5505 }, { "epoch": 0.9900206778746741, "grad_norm": 1.545123815536499, "learning_rate": 9.277967384084645e-06, "loss": 0.7733, "step": 5506 }, { "epoch": 0.9902004854805359, "grad_norm": 1.0799236297607422, "learning_rate": 9.277665864476732e-06, "loss": 1.0602, "step": 5507 }, { "epoch": 0.9903802930863975, "grad_norm": 1.3991434574127197, "learning_rate": 9.277364286826507e-06, "loss": 0.7655, "step": 5508 }, { "epoch": 0.9905601006922593, "grad_norm": 1.713663101196289, "learning_rate": 9.27706265113806e-06, "loss": 0.7541, "step": 5509 }, { "epoch": 0.990739908298121, "grad_norm": 1.6031455993652344, "learning_rate": 9.276760957415485e-06, "loss": 0.8111, "step": 5510 }, { "epoch": 0.9909197159039828, "grad_norm": 1.457960605621338, "learning_rate": 9.276459205662875e-06, "loss": 0.7505, "step": 5511 }, { "epoch": 0.9910995235098444, "grad_norm": 1.1515425443649292, "learning_rate": 9.276157395884326e-06, "loss": 0.9969, "step": 5512 }, { "epoch": 0.9912793311157062, "grad_norm": 1.4158600568771362, "learning_rate": 9.275855528083932e-06, "loss": 0.752, "step": 5513 }, { "epoch": 0.9914591387215679, "grad_norm": 1.0426422357559204, "learning_rate": 9.27555360226579e-06, "loss": 1.0321, "step": 5514 }, { "epoch": 0.9916389463274297, "grad_norm": 1.0715957880020142, "learning_rate": 9.275251618433993e-06, "loss": 1.0239, "step": 5515 }, { "epoch": 0.9918187539332913, "grad_norm": 1.0046255588531494, "learning_rate": 9.274949576592645e-06, "loss": 1.0078, "step": 5516 }, { "epoch": 0.9919985615391531, "grad_norm": 1.107450008392334, "learning_rate": 9.274647476745838e-06, "loss": 1.0206, "step": 5517 }, { "epoch": 0.9921783691450148, "grad_norm": 1.4558939933776855, "learning_rate": 9.274345318897674e-06, "loss": 0.8392, "step": 5518 }, { "epoch": 0.9923581767508766, "grad_norm": 1.4592024087905884, "learning_rate": 9.274043103052253e-06, "loss": 0.7487, "step": 5519 }, { "epoch": 0.9925379843567383, "grad_norm": 1.5300066471099854, "learning_rate": 9.273740829213673e-06, "loss": 0.8219, "step": 5520 }, { "epoch": 0.9927177919626, "grad_norm": 1.484954595565796, "learning_rate": 9.27343849738604e-06, "loss": 0.7496, "step": 5521 }, { "epoch": 0.9928975995684618, "grad_norm": 1.3656402826309204, "learning_rate": 9.273136107573453e-06, "loss": 0.7692, "step": 5522 }, { "epoch": 0.9930774071743235, "grad_norm": 1.4043902158737183, "learning_rate": 9.272833659780018e-06, "loss": 0.7691, "step": 5523 }, { "epoch": 0.9932572147801852, "grad_norm": 1.5834009647369385, "learning_rate": 9.272531154009834e-06, "loss": 0.7184, "step": 5524 }, { "epoch": 0.9934370223860469, "grad_norm": 1.4826111793518066, "learning_rate": 9.272228590267009e-06, "loss": 0.8242, "step": 5525 }, { "epoch": 0.9936168299919087, "grad_norm": 1.6056462526321411, "learning_rate": 9.27192596855565e-06, "loss": 0.7863, "step": 5526 }, { "epoch": 0.9937966375977704, "grad_norm": 1.4663348197937012, "learning_rate": 9.271623288879859e-06, "loss": 0.8037, "step": 5527 }, { "epoch": 0.9939764452036322, "grad_norm": 1.5803160667419434, "learning_rate": 9.271320551243745e-06, "loss": 0.7456, "step": 5528 }, { "epoch": 0.9941562528094938, "grad_norm": 1.5345524549484253, "learning_rate": 9.271017755651416e-06, "loss": 0.817, "step": 5529 }, { "epoch": 0.9943360604153556, "grad_norm": 1.5183571577072144, "learning_rate": 9.27071490210698e-06, "loss": 0.8132, "step": 5530 }, { "epoch": 0.9945158680212173, "grad_norm": 1.4374091625213623, "learning_rate": 9.270411990614548e-06, "loss": 0.8321, "step": 5531 }, { "epoch": 0.9946956756270791, "grad_norm": 1.5576709508895874, "learning_rate": 9.270109021178227e-06, "loss": 0.8014, "step": 5532 }, { "epoch": 0.9948754832329407, "grad_norm": 1.4983655214309692, "learning_rate": 9.26980599380213e-06, "loss": 0.8424, "step": 5533 }, { "epoch": 0.9950552908388025, "grad_norm": 1.5845227241516113, "learning_rate": 9.269502908490367e-06, "loss": 0.8578, "step": 5534 }, { "epoch": 0.9952350984446642, "grad_norm": 1.4374641180038452, "learning_rate": 9.269199765247052e-06, "loss": 0.7714, "step": 5535 }, { "epoch": 0.995414906050526, "grad_norm": 1.5874446630477905, "learning_rate": 9.2688965640763e-06, "loss": 0.8443, "step": 5536 }, { "epoch": 0.9955947136563876, "grad_norm": 1.5612971782684326, "learning_rate": 9.268593304982221e-06, "loss": 0.7908, "step": 5537 }, { "epoch": 0.9957745212622494, "grad_norm": 1.5921889543533325, "learning_rate": 9.268289987968932e-06, "loss": 0.7561, "step": 5538 }, { "epoch": 0.9959543288681111, "grad_norm": 1.4579795598983765, "learning_rate": 9.26798661304055e-06, "loss": 0.7947, "step": 5539 }, { "epoch": 0.9961341364739729, "grad_norm": 1.514324426651001, "learning_rate": 9.267683180201189e-06, "loss": 0.7924, "step": 5540 }, { "epoch": 0.9963139440798345, "grad_norm": 1.3929516077041626, "learning_rate": 9.267379689454966e-06, "loss": 0.7215, "step": 5541 }, { "epoch": 0.9964937516856963, "grad_norm": 1.3931025266647339, "learning_rate": 9.267076140806001e-06, "loss": 0.7785, "step": 5542 }, { "epoch": 0.996673559291558, "grad_norm": 1.2615712881088257, "learning_rate": 9.266772534258412e-06, "loss": 1.0732, "step": 5543 }, { "epoch": 0.9968533668974198, "grad_norm": 1.1895344257354736, "learning_rate": 9.266468869816318e-06, "loss": 1.0049, "step": 5544 }, { "epoch": 0.9970331745032814, "grad_norm": 1.7239253520965576, "learning_rate": 9.266165147483839e-06, "loss": 0.8217, "step": 5545 }, { "epoch": 0.9972129821091432, "grad_norm": 1.2689374685287476, "learning_rate": 9.265861367265097e-06, "loss": 1.0115, "step": 5546 }, { "epoch": 0.9973927897150049, "grad_norm": 1.0200244188308716, "learning_rate": 9.265557529164215e-06, "loss": 0.973, "step": 5547 }, { "epoch": 0.9975725973208667, "grad_norm": 1.5025711059570312, "learning_rate": 9.265253633185313e-06, "loss": 0.7938, "step": 5548 }, { "epoch": 0.9977524049267285, "grad_norm": 1.4969233274459839, "learning_rate": 9.264949679332515e-06, "loss": 0.7758, "step": 5549 }, { "epoch": 0.9979322125325901, "grad_norm": 1.467028260231018, "learning_rate": 9.264645667609948e-06, "loss": 0.7946, "step": 5550 }, { "epoch": 0.9981120201384519, "grad_norm": 1.4493361711502075, "learning_rate": 9.264341598021735e-06, "loss": 0.7638, "step": 5551 }, { "epoch": 0.9982918277443136, "grad_norm": 1.5417901277542114, "learning_rate": 9.264037470572e-06, "loss": 0.816, "step": 5552 }, { "epoch": 0.9984716353501754, "grad_norm": 1.5269584655761719, "learning_rate": 9.263733285264873e-06, "loss": 0.7455, "step": 5553 }, { "epoch": 0.998651442956037, "grad_norm": 1.4694838523864746, "learning_rate": 9.26342904210448e-06, "loss": 0.7217, "step": 5554 }, { "epoch": 0.9988312505618988, "grad_norm": 1.5076265335083008, "learning_rate": 9.263124741094948e-06, "loss": 0.7868, "step": 5555 }, { "epoch": 0.9990110581677605, "grad_norm": 1.7360106706619263, "learning_rate": 9.262820382240408e-06, "loss": 0.7159, "step": 5556 }, { "epoch": 0.9991908657736223, "grad_norm": 1.5993796586990356, "learning_rate": 9.262515965544989e-06, "loss": 0.6845, "step": 5557 }, { "epoch": 0.9993706733794839, "grad_norm": 1.4164179563522339, "learning_rate": 9.26221149101282e-06, "loss": 0.7513, "step": 5558 }, { "epoch": 0.9995504809853457, "grad_norm": 1.2817232608795166, "learning_rate": 9.261906958648036e-06, "loss": 1.0297, "step": 5559 }, { "epoch": 0.9997302885912074, "grad_norm": 1.4725055694580078, "learning_rate": 9.261602368454763e-06, "loss": 0.7791, "step": 5560 }, { "epoch": 1.0001798076058617, "grad_norm": 1.4444500207901, "learning_rate": 9.261297720437142e-06, "loss": 0.8014, "step": 5561 }, { "epoch": 1.0003596152117236, "grad_norm": 1.3268214464187622, "learning_rate": 9.2609930145993e-06, "loss": 0.7345, "step": 5562 }, { "epoch": 1.0005394228175852, "grad_norm": 1.4281917810440063, "learning_rate": 9.260688250945374e-06, "loss": 0.7033, "step": 5563 }, { "epoch": 1.000719230423447, "grad_norm": 1.3463271856307983, "learning_rate": 9.260383429479498e-06, "loss": 0.7003, "step": 5564 }, { "epoch": 1.0008990380293086, "grad_norm": 1.370398998260498, "learning_rate": 9.26007855020581e-06, "loss": 0.7173, "step": 5565 }, { "epoch": 1.0010788456351705, "grad_norm": 1.3529536724090576, "learning_rate": 9.259773613128446e-06, "loss": 0.7731, "step": 5566 }, { "epoch": 1.0012586532410321, "grad_norm": 1.4106078147888184, "learning_rate": 9.259468618251545e-06, "loss": 0.7292, "step": 5567 }, { "epoch": 1.0014384608468938, "grad_norm": 1.4781494140625, "learning_rate": 9.259163565579242e-06, "loss": 0.7877, "step": 5568 }, { "epoch": 1.0016182684527555, "grad_norm": 1.3863905668258667, "learning_rate": 9.258858455115679e-06, "loss": 0.6843, "step": 5569 }, { "epoch": 1.0017980760586174, "grad_norm": 1.3751782178878784, "learning_rate": 9.258553286864993e-06, "loss": 0.7186, "step": 5570 }, { "epoch": 1.001977883664479, "grad_norm": 1.4807534217834473, "learning_rate": 9.25824806083133e-06, "loss": 0.6925, "step": 5571 }, { "epoch": 1.0021576912703407, "grad_norm": 1.4231053590774536, "learning_rate": 9.257942777018827e-06, "loss": 0.7782, "step": 5572 }, { "epoch": 1.0023374988762024, "grad_norm": 1.1966956853866577, "learning_rate": 9.257637435431626e-06, "loss": 0.929, "step": 5573 }, { "epoch": 1.0025173064820643, "grad_norm": 1.5033222436904907, "learning_rate": 9.257332036073872e-06, "loss": 0.7729, "step": 5574 }, { "epoch": 1.002697114087926, "grad_norm": 1.6547225713729858, "learning_rate": 9.257026578949709e-06, "loss": 0.7116, "step": 5575 }, { "epoch": 1.0028769216937876, "grad_norm": 1.0616459846496582, "learning_rate": 9.25672106406328e-06, "loss": 0.9453, "step": 5576 }, { "epoch": 1.0030567292996493, "grad_norm": 0.9956672787666321, "learning_rate": 9.256415491418734e-06, "loss": 0.9815, "step": 5577 }, { "epoch": 1.0032365369055112, "grad_norm": 1.4786231517791748, "learning_rate": 9.256109861020213e-06, "loss": 0.7773, "step": 5578 }, { "epoch": 1.0034163445113728, "grad_norm": 1.5099455118179321, "learning_rate": 9.255804172871867e-06, "loss": 0.7755, "step": 5579 }, { "epoch": 1.0035961521172345, "grad_norm": 1.5732470750808716, "learning_rate": 9.25549842697784e-06, "loss": 0.7514, "step": 5580 }, { "epoch": 1.0037759597230962, "grad_norm": 1.4206966161727905, "learning_rate": 9.255192623342287e-06, "loss": 0.7373, "step": 5581 }, { "epoch": 1.003955767328958, "grad_norm": 1.5056943893432617, "learning_rate": 9.25488676196935e-06, "loss": 0.7072, "step": 5582 }, { "epoch": 1.0041355749348198, "grad_norm": 1.4731570482254028, "learning_rate": 9.254580842863185e-06, "loss": 0.7425, "step": 5583 }, { "epoch": 1.0043153825406814, "grad_norm": 1.1961346864700317, "learning_rate": 9.25427486602794e-06, "loss": 0.9697, "step": 5584 }, { "epoch": 1.004495190146543, "grad_norm": 1.3735735416412354, "learning_rate": 9.253968831467765e-06, "loss": 0.7487, "step": 5585 }, { "epoch": 1.004674997752405, "grad_norm": 1.4128272533416748, "learning_rate": 9.253662739186817e-06, "loss": 0.7568, "step": 5586 }, { "epoch": 1.0048548053582667, "grad_norm": 1.4583648443222046, "learning_rate": 9.253356589189247e-06, "loss": 0.7649, "step": 5587 }, { "epoch": 1.0050346129641283, "grad_norm": 1.5369511842727661, "learning_rate": 9.253050381479209e-06, "loss": 0.7394, "step": 5588 }, { "epoch": 1.0052144205699902, "grad_norm": 1.3566842079162598, "learning_rate": 9.252744116060857e-06, "loss": 0.7581, "step": 5589 }, { "epoch": 1.005394228175852, "grad_norm": 1.5122618675231934, "learning_rate": 9.252437792938348e-06, "loss": 0.7581, "step": 5590 }, { "epoch": 1.0055740357817136, "grad_norm": 1.4228779077529907, "learning_rate": 9.252131412115838e-06, "loss": 0.7247, "step": 5591 }, { "epoch": 1.0057538433875752, "grad_norm": 1.1414968967437744, "learning_rate": 9.251824973597483e-06, "loss": 0.9185, "step": 5592 }, { "epoch": 1.0059336509934371, "grad_norm": 1.5435535907745361, "learning_rate": 9.251518477387444e-06, "loss": 0.7936, "step": 5593 }, { "epoch": 1.0061134585992988, "grad_norm": 1.4849529266357422, "learning_rate": 9.251211923489877e-06, "loss": 0.8125, "step": 5594 }, { "epoch": 1.0062932662051605, "grad_norm": 1.9053382873535156, "learning_rate": 9.250905311908943e-06, "loss": 0.7313, "step": 5595 }, { "epoch": 1.0064730738110221, "grad_norm": 1.4677716493606567, "learning_rate": 9.2505986426488e-06, "loss": 0.7996, "step": 5596 }, { "epoch": 1.006652881416884, "grad_norm": 1.42100191116333, "learning_rate": 9.250291915713613e-06, "loss": 0.7187, "step": 5597 }, { "epoch": 1.0068326890227457, "grad_norm": 1.5126135349273682, "learning_rate": 9.249985131107541e-06, "loss": 0.7156, "step": 5598 }, { "epoch": 1.0070124966286074, "grad_norm": 1.3606711626052856, "learning_rate": 9.249678288834747e-06, "loss": 0.7125, "step": 5599 }, { "epoch": 1.007192304234469, "grad_norm": 1.3683743476867676, "learning_rate": 9.249371388899395e-06, "loss": 0.713, "step": 5600 }, { "epoch": 1.007372111840331, "grad_norm": 1.4852293729782104, "learning_rate": 9.249064431305647e-06, "loss": 0.7679, "step": 5601 }, { "epoch": 1.0075519194461926, "grad_norm": 1.3400685787200928, "learning_rate": 9.248757416057672e-06, "loss": 0.6649, "step": 5602 }, { "epoch": 1.0077317270520543, "grad_norm": 1.4079093933105469, "learning_rate": 9.248450343159635e-06, "loss": 0.7203, "step": 5603 }, { "epoch": 1.007911534657916, "grad_norm": 1.7185794115066528, "learning_rate": 9.248143212615698e-06, "loss": 0.7146, "step": 5604 }, { "epoch": 1.0080913422637778, "grad_norm": 1.465437412261963, "learning_rate": 9.247836024430034e-06, "loss": 0.7143, "step": 5605 }, { "epoch": 1.0082711498696395, "grad_norm": 1.4666272401809692, "learning_rate": 9.24752877860681e-06, "loss": 0.7291, "step": 5606 }, { "epoch": 1.0084509574755012, "grad_norm": 1.5297152996063232, "learning_rate": 9.24722147515019e-06, "loss": 0.7378, "step": 5607 }, { "epoch": 1.0086307650813628, "grad_norm": 1.4621238708496094, "learning_rate": 9.246914114064351e-06, "loss": 0.685, "step": 5608 }, { "epoch": 1.0088105726872247, "grad_norm": 1.3408464193344116, "learning_rate": 9.24660669535346e-06, "loss": 0.7338, "step": 5609 }, { "epoch": 1.0089903802930864, "grad_norm": 1.1327366828918457, "learning_rate": 9.246299219021685e-06, "loss": 0.9628, "step": 5610 }, { "epoch": 1.009170187898948, "grad_norm": 1.5991307497024536, "learning_rate": 9.245991685073205e-06, "loss": 0.8263, "step": 5611 }, { "epoch": 1.0093499955048097, "grad_norm": 1.1568052768707275, "learning_rate": 9.245684093512186e-06, "loss": 0.9497, "step": 5612 }, { "epoch": 1.0095298031106716, "grad_norm": 1.8113433122634888, "learning_rate": 9.245376444342807e-06, "loss": 0.7244, "step": 5613 }, { "epoch": 1.0097096107165333, "grad_norm": 1.4280247688293457, "learning_rate": 9.245068737569241e-06, "loss": 0.7122, "step": 5614 }, { "epoch": 1.009889418322395, "grad_norm": 1.528572678565979, "learning_rate": 9.24476097319566e-06, "loss": 0.7262, "step": 5615 }, { "epoch": 1.0100692259282569, "grad_norm": 1.2099380493164062, "learning_rate": 9.244453151226243e-06, "loss": 0.9448, "step": 5616 }, { "epoch": 1.0102490335341185, "grad_norm": 1.4672467708587646, "learning_rate": 9.244145271665165e-06, "loss": 0.7223, "step": 5617 }, { "epoch": 1.0104288411399802, "grad_norm": 1.171207070350647, "learning_rate": 9.243837334516608e-06, "loss": 0.9279, "step": 5618 }, { "epoch": 1.0106086487458419, "grad_norm": 1.5172580480575562, "learning_rate": 9.243529339784744e-06, "loss": 0.728, "step": 5619 }, { "epoch": 1.0107884563517038, "grad_norm": 1.4269697666168213, "learning_rate": 9.243221287473755e-06, "loss": 0.7371, "step": 5620 }, { "epoch": 1.0109682639575654, "grad_norm": 1.4172812700271606, "learning_rate": 9.242913177587823e-06, "loss": 0.7054, "step": 5621 }, { "epoch": 1.0111480715634271, "grad_norm": 1.4368653297424316, "learning_rate": 9.242605010131125e-06, "loss": 0.7158, "step": 5622 }, { "epoch": 1.0113278791692888, "grad_norm": 1.526574969291687, "learning_rate": 9.242296785107843e-06, "loss": 0.7143, "step": 5623 }, { "epoch": 1.0115076867751507, "grad_norm": 1.4783798456192017, "learning_rate": 9.241988502522162e-06, "loss": 0.7398, "step": 5624 }, { "epoch": 1.0116874943810124, "grad_norm": 1.4779304265975952, "learning_rate": 9.241680162378261e-06, "loss": 0.7947, "step": 5625 }, { "epoch": 1.011867301986874, "grad_norm": 1.499954104423523, "learning_rate": 9.241371764680326e-06, "loss": 0.6847, "step": 5626 }, { "epoch": 1.0120471095927357, "grad_norm": 1.4357830286026, "learning_rate": 9.241063309432543e-06, "loss": 0.6954, "step": 5627 }, { "epoch": 1.0122269171985976, "grad_norm": 1.5150551795959473, "learning_rate": 9.240754796639095e-06, "loss": 0.7384, "step": 5628 }, { "epoch": 1.0124067248044593, "grad_norm": 1.56706964969635, "learning_rate": 9.240446226304169e-06, "loss": 0.6961, "step": 5629 }, { "epoch": 1.012586532410321, "grad_norm": 1.445920705795288, "learning_rate": 9.240137598431951e-06, "loss": 0.7154, "step": 5630 }, { "epoch": 1.0127663400161826, "grad_norm": 1.463571310043335, "learning_rate": 9.23982891302663e-06, "loss": 0.721, "step": 5631 }, { "epoch": 1.0129461476220445, "grad_norm": 1.4446630477905273, "learning_rate": 9.239520170092393e-06, "loss": 0.6938, "step": 5632 }, { "epoch": 1.0131259552279062, "grad_norm": 1.5183343887329102, "learning_rate": 9.239211369633431e-06, "loss": 0.8445, "step": 5633 }, { "epoch": 1.0133057628337678, "grad_norm": 1.391709566116333, "learning_rate": 9.238902511653934e-06, "loss": 0.7313, "step": 5634 }, { "epoch": 1.0134855704396295, "grad_norm": 1.1561344861984253, "learning_rate": 9.238593596158091e-06, "loss": 0.9635, "step": 5635 }, { "epoch": 1.0136653780454914, "grad_norm": 1.7495532035827637, "learning_rate": 9.238284623150095e-06, "loss": 0.7041, "step": 5636 }, { "epoch": 1.013845185651353, "grad_norm": 1.4798754453659058, "learning_rate": 9.237975592634137e-06, "loss": 0.7433, "step": 5637 }, { "epoch": 1.0140249932572147, "grad_norm": 1.484702229499817, "learning_rate": 9.237666504614412e-06, "loss": 0.629, "step": 5638 }, { "epoch": 1.0142048008630764, "grad_norm": 1.434161901473999, "learning_rate": 9.237357359095111e-06, "loss": 0.7026, "step": 5639 }, { "epoch": 1.0143846084689383, "grad_norm": 1.714363694190979, "learning_rate": 9.237048156080433e-06, "loss": 0.766, "step": 5640 }, { "epoch": 1.0145644160748, "grad_norm": 1.5089430809020996, "learning_rate": 9.23673889557457e-06, "loss": 0.7157, "step": 5641 }, { "epoch": 1.0147442236806616, "grad_norm": 1.5539649724960327, "learning_rate": 9.23642957758172e-06, "loss": 0.7531, "step": 5642 }, { "epoch": 1.0149240312865233, "grad_norm": 1.6001121997833252, "learning_rate": 9.236120202106079e-06, "loss": 0.7337, "step": 5643 }, { "epoch": 1.0151038388923852, "grad_norm": 1.5487552881240845, "learning_rate": 9.235810769151845e-06, "loss": 0.7972, "step": 5644 }, { "epoch": 1.0152836464982469, "grad_norm": 1.4082220792770386, "learning_rate": 9.235501278723218e-06, "loss": 0.7845, "step": 5645 }, { "epoch": 1.0154634541041085, "grad_norm": 1.1588997840881348, "learning_rate": 9.235191730824394e-06, "loss": 0.9952, "step": 5646 }, { "epoch": 1.0156432617099704, "grad_norm": 1.477670431137085, "learning_rate": 9.234882125459577e-06, "loss": 0.7417, "step": 5647 }, { "epoch": 1.015823069315832, "grad_norm": 1.511650800704956, "learning_rate": 9.234572462632966e-06, "loss": 0.7204, "step": 5648 }, { "epoch": 1.0160028769216938, "grad_norm": 1.4907937049865723, "learning_rate": 9.234262742348764e-06, "loss": 0.7258, "step": 5649 }, { "epoch": 1.0161826845275554, "grad_norm": 1.50380539894104, "learning_rate": 9.23395296461117e-06, "loss": 0.7316, "step": 5650 }, { "epoch": 1.0163624921334173, "grad_norm": 1.357260823249817, "learning_rate": 9.233643129424392e-06, "loss": 0.6846, "step": 5651 }, { "epoch": 1.016542299739279, "grad_norm": 1.4566365480422974, "learning_rate": 9.23333323679263e-06, "loss": 0.774, "step": 5652 }, { "epoch": 1.0167221073451407, "grad_norm": 1.380244255065918, "learning_rate": 9.233023286720093e-06, "loss": 0.7031, "step": 5653 }, { "epoch": 1.0169019149510024, "grad_norm": 1.4014148712158203, "learning_rate": 9.232713279210982e-06, "loss": 0.6889, "step": 5654 }, { "epoch": 1.0170817225568642, "grad_norm": 1.5606322288513184, "learning_rate": 9.232403214269508e-06, "loss": 0.7466, "step": 5655 }, { "epoch": 1.017261530162726, "grad_norm": 1.4186490774154663, "learning_rate": 9.232093091899873e-06, "loss": 0.7378, "step": 5656 }, { "epoch": 1.0174413377685876, "grad_norm": 1.1846269369125366, "learning_rate": 9.23178291210629e-06, "loss": 0.9667, "step": 5657 }, { "epoch": 1.0176211453744493, "grad_norm": 1.492113709449768, "learning_rate": 9.231472674892965e-06, "loss": 0.7697, "step": 5658 }, { "epoch": 1.0178009529803111, "grad_norm": 1.4239416122436523, "learning_rate": 9.231162380264107e-06, "loss": 0.7952, "step": 5659 }, { "epoch": 1.0179807605861728, "grad_norm": 1.3979108333587646, "learning_rate": 9.23085202822393e-06, "loss": 0.7243, "step": 5660 }, { "epoch": 1.0181605681920345, "grad_norm": 1.456268310546875, "learning_rate": 9.230541618776641e-06, "loss": 0.7266, "step": 5661 }, { "epoch": 1.0183403757978962, "grad_norm": 1.5391161441802979, "learning_rate": 9.230231151926452e-06, "loss": 0.8063, "step": 5662 }, { "epoch": 1.018520183403758, "grad_norm": 1.5336904525756836, "learning_rate": 9.229920627677578e-06, "loss": 0.717, "step": 5663 }, { "epoch": 1.0186999910096197, "grad_norm": 1.4785479307174683, "learning_rate": 9.229610046034233e-06, "loss": 0.7122, "step": 5664 }, { "epoch": 1.0188797986154814, "grad_norm": 1.3682721853256226, "learning_rate": 9.229299407000628e-06, "loss": 0.7827, "step": 5665 }, { "epoch": 1.019059606221343, "grad_norm": 1.410521388053894, "learning_rate": 9.228988710580977e-06, "loss": 0.6916, "step": 5666 }, { "epoch": 1.019239413827205, "grad_norm": 1.4011951684951782, "learning_rate": 9.228677956779502e-06, "loss": 0.7686, "step": 5667 }, { "epoch": 1.0194192214330666, "grad_norm": 1.149515151977539, "learning_rate": 9.228367145600414e-06, "loss": 0.9745, "step": 5668 }, { "epoch": 1.0195990290389283, "grad_norm": 1.4128772020339966, "learning_rate": 9.228056277047931e-06, "loss": 0.7541, "step": 5669 }, { "epoch": 1.01977883664479, "grad_norm": 2.05098032951355, "learning_rate": 9.227745351126274e-06, "loss": 0.7632, "step": 5670 }, { "epoch": 1.0199586442506519, "grad_norm": 1.549769401550293, "learning_rate": 9.22743436783966e-06, "loss": 0.7054, "step": 5671 }, { "epoch": 1.0201384518565135, "grad_norm": 1.416236400604248, "learning_rate": 9.227123327192308e-06, "loss": 0.7555, "step": 5672 }, { "epoch": 1.0203182594623752, "grad_norm": 1.4301259517669678, "learning_rate": 9.226812229188439e-06, "loss": 0.7389, "step": 5673 }, { "epoch": 1.020498067068237, "grad_norm": 1.536728024482727, "learning_rate": 9.226501073832274e-06, "loss": 0.7729, "step": 5674 }, { "epoch": 1.0206778746740988, "grad_norm": 1.4189982414245605, "learning_rate": 9.226189861128036e-06, "loss": 0.7741, "step": 5675 }, { "epoch": 1.0208576822799604, "grad_norm": 1.4563860893249512, "learning_rate": 9.225878591079947e-06, "loss": 0.7752, "step": 5676 }, { "epoch": 1.021037489885822, "grad_norm": 1.4465265274047852, "learning_rate": 9.225567263692227e-06, "loss": 0.6716, "step": 5677 }, { "epoch": 1.021217297491684, "grad_norm": 1.5120795965194702, "learning_rate": 9.225255878969108e-06, "loss": 0.7884, "step": 5678 }, { "epoch": 1.0213971050975457, "grad_norm": 1.082964301109314, "learning_rate": 9.22494443691481e-06, "loss": 0.9658, "step": 5679 }, { "epoch": 1.0215769127034073, "grad_norm": 1.5893404483795166, "learning_rate": 9.224632937533558e-06, "loss": 0.7663, "step": 5680 }, { "epoch": 1.021756720309269, "grad_norm": 1.3930646181106567, "learning_rate": 9.224321380829582e-06, "loss": 0.7715, "step": 5681 }, { "epoch": 1.021936527915131, "grad_norm": 1.3700381517410278, "learning_rate": 9.224009766807107e-06, "loss": 0.6903, "step": 5682 }, { "epoch": 1.0221163355209926, "grad_norm": 0.9886298775672913, "learning_rate": 9.223698095470361e-06, "loss": 0.9607, "step": 5683 }, { "epoch": 1.0222961431268542, "grad_norm": 1.4778406620025635, "learning_rate": 9.223386366823576e-06, "loss": 0.7827, "step": 5684 }, { "epoch": 1.022475950732716, "grad_norm": 1.5600627660751343, "learning_rate": 9.223074580870979e-06, "loss": 0.762, "step": 5685 }, { "epoch": 1.0226557583385778, "grad_norm": 1.5036396980285645, "learning_rate": 9.222762737616799e-06, "loss": 0.7952, "step": 5686 }, { "epoch": 1.0228355659444395, "grad_norm": 1.4218746423721313, "learning_rate": 9.222450837065274e-06, "loss": 0.7268, "step": 5687 }, { "epoch": 1.0230153735503011, "grad_norm": 1.4845508337020874, "learning_rate": 9.222138879220628e-06, "loss": 0.7755, "step": 5688 }, { "epoch": 1.0231951811561628, "grad_norm": 1.3948051929473877, "learning_rate": 9.221826864087098e-06, "loss": 0.6279, "step": 5689 }, { "epoch": 1.0233749887620247, "grad_norm": 1.5350418090820312, "learning_rate": 9.221514791668917e-06, "loss": 0.7276, "step": 5690 }, { "epoch": 1.0235547963678864, "grad_norm": 1.4340229034423828, "learning_rate": 9.22120266197032e-06, "loss": 0.7351, "step": 5691 }, { "epoch": 1.023734603973748, "grad_norm": 1.5073548555374146, "learning_rate": 9.22089047499554e-06, "loss": 0.8097, "step": 5692 }, { "epoch": 1.0239144115796097, "grad_norm": 1.4705836772918701, "learning_rate": 9.220578230748818e-06, "loss": 0.7884, "step": 5693 }, { "epoch": 1.0240942191854716, "grad_norm": 1.609426736831665, "learning_rate": 9.220265929234384e-06, "loss": 0.6853, "step": 5694 }, { "epoch": 1.0242740267913333, "grad_norm": 1.5416369438171387, "learning_rate": 9.219953570456481e-06, "loss": 0.7568, "step": 5695 }, { "epoch": 1.024453834397195, "grad_norm": 1.2087817192077637, "learning_rate": 9.219641154419345e-06, "loss": 0.9676, "step": 5696 }, { "epoch": 1.0246336420030566, "grad_norm": 1.428736686706543, "learning_rate": 9.219328681127216e-06, "loss": 0.7968, "step": 5697 }, { "epoch": 1.0248134496089185, "grad_norm": 1.49289870262146, "learning_rate": 9.219016150584331e-06, "loss": 0.7797, "step": 5698 }, { "epoch": 1.0249932572147802, "grad_norm": 1.491161584854126, "learning_rate": 9.218703562794933e-06, "loss": 0.807, "step": 5699 }, { "epoch": 1.0251730648206419, "grad_norm": 1.385357141494751, "learning_rate": 9.218390917763265e-06, "loss": 0.7711, "step": 5700 }, { "epoch": 1.0253528724265037, "grad_norm": 1.4871817827224731, "learning_rate": 9.218078215493566e-06, "loss": 0.6301, "step": 5701 }, { "epoch": 1.0255326800323654, "grad_norm": 1.4534050226211548, "learning_rate": 9.217765455990081e-06, "loss": 0.743, "step": 5702 }, { "epoch": 1.025712487638227, "grad_norm": 1.4243519306182861, "learning_rate": 9.217452639257053e-06, "loss": 0.7565, "step": 5703 }, { "epoch": 1.0258922952440888, "grad_norm": 1.492922067642212, "learning_rate": 9.217139765298725e-06, "loss": 0.7596, "step": 5704 }, { "epoch": 1.0260721028499507, "grad_norm": 1.490250587463379, "learning_rate": 9.216826834119346e-06, "loss": 0.7415, "step": 5705 }, { "epoch": 1.0262519104558123, "grad_norm": 1.5426201820373535, "learning_rate": 9.216513845723158e-06, "loss": 0.7784, "step": 5706 }, { "epoch": 1.026431718061674, "grad_norm": 1.0487860441207886, "learning_rate": 9.216200800114412e-06, "loss": 0.9782, "step": 5707 }, { "epoch": 1.0266115256675357, "grad_norm": 1.1257117986679077, "learning_rate": 9.215887697297352e-06, "loss": 0.9704, "step": 5708 }, { "epoch": 1.0267913332733976, "grad_norm": 1.5688587427139282, "learning_rate": 9.215574537276228e-06, "loss": 0.7912, "step": 5709 }, { "epoch": 1.0269711408792592, "grad_norm": 1.5905157327651978, "learning_rate": 9.215261320055288e-06, "loss": 0.772, "step": 5710 }, { "epoch": 1.027150948485121, "grad_norm": 1.5344831943511963, "learning_rate": 9.214948045638786e-06, "loss": 0.7399, "step": 5711 }, { "epoch": 1.0273307560909826, "grad_norm": 1.4851922988891602, "learning_rate": 9.214634714030966e-06, "loss": 0.7659, "step": 5712 }, { "epoch": 1.0275105636968445, "grad_norm": 1.4858677387237549, "learning_rate": 9.214321325236084e-06, "loss": 0.7298, "step": 5713 }, { "epoch": 1.0276903713027061, "grad_norm": 1.6285176277160645, "learning_rate": 9.214007879258391e-06, "loss": 0.7859, "step": 5714 }, { "epoch": 1.0278701789085678, "grad_norm": 1.5501370429992676, "learning_rate": 9.213694376102142e-06, "loss": 0.758, "step": 5715 }, { "epoch": 1.0280499865144295, "grad_norm": 1.47209894657135, "learning_rate": 9.21338081577159e-06, "loss": 0.7478, "step": 5716 }, { "epoch": 1.0282297941202914, "grad_norm": 1.5598714351654053, "learning_rate": 9.213067198270987e-06, "loss": 0.7286, "step": 5717 }, { "epoch": 1.028409601726153, "grad_norm": 1.4270637035369873, "learning_rate": 9.21275352360459e-06, "loss": 0.7573, "step": 5718 }, { "epoch": 1.0285894093320147, "grad_norm": 1.4913058280944824, "learning_rate": 9.212439791776656e-06, "loss": 0.7828, "step": 5719 }, { "epoch": 1.0287692169378764, "grad_norm": 1.614968180656433, "learning_rate": 9.212126002791442e-06, "loss": 0.7244, "step": 5720 }, { "epoch": 1.0289490245437383, "grad_norm": 1.5189528465270996, "learning_rate": 9.211812156653204e-06, "loss": 0.7673, "step": 5721 }, { "epoch": 1.0291288321496, "grad_norm": 1.1726831197738647, "learning_rate": 9.2114982533662e-06, "loss": 0.9762, "step": 5722 }, { "epoch": 1.0293086397554616, "grad_norm": 1.4915345907211304, "learning_rate": 9.211184292934693e-06, "loss": 0.7527, "step": 5723 }, { "epoch": 1.0294884473613233, "grad_norm": 1.498150110244751, "learning_rate": 9.210870275362942e-06, "loss": 0.7585, "step": 5724 }, { "epoch": 1.0296682549671852, "grad_norm": 1.0475943088531494, "learning_rate": 9.210556200655204e-06, "loss": 0.9819, "step": 5725 }, { "epoch": 1.0298480625730468, "grad_norm": 1.4680896997451782, "learning_rate": 9.210242068815745e-06, "loss": 0.7333, "step": 5726 }, { "epoch": 1.0300278701789085, "grad_norm": 1.4008170366287231, "learning_rate": 9.209927879848825e-06, "loss": 0.7349, "step": 5727 }, { "epoch": 1.0302076777847704, "grad_norm": 1.4700355529785156, "learning_rate": 9.209613633758707e-06, "loss": 0.7182, "step": 5728 }, { "epoch": 1.030387485390632, "grad_norm": 2.935225486755371, "learning_rate": 9.209299330549657e-06, "loss": 0.7024, "step": 5729 }, { "epoch": 1.0305672929964937, "grad_norm": 1.5207966566085815, "learning_rate": 9.208984970225936e-06, "loss": 0.7295, "step": 5730 }, { "epoch": 1.0307471006023554, "grad_norm": 1.570422887802124, "learning_rate": 9.208670552791814e-06, "loss": 0.761, "step": 5731 }, { "epoch": 1.0309269082082173, "grad_norm": 1.431427001953125, "learning_rate": 9.208356078251554e-06, "loss": 0.7575, "step": 5732 }, { "epoch": 1.031106715814079, "grad_norm": 1.1005911827087402, "learning_rate": 9.208041546609424e-06, "loss": 0.9523, "step": 5733 }, { "epoch": 1.0312865234199406, "grad_norm": 1.4166061878204346, "learning_rate": 9.20772695786969e-06, "loss": 0.7353, "step": 5734 }, { "epoch": 1.0314663310258023, "grad_norm": 1.1474984884262085, "learning_rate": 9.207412312036625e-06, "loss": 0.9692, "step": 5735 }, { "epoch": 1.0316461386316642, "grad_norm": 1.4199912548065186, "learning_rate": 9.207097609114495e-06, "loss": 0.7173, "step": 5736 }, { "epoch": 1.0318259462375259, "grad_norm": 1.4537255764007568, "learning_rate": 9.20678284910757e-06, "loss": 0.7467, "step": 5737 }, { "epoch": 1.0320057538433876, "grad_norm": 1.535535454750061, "learning_rate": 9.206468032020122e-06, "loss": 0.7189, "step": 5738 }, { "epoch": 1.0321855614492492, "grad_norm": 0.9620423316955566, "learning_rate": 9.206153157856421e-06, "loss": 0.9654, "step": 5739 }, { "epoch": 1.0323653690551111, "grad_norm": 1.4186580181121826, "learning_rate": 9.205838226620743e-06, "loss": 0.6879, "step": 5740 }, { "epoch": 1.0325451766609728, "grad_norm": 1.4537264108657837, "learning_rate": 9.205523238317358e-06, "loss": 0.7573, "step": 5741 }, { "epoch": 1.0327249842668345, "grad_norm": 1.411291241645813, "learning_rate": 9.205208192950539e-06, "loss": 0.7241, "step": 5742 }, { "epoch": 1.0329047918726961, "grad_norm": 1.263964056968689, "learning_rate": 9.204893090524564e-06, "loss": 0.676, "step": 5743 }, { "epoch": 1.033084599478558, "grad_norm": 1.1176085472106934, "learning_rate": 9.204577931043708e-06, "loss": 0.9597, "step": 5744 }, { "epoch": 1.0332644070844197, "grad_norm": 1.4411234855651855, "learning_rate": 9.204262714512246e-06, "loss": 0.7398, "step": 5745 }, { "epoch": 1.0334442146902814, "grad_norm": 1.4498869180679321, "learning_rate": 9.203947440934455e-06, "loss": 0.76, "step": 5746 }, { "epoch": 1.033624022296143, "grad_norm": 1.4406543970108032, "learning_rate": 9.203632110314614e-06, "loss": 0.7621, "step": 5747 }, { "epoch": 1.033803829902005, "grad_norm": 1.590957760810852, "learning_rate": 9.203316722657e-06, "loss": 0.7102, "step": 5748 }, { "epoch": 1.0339836375078666, "grad_norm": 1.4475706815719604, "learning_rate": 9.203001277965895e-06, "loss": 0.6804, "step": 5749 }, { "epoch": 1.0341634451137283, "grad_norm": 1.5138474702835083, "learning_rate": 9.202685776245577e-06, "loss": 0.779, "step": 5750 }, { "epoch": 1.03434325271959, "grad_norm": 1.4465546607971191, "learning_rate": 9.202370217500327e-06, "loss": 0.7279, "step": 5751 }, { "epoch": 1.0345230603254518, "grad_norm": 1.1250025033950806, "learning_rate": 9.202054601734429e-06, "loss": 0.9412, "step": 5752 }, { "epoch": 1.0347028679313135, "grad_norm": 1.3182092905044556, "learning_rate": 9.201738928952163e-06, "loss": 0.9, "step": 5753 }, { "epoch": 1.0348826755371752, "grad_norm": 1.455559253692627, "learning_rate": 9.201423199157811e-06, "loss": 0.7628, "step": 5754 }, { "epoch": 1.035062483143037, "grad_norm": 1.4373012781143188, "learning_rate": 9.201107412355659e-06, "loss": 0.6972, "step": 5755 }, { "epoch": 1.0352422907488987, "grad_norm": 1.4140126705169678, "learning_rate": 9.200791568549994e-06, "loss": 0.7322, "step": 5756 }, { "epoch": 1.0354220983547604, "grad_norm": 1.512750506401062, "learning_rate": 9.200475667745098e-06, "loss": 0.8096, "step": 5757 }, { "epoch": 1.035601905960622, "grad_norm": 1.3664056062698364, "learning_rate": 9.20015970994526e-06, "loss": 0.7635, "step": 5758 }, { "epoch": 1.035781713566484, "grad_norm": 1.2446553707122803, "learning_rate": 9.199843695154765e-06, "loss": 0.9735, "step": 5759 }, { "epoch": 1.0359615211723456, "grad_norm": 1.4735602140426636, "learning_rate": 9.199527623377902e-06, "loss": 0.6941, "step": 5760 }, { "epoch": 1.0361413287782073, "grad_norm": 1.5135481357574463, "learning_rate": 9.199211494618959e-06, "loss": 0.7153, "step": 5761 }, { "epoch": 1.036321136384069, "grad_norm": 1.4822216033935547, "learning_rate": 9.198895308882227e-06, "loss": 0.7602, "step": 5762 }, { "epoch": 1.0365009439899309, "grad_norm": 1.459065556526184, "learning_rate": 9.198579066171994e-06, "loss": 0.736, "step": 5763 }, { "epoch": 1.0366807515957925, "grad_norm": 1.5530343055725098, "learning_rate": 9.198262766492554e-06, "loss": 0.739, "step": 5764 }, { "epoch": 1.0368605592016542, "grad_norm": 1.404847502708435, "learning_rate": 9.197946409848196e-06, "loss": 0.7064, "step": 5765 }, { "epoch": 1.0370403668075159, "grad_norm": 1.4972202777862549, "learning_rate": 9.197629996243213e-06, "loss": 0.708, "step": 5766 }, { "epoch": 1.0372201744133778, "grad_norm": 1.4515504837036133, "learning_rate": 9.1973135256819e-06, "loss": 0.7018, "step": 5767 }, { "epoch": 1.0373999820192394, "grad_norm": 1.4523056745529175, "learning_rate": 9.196996998168547e-06, "loss": 0.6962, "step": 5768 }, { "epoch": 1.0375797896251011, "grad_norm": 1.0497629642486572, "learning_rate": 9.196680413707456e-06, "loss": 0.9955, "step": 5769 }, { "epoch": 1.0377595972309628, "grad_norm": 1.482413411140442, "learning_rate": 9.196363772302914e-06, "loss": 0.7188, "step": 5770 }, { "epoch": 1.0379394048368247, "grad_norm": 1.6417388916015625, "learning_rate": 9.196047073959224e-06, "loss": 0.6971, "step": 5771 }, { "epoch": 1.0381192124426863, "grad_norm": 1.5118227005004883, "learning_rate": 9.195730318680682e-06, "loss": 0.7244, "step": 5772 }, { "epoch": 1.038299020048548, "grad_norm": 1.5039823055267334, "learning_rate": 9.195413506471584e-06, "loss": 0.7094, "step": 5773 }, { "epoch": 1.0384788276544097, "grad_norm": 1.485705852508545, "learning_rate": 9.19509663733623e-06, "loss": 0.791, "step": 5774 }, { "epoch": 1.0386586352602716, "grad_norm": 1.0920453071594238, "learning_rate": 9.194779711278919e-06, "loss": 0.9418, "step": 5775 }, { "epoch": 1.0388384428661332, "grad_norm": 1.596853494644165, "learning_rate": 9.19446272830395e-06, "loss": 0.7739, "step": 5776 }, { "epoch": 1.039018250471995, "grad_norm": 1.5799345970153809, "learning_rate": 9.194145688415627e-06, "loss": 0.7766, "step": 5777 }, { "epoch": 1.0391980580778566, "grad_norm": 1.0859266519546509, "learning_rate": 9.19382859161825e-06, "loss": 0.9819, "step": 5778 }, { "epoch": 1.0393778656837185, "grad_norm": 1.6500812768936157, "learning_rate": 9.193511437916121e-06, "loss": 0.7725, "step": 5779 }, { "epoch": 1.0395576732895802, "grad_norm": 1.8403345346450806, "learning_rate": 9.193194227313547e-06, "loss": 0.6999, "step": 5780 }, { "epoch": 1.0397374808954418, "grad_norm": 1.471664547920227, "learning_rate": 9.192876959814828e-06, "loss": 0.7284, "step": 5781 }, { "epoch": 1.0399172885013037, "grad_norm": 1.087530493736267, "learning_rate": 9.192559635424268e-06, "loss": 0.9668, "step": 5782 }, { "epoch": 1.0400970961071654, "grad_norm": 1.4454129934310913, "learning_rate": 9.192242254146178e-06, "loss": 0.7083, "step": 5783 }, { "epoch": 1.040276903713027, "grad_norm": 1.490599274635315, "learning_rate": 9.19192481598486e-06, "loss": 0.8163, "step": 5784 }, { "epoch": 1.0404567113188887, "grad_norm": 1.4224215745925903, "learning_rate": 9.191607320944622e-06, "loss": 0.7913, "step": 5785 }, { "epoch": 1.0406365189247506, "grad_norm": 1.4893057346343994, "learning_rate": 9.191289769029774e-06, "loss": 0.7543, "step": 5786 }, { "epoch": 1.0408163265306123, "grad_norm": 1.4922993183135986, "learning_rate": 9.190972160244623e-06, "loss": 0.734, "step": 5787 }, { "epoch": 1.040996134136474, "grad_norm": 1.4666166305541992, "learning_rate": 9.190654494593479e-06, "loss": 0.7447, "step": 5788 }, { "epoch": 1.0411759417423356, "grad_norm": 1.4951754808425903, "learning_rate": 9.190336772080651e-06, "loss": 0.8123, "step": 5789 }, { "epoch": 1.0413557493481975, "grad_norm": 1.6027759313583374, "learning_rate": 9.190018992710452e-06, "loss": 0.7766, "step": 5790 }, { "epoch": 1.0415355569540592, "grad_norm": 1.4400606155395508, "learning_rate": 9.189701156487195e-06, "loss": 0.7512, "step": 5791 }, { "epoch": 1.0417153645599209, "grad_norm": 1.4334014654159546, "learning_rate": 9.189383263415189e-06, "loss": 0.7237, "step": 5792 }, { "epoch": 1.0418951721657825, "grad_norm": 1.4461945295333862, "learning_rate": 9.189065313498748e-06, "loss": 0.7941, "step": 5793 }, { "epoch": 1.0420749797716444, "grad_norm": 1.4452329874038696, "learning_rate": 9.18874730674219e-06, "loss": 0.7598, "step": 5794 }, { "epoch": 1.042254787377506, "grad_norm": 1.183083176612854, "learning_rate": 9.188429243149824e-06, "loss": 0.9487, "step": 5795 }, { "epoch": 1.0424345949833678, "grad_norm": 1.1842206716537476, "learning_rate": 9.188111122725971e-06, "loss": 0.9275, "step": 5796 }, { "epoch": 1.0426144025892294, "grad_norm": 1.5178585052490234, "learning_rate": 9.187792945474945e-06, "loss": 0.8391, "step": 5797 }, { "epoch": 1.0427942101950913, "grad_norm": 1.5750097036361694, "learning_rate": 9.187474711401065e-06, "loss": 0.8102, "step": 5798 }, { "epoch": 1.042974017800953, "grad_norm": 1.4557713270187378, "learning_rate": 9.187156420508646e-06, "loss": 0.7465, "step": 5799 }, { "epoch": 1.0431538254068147, "grad_norm": 1.1771732568740845, "learning_rate": 9.18683807280201e-06, "loss": 0.96, "step": 5800 }, { "epoch": 1.0433336330126763, "grad_norm": 1.517438292503357, "learning_rate": 9.186519668285474e-06, "loss": 0.7575, "step": 5801 }, { "epoch": 1.0435134406185382, "grad_norm": 1.123138666152954, "learning_rate": 9.186201206963359e-06, "loss": 0.9418, "step": 5802 }, { "epoch": 1.0436932482244, "grad_norm": 1.5337401628494263, "learning_rate": 9.185882688839987e-06, "loss": 0.7629, "step": 5803 }, { "epoch": 1.0438730558302616, "grad_norm": 1.4754425287246704, "learning_rate": 9.18556411391968e-06, "loss": 0.7309, "step": 5804 }, { "epoch": 1.0440528634361232, "grad_norm": 1.6404261589050293, "learning_rate": 9.18524548220676e-06, "loss": 0.7376, "step": 5805 }, { "epoch": 1.0442326710419851, "grad_norm": 1.3915715217590332, "learning_rate": 9.184926793705549e-06, "loss": 0.7521, "step": 5806 }, { "epoch": 1.0444124786478468, "grad_norm": 1.436976432800293, "learning_rate": 9.184608048420374e-06, "loss": 0.7467, "step": 5807 }, { "epoch": 1.0445922862537085, "grad_norm": 1.5713062286376953, "learning_rate": 9.184289246355558e-06, "loss": 0.7113, "step": 5808 }, { "epoch": 1.0447720938595704, "grad_norm": 1.1107804775238037, "learning_rate": 9.183970387515427e-06, "loss": 0.9863, "step": 5809 }, { "epoch": 1.044951901465432, "grad_norm": 1.4288352727890015, "learning_rate": 9.183651471904309e-06, "loss": 0.7837, "step": 5810 }, { "epoch": 1.0451317090712937, "grad_norm": 1.5045766830444336, "learning_rate": 9.183332499526528e-06, "loss": 0.729, "step": 5811 }, { "epoch": 1.0453115166771554, "grad_norm": 1.121885895729065, "learning_rate": 9.183013470386416e-06, "loss": 0.9344, "step": 5812 }, { "epoch": 1.0454913242830173, "grad_norm": 1.4817808866500854, "learning_rate": 9.1826943844883e-06, "loss": 0.7452, "step": 5813 }, { "epoch": 1.045671131888879, "grad_norm": 1.467350959777832, "learning_rate": 9.182375241836508e-06, "loss": 0.7043, "step": 5814 }, { "epoch": 1.0458509394947406, "grad_norm": 1.498822569847107, "learning_rate": 9.182056042435373e-06, "loss": 0.8169, "step": 5815 }, { "epoch": 1.0460307471006023, "grad_norm": 1.4524205923080444, "learning_rate": 9.181736786289224e-06, "loss": 0.7215, "step": 5816 }, { "epoch": 1.0462105547064642, "grad_norm": 1.482033133506775, "learning_rate": 9.181417473402394e-06, "loss": 0.7596, "step": 5817 }, { "epoch": 1.0463903623123259, "grad_norm": 1.197745442390442, "learning_rate": 9.181098103779216e-06, "loss": 0.9661, "step": 5818 }, { "epoch": 1.0465701699181875, "grad_norm": 1.6183539628982544, "learning_rate": 9.180778677424022e-06, "loss": 0.7913, "step": 5819 }, { "epoch": 1.0467499775240492, "grad_norm": 1.516610860824585, "learning_rate": 9.180459194341146e-06, "loss": 0.7706, "step": 5820 }, { "epoch": 1.046929785129911, "grad_norm": 1.0866565704345703, "learning_rate": 9.180139654534927e-06, "loss": 0.9315, "step": 5821 }, { "epoch": 1.0471095927357728, "grad_norm": 1.0382379293441772, "learning_rate": 9.179820058009696e-06, "loss": 0.9913, "step": 5822 }, { "epoch": 1.0472894003416344, "grad_norm": 1.4237459897994995, "learning_rate": 9.179500404769792e-06, "loss": 0.7384, "step": 5823 }, { "epoch": 1.047469207947496, "grad_norm": 1.5087864398956299, "learning_rate": 9.179180694819552e-06, "loss": 0.7393, "step": 5824 }, { "epoch": 1.047649015553358, "grad_norm": 1.5020853281021118, "learning_rate": 9.178860928163313e-06, "loss": 0.7745, "step": 5825 }, { "epoch": 1.0478288231592197, "grad_norm": 1.638229489326477, "learning_rate": 9.178541104805413e-06, "loss": 0.7346, "step": 5826 }, { "epoch": 1.0480086307650813, "grad_norm": 1.5232363939285278, "learning_rate": 9.178221224750196e-06, "loss": 0.7573, "step": 5827 }, { "epoch": 1.048188438370943, "grad_norm": 1.5579181909561157, "learning_rate": 9.177901288001998e-06, "loss": 0.8063, "step": 5828 }, { "epoch": 1.048368245976805, "grad_norm": 1.054707407951355, "learning_rate": 9.177581294565162e-06, "loss": 0.924, "step": 5829 }, { "epoch": 1.0485480535826666, "grad_norm": 1.4746989011764526, "learning_rate": 9.177261244444028e-06, "loss": 0.7419, "step": 5830 }, { "epoch": 1.0487278611885282, "grad_norm": 1.4234050512313843, "learning_rate": 9.176941137642941e-06, "loss": 0.6496, "step": 5831 }, { "epoch": 1.04890766879439, "grad_norm": 1.4758951663970947, "learning_rate": 9.176620974166244e-06, "loss": 0.7304, "step": 5832 }, { "epoch": 1.0490874764002518, "grad_norm": 1.4928462505340576, "learning_rate": 9.17630075401828e-06, "loss": 0.7623, "step": 5833 }, { "epoch": 1.0492672840061135, "grad_norm": 1.5490541458129883, "learning_rate": 9.175980477203394e-06, "loss": 0.7788, "step": 5834 }, { "epoch": 1.0494470916119751, "grad_norm": 1.509677529335022, "learning_rate": 9.175660143725933e-06, "loss": 0.7212, "step": 5835 }, { "epoch": 1.049626899217837, "grad_norm": 1.555359125137329, "learning_rate": 9.175339753590243e-06, "loss": 0.7774, "step": 5836 }, { "epoch": 1.0498067068236987, "grad_norm": 1.8236197233200073, "learning_rate": 9.17501930680067e-06, "loss": 0.7607, "step": 5837 }, { "epoch": 1.0499865144295604, "grad_norm": 1.4644746780395508, "learning_rate": 9.174698803361567e-06, "loss": 0.6612, "step": 5838 }, { "epoch": 1.050166322035422, "grad_norm": 1.6238247156143188, "learning_rate": 9.174378243277274e-06, "loss": 0.7266, "step": 5839 }, { "epoch": 1.050346129641284, "grad_norm": 1.4600545167922974, "learning_rate": 9.174057626552148e-06, "loss": 0.7277, "step": 5840 }, { "epoch": 1.0505259372471456, "grad_norm": 1.6480720043182373, "learning_rate": 9.173736953190538e-06, "loss": 0.768, "step": 5841 }, { "epoch": 1.0507057448530073, "grad_norm": 1.53977632522583, "learning_rate": 9.173416223196791e-06, "loss": 0.7404, "step": 5842 }, { "epoch": 1.050885552458869, "grad_norm": 1.314099907875061, "learning_rate": 9.173095436575265e-06, "loss": 0.99, "step": 5843 }, { "epoch": 1.0510653600647308, "grad_norm": 1.4588054418563843, "learning_rate": 9.172774593330308e-06, "loss": 0.7321, "step": 5844 }, { "epoch": 1.0512451676705925, "grad_norm": 1.184618353843689, "learning_rate": 9.172453693466276e-06, "loss": 0.9189, "step": 5845 }, { "epoch": 1.0514249752764542, "grad_norm": 1.4794389009475708, "learning_rate": 9.17213273698752e-06, "loss": 0.7148, "step": 5846 }, { "epoch": 1.0516047828823158, "grad_norm": 1.4405484199523926, "learning_rate": 9.1718117238984e-06, "loss": 0.7717, "step": 5847 }, { "epoch": 1.0517845904881777, "grad_norm": 1.5017589330673218, "learning_rate": 9.171490654203267e-06, "loss": 0.7492, "step": 5848 }, { "epoch": 1.0519643980940394, "grad_norm": 1.5035394430160522, "learning_rate": 9.17116952790648e-06, "loss": 0.7413, "step": 5849 }, { "epoch": 1.052144205699901, "grad_norm": 1.417004108428955, "learning_rate": 9.170848345012396e-06, "loss": 0.6764, "step": 5850 }, { "epoch": 1.0523240133057628, "grad_norm": 1.4884721040725708, "learning_rate": 9.170527105525372e-06, "loss": 0.7922, "step": 5851 }, { "epoch": 1.0525038209116246, "grad_norm": 1.4671664237976074, "learning_rate": 9.170205809449768e-06, "loss": 0.7378, "step": 5852 }, { "epoch": 1.0526836285174863, "grad_norm": 1.520305871963501, "learning_rate": 9.169884456789943e-06, "loss": 0.8009, "step": 5853 }, { "epoch": 1.052863436123348, "grad_norm": 1.5167206525802612, "learning_rate": 9.169563047550258e-06, "loss": 0.7253, "step": 5854 }, { "epoch": 1.0530432437292097, "grad_norm": 1.4780720472335815, "learning_rate": 9.169241581735073e-06, "loss": 0.744, "step": 5855 }, { "epoch": 1.0532230513350715, "grad_norm": 1.7589105367660522, "learning_rate": 9.168920059348748e-06, "loss": 0.7362, "step": 5856 }, { "epoch": 1.0534028589409332, "grad_norm": 1.5532381534576416, "learning_rate": 9.168598480395653e-06, "loss": 0.7475, "step": 5857 }, { "epoch": 1.0535826665467949, "grad_norm": 1.4822744131088257, "learning_rate": 9.168276844880141e-06, "loss": 0.8634, "step": 5858 }, { "epoch": 1.0537624741526566, "grad_norm": 1.2298158407211304, "learning_rate": 9.167955152806585e-06, "loss": 0.992, "step": 5859 }, { "epoch": 1.0539422817585185, "grad_norm": 1.5062806606292725, "learning_rate": 9.167633404179345e-06, "loss": 0.7837, "step": 5860 }, { "epoch": 1.0541220893643801, "grad_norm": 1.4753907918930054, "learning_rate": 9.16731159900279e-06, "loss": 0.7099, "step": 5861 }, { "epoch": 1.0543018969702418, "grad_norm": 1.5137180089950562, "learning_rate": 9.166989737281283e-06, "loss": 0.7371, "step": 5862 }, { "epoch": 1.0544817045761035, "grad_norm": 1.4872081279754639, "learning_rate": 9.166667819019194e-06, "loss": 0.7097, "step": 5863 }, { "epoch": 1.0546615121819654, "grad_norm": 1.3923077583312988, "learning_rate": 9.16634584422089e-06, "loss": 0.761, "step": 5864 }, { "epoch": 1.054841319787827, "grad_norm": 1.1270173788070679, "learning_rate": 9.16602381289074e-06, "loss": 0.9803, "step": 5865 }, { "epoch": 1.0550211273936887, "grad_norm": 1.5271704196929932, "learning_rate": 9.16570172503311e-06, "loss": 0.7182, "step": 5866 }, { "epoch": 1.0552009349995506, "grad_norm": 1.3767837285995483, "learning_rate": 9.165379580652376e-06, "loss": 0.724, "step": 5867 }, { "epoch": 1.0553807426054123, "grad_norm": 1.5089223384857178, "learning_rate": 9.165057379752906e-06, "loss": 0.7596, "step": 5868 }, { "epoch": 1.055560550211274, "grad_norm": 1.4722814559936523, "learning_rate": 9.164735122339074e-06, "loss": 0.6972, "step": 5869 }, { "epoch": 1.0557403578171356, "grad_norm": 1.5834465026855469, "learning_rate": 9.16441280841525e-06, "loss": 0.7265, "step": 5870 }, { "epoch": 1.0559201654229975, "grad_norm": 1.5185095071792603, "learning_rate": 9.164090437985809e-06, "loss": 0.6787, "step": 5871 }, { "epoch": 1.0560999730288592, "grad_norm": 1.1260857582092285, "learning_rate": 9.163768011055123e-06, "loss": 0.952, "step": 5872 }, { "epoch": 1.0562797806347208, "grad_norm": 1.4637280702590942, "learning_rate": 9.16344552762757e-06, "loss": 0.7565, "step": 5873 }, { "epoch": 1.0564595882405825, "grad_norm": 1.4316147565841675, "learning_rate": 9.163122987707524e-06, "loss": 0.746, "step": 5874 }, { "epoch": 1.0566393958464444, "grad_norm": 1.5454192161560059, "learning_rate": 9.162800391299362e-06, "loss": 0.7884, "step": 5875 }, { "epoch": 1.056819203452306, "grad_norm": 1.4304471015930176, "learning_rate": 9.16247773840746e-06, "loss": 0.7536, "step": 5876 }, { "epoch": 1.0569990110581677, "grad_norm": 1.5652172565460205, "learning_rate": 9.162155029036197e-06, "loss": 0.7544, "step": 5877 }, { "epoch": 1.0571788186640294, "grad_norm": 1.5000464916229248, "learning_rate": 9.161832263189952e-06, "loss": 0.7343, "step": 5878 }, { "epoch": 1.0573586262698913, "grad_norm": 1.5267800092697144, "learning_rate": 9.161509440873104e-06, "loss": 0.794, "step": 5879 }, { "epoch": 1.057538433875753, "grad_norm": 1.5087823867797852, "learning_rate": 9.161186562090032e-06, "loss": 0.7322, "step": 5880 }, { "epoch": 1.0577182414816146, "grad_norm": 1.5806483030319214, "learning_rate": 9.16086362684512e-06, "loss": 0.8245, "step": 5881 }, { "epoch": 1.0578980490874763, "grad_norm": 1.4639692306518555, "learning_rate": 9.160540635142749e-06, "loss": 0.6889, "step": 5882 }, { "epoch": 1.0580778566933382, "grad_norm": 1.468170166015625, "learning_rate": 9.160217586987299e-06, "loss": 0.7445, "step": 5883 }, { "epoch": 1.0582576642991999, "grad_norm": 1.5623197555541992, "learning_rate": 9.159894482383156e-06, "loss": 0.681, "step": 5884 }, { "epoch": 1.0584374719050615, "grad_norm": 1.528243064880371, "learning_rate": 9.159571321334703e-06, "loss": 0.8369, "step": 5885 }, { "epoch": 1.0586172795109232, "grad_norm": 1.440222144126892, "learning_rate": 9.159248103846324e-06, "loss": 0.7585, "step": 5886 }, { "epoch": 1.058797087116785, "grad_norm": 1.5227646827697754, "learning_rate": 9.158924829922406e-06, "loss": 0.7083, "step": 5887 }, { "epoch": 1.0589768947226468, "grad_norm": 1.6094263792037964, "learning_rate": 9.158601499567337e-06, "loss": 0.7448, "step": 5888 }, { "epoch": 1.0591567023285084, "grad_norm": 1.4693366289138794, "learning_rate": 9.158278112785501e-06, "loss": 0.7196, "step": 5889 }, { "epoch": 1.0593365099343701, "grad_norm": 1.448195457458496, "learning_rate": 9.157954669581288e-06, "loss": 0.7816, "step": 5890 }, { "epoch": 1.059516317540232, "grad_norm": 1.6117517948150635, "learning_rate": 9.157631169959085e-06, "loss": 0.7649, "step": 5891 }, { "epoch": 1.0596961251460937, "grad_norm": 1.4620258808135986, "learning_rate": 9.157307613923284e-06, "loss": 0.6944, "step": 5892 }, { "epoch": 1.0598759327519554, "grad_norm": 1.1729557514190674, "learning_rate": 9.15698400147827e-06, "loss": 1.0009, "step": 5893 }, { "epoch": 1.060055740357817, "grad_norm": 1.5222818851470947, "learning_rate": 9.156660332628441e-06, "loss": 0.7484, "step": 5894 }, { "epoch": 1.060235547963679, "grad_norm": 1.5155284404754639, "learning_rate": 9.156336607378184e-06, "loss": 0.7473, "step": 5895 }, { "epoch": 1.0604153555695406, "grad_norm": 1.465526819229126, "learning_rate": 9.156012825731894e-06, "loss": 0.7287, "step": 5896 }, { "epoch": 1.0605951631754023, "grad_norm": 1.4396281242370605, "learning_rate": 9.155688987693962e-06, "loss": 0.694, "step": 5897 }, { "epoch": 1.0607749707812641, "grad_norm": 1.1477851867675781, "learning_rate": 9.155365093268785e-06, "loss": 0.9796, "step": 5898 }, { "epoch": 1.0609547783871258, "grad_norm": 1.4855326414108276, "learning_rate": 9.155041142460754e-06, "loss": 0.7501, "step": 5899 }, { "epoch": 1.0611345859929875, "grad_norm": 1.513767123222351, "learning_rate": 9.154717135274267e-06, "loss": 0.6965, "step": 5900 }, { "epoch": 1.0613143935988492, "grad_norm": 1.5023504495620728, "learning_rate": 9.154393071713722e-06, "loss": 0.7631, "step": 5901 }, { "epoch": 1.061494201204711, "grad_norm": 1.4784727096557617, "learning_rate": 9.154068951783513e-06, "loss": 0.7935, "step": 5902 }, { "epoch": 1.0616740088105727, "grad_norm": 1.5060129165649414, "learning_rate": 9.153744775488039e-06, "loss": 0.8247, "step": 5903 }, { "epoch": 1.0618538164164344, "grad_norm": 1.4150876998901367, "learning_rate": 9.153420542831699e-06, "loss": 0.7592, "step": 5904 }, { "epoch": 1.062033624022296, "grad_norm": 1.4439597129821777, "learning_rate": 9.15309625381889e-06, "loss": 0.7335, "step": 5905 }, { "epoch": 1.062213431628158, "grad_norm": 1.7466964721679688, "learning_rate": 9.152771908454017e-06, "loss": 0.7348, "step": 5906 }, { "epoch": 1.0623932392340196, "grad_norm": 1.5084500312805176, "learning_rate": 9.152447506741477e-06, "loss": 0.7577, "step": 5907 }, { "epoch": 1.0625730468398813, "grad_norm": 1.5058542490005493, "learning_rate": 9.152123048685673e-06, "loss": 0.7492, "step": 5908 }, { "epoch": 1.062752854445743, "grad_norm": 1.468445897102356, "learning_rate": 9.151798534291006e-06, "loss": 0.7798, "step": 5909 }, { "epoch": 1.0629326620516049, "grad_norm": 1.4717167615890503, "learning_rate": 9.151473963561884e-06, "loss": 0.7859, "step": 5910 }, { "epoch": 1.0631124696574665, "grad_norm": 1.4792003631591797, "learning_rate": 9.151149336502705e-06, "loss": 0.6815, "step": 5911 }, { "epoch": 1.0632922772633282, "grad_norm": 1.1624536514282227, "learning_rate": 9.150824653117876e-06, "loss": 0.9414, "step": 5912 }, { "epoch": 1.0634720848691899, "grad_norm": 1.5043059587478638, "learning_rate": 9.150499913411803e-06, "loss": 0.7564, "step": 5913 }, { "epoch": 1.0636518924750518, "grad_norm": 1.3835769891738892, "learning_rate": 9.150175117388894e-06, "loss": 0.7236, "step": 5914 }, { "epoch": 1.0638317000809134, "grad_norm": 1.4819492101669312, "learning_rate": 9.149850265053553e-06, "loss": 0.7829, "step": 5915 }, { "epoch": 1.064011507686775, "grad_norm": 1.5898115634918213, "learning_rate": 9.14952535641019e-06, "loss": 0.8183, "step": 5916 }, { "epoch": 1.0641913152926368, "grad_norm": 1.6710116863250732, "learning_rate": 9.14920039146321e-06, "loss": 0.775, "step": 5917 }, { "epoch": 1.0643711228984987, "grad_norm": 1.5452758073806763, "learning_rate": 9.148875370217028e-06, "loss": 0.7965, "step": 5918 }, { "epoch": 1.0645509305043603, "grad_norm": 1.056432843208313, "learning_rate": 9.14855029267605e-06, "loss": 0.9961, "step": 5919 }, { "epoch": 1.064730738110222, "grad_norm": 1.5339332818984985, "learning_rate": 9.148225158844688e-06, "loss": 0.7241, "step": 5920 }, { "epoch": 1.0649105457160837, "grad_norm": 1.5180729627609253, "learning_rate": 9.147899968727355e-06, "loss": 0.8, "step": 5921 }, { "epoch": 1.0650903533219456, "grad_norm": 0.9325723648071289, "learning_rate": 9.14757472232846e-06, "loss": 0.9515, "step": 5922 }, { "epoch": 1.0652701609278072, "grad_norm": 1.4627742767333984, "learning_rate": 9.14724941965242e-06, "loss": 0.6994, "step": 5923 }, { "epoch": 1.065449968533669, "grad_norm": 1.5652579069137573, "learning_rate": 9.146924060703646e-06, "loss": 0.8167, "step": 5924 }, { "epoch": 1.0656297761395308, "grad_norm": 1.554196834564209, "learning_rate": 9.146598645486554e-06, "loss": 0.7948, "step": 5925 }, { "epoch": 1.0658095837453925, "grad_norm": 1.4125311374664307, "learning_rate": 9.14627317400556e-06, "loss": 0.727, "step": 5926 }, { "epoch": 1.0659893913512541, "grad_norm": 1.4440276622772217, "learning_rate": 9.145947646265078e-06, "loss": 0.7074, "step": 5927 }, { "epoch": 1.0661691989571158, "grad_norm": 1.502332329750061, "learning_rate": 9.145622062269528e-06, "loss": 0.7937, "step": 5928 }, { "epoch": 1.0663490065629777, "grad_norm": 1.1046150922775269, "learning_rate": 9.145296422023325e-06, "loss": 0.9635, "step": 5929 }, { "epoch": 1.0665288141688394, "grad_norm": 1.4516407251358032, "learning_rate": 9.144970725530888e-06, "loss": 0.6546, "step": 5930 }, { "epoch": 1.066708621774701, "grad_norm": 1.4677484035491943, "learning_rate": 9.14464497279664e-06, "loss": 0.7048, "step": 5931 }, { "epoch": 1.0668884293805627, "grad_norm": 1.5286155939102173, "learning_rate": 9.144319163824995e-06, "loss": 0.738, "step": 5932 }, { "epoch": 1.0670682369864246, "grad_norm": 1.4574401378631592, "learning_rate": 9.14399329862038e-06, "loss": 0.7327, "step": 5933 }, { "epoch": 1.0672480445922863, "grad_norm": 1.4453338384628296, "learning_rate": 9.14366737718721e-06, "loss": 0.7081, "step": 5934 }, { "epoch": 1.067427852198148, "grad_norm": 1.4991165399551392, "learning_rate": 9.143341399529913e-06, "loss": 0.7661, "step": 5935 }, { "epoch": 1.0676076598040096, "grad_norm": 1.0767134428024292, "learning_rate": 9.143015365652908e-06, "loss": 0.9543, "step": 5936 }, { "epoch": 1.0677874674098715, "grad_norm": 1.5243792533874512, "learning_rate": 9.142689275560624e-06, "loss": 0.7244, "step": 5937 }, { "epoch": 1.0679672750157332, "grad_norm": 1.0154187679290771, "learning_rate": 9.142363129257478e-06, "loss": 0.9956, "step": 5938 }, { "epoch": 1.0681470826215949, "grad_norm": 1.4477367401123047, "learning_rate": 9.142036926747904e-06, "loss": 0.8048, "step": 5939 }, { "epoch": 1.0683268902274565, "grad_norm": 1.4756687879562378, "learning_rate": 9.141710668036322e-06, "loss": 0.7812, "step": 5940 }, { "epoch": 1.0685066978333184, "grad_norm": 2.015554904937744, "learning_rate": 9.141384353127158e-06, "loss": 0.7158, "step": 5941 }, { "epoch": 1.06868650543918, "grad_norm": 1.475105881690979, "learning_rate": 9.141057982024846e-06, "loss": 0.7347, "step": 5942 }, { "epoch": 1.0688663130450418, "grad_norm": 1.0748156309127808, "learning_rate": 9.140731554733809e-06, "loss": 0.9441, "step": 5943 }, { "epoch": 1.0690461206509034, "grad_norm": 1.4569710493087769, "learning_rate": 9.14040507125848e-06, "loss": 0.7615, "step": 5944 }, { "epoch": 1.0692259282567653, "grad_norm": 1.5342882871627808, "learning_rate": 9.140078531603284e-06, "loss": 0.6522, "step": 5945 }, { "epoch": 1.069405735862627, "grad_norm": 1.5712424516677856, "learning_rate": 9.139751935772657e-06, "loss": 0.7593, "step": 5946 }, { "epoch": 1.0695855434684887, "grad_norm": 1.3858287334442139, "learning_rate": 9.139425283771027e-06, "loss": 0.7328, "step": 5947 }, { "epoch": 1.0697653510743503, "grad_norm": 1.3413646221160889, "learning_rate": 9.139098575602828e-06, "loss": 0.6904, "step": 5948 }, { "epoch": 1.0699451586802122, "grad_norm": 1.4675095081329346, "learning_rate": 9.138771811272492e-06, "loss": 0.7534, "step": 5949 }, { "epoch": 1.070124966286074, "grad_norm": 1.4909712076187134, "learning_rate": 9.138444990784455e-06, "loss": 0.7231, "step": 5950 }, { "epoch": 1.0703047738919356, "grad_norm": 1.4917110204696655, "learning_rate": 9.138118114143147e-06, "loss": 0.7523, "step": 5951 }, { "epoch": 1.0704845814977975, "grad_norm": 1.3569085597991943, "learning_rate": 9.137791181353006e-06, "loss": 0.6738, "step": 5952 }, { "epoch": 1.0706643891036591, "grad_norm": 1.5041353702545166, "learning_rate": 9.13746419241847e-06, "loss": 0.8122, "step": 5953 }, { "epoch": 1.0708441967095208, "grad_norm": 1.4433435201644897, "learning_rate": 9.137137147343974e-06, "loss": 0.765, "step": 5954 }, { "epoch": 1.0710240043153825, "grad_norm": 1.2601673603057861, "learning_rate": 9.136810046133952e-06, "loss": 0.9337, "step": 5955 }, { "epoch": 1.0712038119212444, "grad_norm": 1.5789577960968018, "learning_rate": 9.136482888792848e-06, "loss": 0.7602, "step": 5956 }, { "epoch": 1.071383619527106, "grad_norm": 1.4567450284957886, "learning_rate": 9.1361556753251e-06, "loss": 0.7332, "step": 5957 }, { "epoch": 1.0715634271329677, "grad_norm": 1.5072717666625977, "learning_rate": 9.135828405735146e-06, "loss": 0.7497, "step": 5958 }, { "epoch": 1.0717432347388294, "grad_norm": 1.4137190580368042, "learning_rate": 9.135501080027426e-06, "loss": 0.8007, "step": 5959 }, { "epoch": 1.0719230423446913, "grad_norm": 1.541838526725769, "learning_rate": 9.135173698206383e-06, "loss": 0.7208, "step": 5960 }, { "epoch": 1.072102849950553, "grad_norm": 1.0260487794876099, "learning_rate": 9.13484626027646e-06, "loss": 0.9443, "step": 5961 }, { "epoch": 1.0722826575564146, "grad_norm": 1.564965009689331, "learning_rate": 9.134518766242097e-06, "loss": 0.782, "step": 5962 }, { "epoch": 1.0724624651622763, "grad_norm": 1.4810901880264282, "learning_rate": 9.134191216107741e-06, "loss": 0.7523, "step": 5963 }, { "epoch": 1.0726422727681382, "grad_norm": 1.5556856393814087, "learning_rate": 9.133863609877835e-06, "loss": 0.7465, "step": 5964 }, { "epoch": 1.0728220803739998, "grad_norm": 1.786451816558838, "learning_rate": 9.133535947556822e-06, "loss": 0.7977, "step": 5965 }, { "epoch": 1.0730018879798615, "grad_norm": 1.496170163154602, "learning_rate": 9.133208229149153e-06, "loss": 0.7622, "step": 5966 }, { "epoch": 1.0731816955857232, "grad_norm": 2.060737133026123, "learning_rate": 9.132880454659268e-06, "loss": 0.6858, "step": 5967 }, { "epoch": 1.073361503191585, "grad_norm": 1.5258930921554565, "learning_rate": 9.13255262409162e-06, "loss": 0.7309, "step": 5968 }, { "epoch": 1.0735413107974467, "grad_norm": 1.6035256385803223, "learning_rate": 9.132224737450656e-06, "loss": 0.7635, "step": 5969 }, { "epoch": 1.0737211184033084, "grad_norm": 1.4676599502563477, "learning_rate": 9.131896794740825e-06, "loss": 0.7793, "step": 5970 }, { "epoch": 1.07390092600917, "grad_norm": 1.58183753490448, "learning_rate": 9.131568795966574e-06, "loss": 0.7103, "step": 5971 }, { "epoch": 1.074080733615032, "grad_norm": 1.4051662683486938, "learning_rate": 9.131240741132356e-06, "loss": 0.7184, "step": 5972 }, { "epoch": 1.0742605412208936, "grad_norm": 1.4704476594924927, "learning_rate": 9.130912630242624e-06, "loss": 0.7768, "step": 5973 }, { "epoch": 1.0744403488267553, "grad_norm": 1.4773674011230469, "learning_rate": 9.130584463301824e-06, "loss": 0.7611, "step": 5974 }, { "epoch": 1.074620156432617, "grad_norm": 1.470046877861023, "learning_rate": 9.130256240314415e-06, "loss": 0.7948, "step": 5975 }, { "epoch": 1.0747999640384789, "grad_norm": 1.537715196609497, "learning_rate": 9.129927961284848e-06, "loss": 0.7225, "step": 5976 }, { "epoch": 1.0749797716443406, "grad_norm": 1.5784796476364136, "learning_rate": 9.129599626217579e-06, "loss": 0.7624, "step": 5977 }, { "epoch": 1.0751595792502022, "grad_norm": 1.52290678024292, "learning_rate": 9.12927123511706e-06, "loss": 0.7217, "step": 5978 }, { "epoch": 1.0753393868560641, "grad_norm": 1.395750641822815, "learning_rate": 9.128942787987749e-06, "loss": 0.7127, "step": 5979 }, { "epoch": 1.0755191944619258, "grad_norm": 1.4902453422546387, "learning_rate": 9.128614284834103e-06, "loss": 0.7265, "step": 5980 }, { "epoch": 1.0756990020677875, "grad_norm": 1.5121023654937744, "learning_rate": 9.128285725660577e-06, "loss": 0.756, "step": 5981 }, { "epoch": 1.0758788096736491, "grad_norm": 1.463153600692749, "learning_rate": 9.127957110471633e-06, "loss": 0.65, "step": 5982 }, { "epoch": 1.076058617279511, "grad_norm": 1.4457155466079712, "learning_rate": 9.127628439271726e-06, "loss": 0.7611, "step": 5983 }, { "epoch": 1.0762384248853727, "grad_norm": 1.488419532775879, "learning_rate": 9.127299712065315e-06, "loss": 0.7656, "step": 5984 }, { "epoch": 1.0764182324912344, "grad_norm": 1.1749175786972046, "learning_rate": 9.126970928856864e-06, "loss": 0.9608, "step": 5985 }, { "epoch": 1.076598040097096, "grad_norm": 1.5943419933319092, "learning_rate": 9.126642089650833e-06, "loss": 0.7752, "step": 5986 }, { "epoch": 1.076777847702958, "grad_norm": 1.6338683366775513, "learning_rate": 9.126313194451683e-06, "loss": 0.7786, "step": 5987 }, { "epoch": 1.0769576553088196, "grad_norm": 1.463055968284607, "learning_rate": 9.12598424326388e-06, "loss": 0.731, "step": 5988 }, { "epoch": 1.0771374629146813, "grad_norm": 1.6856616735458374, "learning_rate": 9.125655236091882e-06, "loss": 0.7267, "step": 5989 }, { "epoch": 1.077317270520543, "grad_norm": 1.5541523694992065, "learning_rate": 9.125326172940155e-06, "loss": 0.7604, "step": 5990 }, { "epoch": 1.0774970781264048, "grad_norm": 1.7680474519729614, "learning_rate": 9.124997053813165e-06, "loss": 0.8139, "step": 5991 }, { "epoch": 1.0776768857322665, "grad_norm": 1.4840432405471802, "learning_rate": 9.12466787871538e-06, "loss": 0.6778, "step": 5992 }, { "epoch": 1.0778566933381282, "grad_norm": 1.461702585220337, "learning_rate": 9.124338647651263e-06, "loss": 0.7384, "step": 5993 }, { "epoch": 1.0780365009439898, "grad_norm": 1.4979770183563232, "learning_rate": 9.124009360625281e-06, "loss": 0.8317, "step": 5994 }, { "epoch": 1.0782163085498517, "grad_norm": 1.0775173902511597, "learning_rate": 9.123680017641905e-06, "loss": 0.9586, "step": 5995 }, { "epoch": 1.0783961161557134, "grad_norm": 1.5222331285476685, "learning_rate": 9.1233506187056e-06, "loss": 0.7073, "step": 5996 }, { "epoch": 1.078575923761575, "grad_norm": 1.4214974641799927, "learning_rate": 9.123021163820839e-06, "loss": 0.7232, "step": 5997 }, { "epoch": 1.0787557313674367, "grad_norm": 1.5190225839614868, "learning_rate": 9.12269165299209e-06, "loss": 0.7585, "step": 5998 }, { "epoch": 1.0789355389732986, "grad_norm": 1.1851545572280884, "learning_rate": 9.122362086223826e-06, "loss": 0.9345, "step": 5999 }, { "epoch": 1.0791153465791603, "grad_norm": 1.604961633682251, "learning_rate": 9.122032463520516e-06, "loss": 0.7304, "step": 6000 }, { "epoch": 1.0791153465791603, "eval_loss": 0.81549072265625, "eval_runtime": 150.8743, "eval_samples_per_second": 95.324, "eval_steps_per_second": 1.491, "step": 6000 }, { "epoch": 1.079295154185022, "grad_norm": 1.6629420518875122, "learning_rate": 9.121702784886634e-06, "loss": 0.7539, "step": 6001 }, { "epoch": 1.0794749617908836, "grad_norm": 2.212460517883301, "learning_rate": 9.121373050326656e-06, "loss": 0.7291, "step": 6002 }, { "epoch": 1.0796547693967455, "grad_norm": 2.310840368270874, "learning_rate": 9.121043259845052e-06, "loss": 0.7173, "step": 6003 }, { "epoch": 1.0798345770026072, "grad_norm": 1.094534158706665, "learning_rate": 9.120713413446298e-06, "loss": 0.9774, "step": 6004 }, { "epoch": 1.0800143846084689, "grad_norm": 1.4557993412017822, "learning_rate": 9.12038351113487e-06, "loss": 0.7604, "step": 6005 }, { "epoch": 1.0801941922143308, "grad_norm": 1.0816948413848877, "learning_rate": 9.120053552915245e-06, "loss": 0.9512, "step": 6006 }, { "epoch": 1.0803739998201924, "grad_norm": 1.6470677852630615, "learning_rate": 9.119723538791898e-06, "loss": 0.719, "step": 6007 }, { "epoch": 1.0805538074260541, "grad_norm": 1.5014370679855347, "learning_rate": 9.119393468769309e-06, "loss": 0.7501, "step": 6008 }, { "epoch": 1.0807336150319158, "grad_norm": 1.7290633916854858, "learning_rate": 9.119063342851957e-06, "loss": 0.7158, "step": 6009 }, { "epoch": 1.0809134226377777, "grad_norm": 1.0979118347167969, "learning_rate": 9.118733161044318e-06, "loss": 0.9536, "step": 6010 }, { "epoch": 1.0810932302436393, "grad_norm": 1.457939624786377, "learning_rate": 9.118402923350876e-06, "loss": 0.7166, "step": 6011 }, { "epoch": 1.081273037849501, "grad_norm": 1.520918846130371, "learning_rate": 9.11807262977611e-06, "loss": 0.7698, "step": 6012 }, { "epoch": 1.0814528454553627, "grad_norm": 1.5295417308807373, "learning_rate": 9.1177422803245e-06, "loss": 0.7659, "step": 6013 }, { "epoch": 1.0816326530612246, "grad_norm": 1.4185069799423218, "learning_rate": 9.117411875000532e-06, "loss": 0.774, "step": 6014 }, { "epoch": 1.0818124606670863, "grad_norm": 1.5396138429641724, "learning_rate": 9.117081413808687e-06, "loss": 0.7397, "step": 6015 }, { "epoch": 1.081992268272948, "grad_norm": 1.6736398935317993, "learning_rate": 9.11675089675345e-06, "loss": 0.7294, "step": 6016 }, { "epoch": 1.0821720758788096, "grad_norm": 1.405755639076233, "learning_rate": 9.116420323839304e-06, "loss": 0.6823, "step": 6017 }, { "epoch": 1.0823518834846715, "grad_norm": 1.4565931558609009, "learning_rate": 9.116089695070736e-06, "loss": 0.712, "step": 6018 }, { "epoch": 1.0825316910905332, "grad_norm": 1.5121593475341797, "learning_rate": 9.115759010452232e-06, "loss": 0.757, "step": 6019 }, { "epoch": 1.0827114986963948, "grad_norm": 1.4047373533248901, "learning_rate": 9.115428269988278e-06, "loss": 0.7204, "step": 6020 }, { "epoch": 1.0828913063022565, "grad_norm": 1.4687724113464355, "learning_rate": 9.115097473683364e-06, "loss": 0.6713, "step": 6021 }, { "epoch": 1.0830711139081184, "grad_norm": 1.4170174598693848, "learning_rate": 9.114766621541975e-06, "loss": 0.7071, "step": 6022 }, { "epoch": 1.08325092151398, "grad_norm": 1.5262820720672607, "learning_rate": 9.114435713568603e-06, "loss": 0.708, "step": 6023 }, { "epoch": 1.0834307291198417, "grad_norm": 1.5658091306686401, "learning_rate": 9.114104749767738e-06, "loss": 0.7747, "step": 6024 }, { "epoch": 1.0836105367257034, "grad_norm": 1.6622936725616455, "learning_rate": 9.11377373014387e-06, "loss": 0.7886, "step": 6025 }, { "epoch": 1.0837903443315653, "grad_norm": 1.4853450059890747, "learning_rate": 9.113442654701487e-06, "loss": 0.6794, "step": 6026 }, { "epoch": 1.083970151937427, "grad_norm": 1.4263675212860107, "learning_rate": 9.113111523445087e-06, "loss": 0.7403, "step": 6027 }, { "epoch": 1.0841499595432886, "grad_norm": 1.5053379535675049, "learning_rate": 9.11278033637916e-06, "loss": 0.7746, "step": 6028 }, { "epoch": 1.0843297671491503, "grad_norm": 1.6302504539489746, "learning_rate": 9.1124490935082e-06, "loss": 0.7405, "step": 6029 }, { "epoch": 1.0845095747550122, "grad_norm": 1.7030454874038696, "learning_rate": 9.112117794836704e-06, "loss": 0.7074, "step": 6030 }, { "epoch": 1.0846893823608739, "grad_norm": 1.4523800611495972, "learning_rate": 9.111786440369163e-06, "loss": 0.7296, "step": 6031 }, { "epoch": 1.0848691899667355, "grad_norm": 1.5352001190185547, "learning_rate": 9.111455030110077e-06, "loss": 0.7746, "step": 6032 }, { "epoch": 1.0850489975725974, "grad_norm": 1.5439810752868652, "learning_rate": 9.11112356406394e-06, "loss": 0.7831, "step": 6033 }, { "epoch": 1.085228805178459, "grad_norm": 1.4854555130004883, "learning_rate": 9.11079204223525e-06, "loss": 0.7616, "step": 6034 }, { "epoch": 1.0854086127843208, "grad_norm": 1.4288979768753052, "learning_rate": 9.110460464628506e-06, "loss": 0.6553, "step": 6035 }, { "epoch": 1.0855884203901824, "grad_norm": 1.5437612533569336, "learning_rate": 9.110128831248208e-06, "loss": 0.7978, "step": 6036 }, { "epoch": 1.0857682279960443, "grad_norm": 1.7767884731292725, "learning_rate": 9.109797142098854e-06, "loss": 0.7678, "step": 6037 }, { "epoch": 1.085948035601906, "grad_norm": 1.5721075534820557, "learning_rate": 9.109465397184946e-06, "loss": 0.7542, "step": 6038 }, { "epoch": 1.0861278432077677, "grad_norm": 1.2300224304199219, "learning_rate": 9.109133596510984e-06, "loss": 0.9728, "step": 6039 }, { "epoch": 1.0863076508136293, "grad_norm": 1.5461989641189575, "learning_rate": 9.108801740081471e-06, "loss": 0.7655, "step": 6040 }, { "epoch": 1.0864874584194912, "grad_norm": 1.5431731939315796, "learning_rate": 9.10846982790091e-06, "loss": 0.7735, "step": 6041 }, { "epoch": 1.086667266025353, "grad_norm": 1.6278486251831055, "learning_rate": 9.108137859973804e-06, "loss": 0.8123, "step": 6042 }, { "epoch": 1.0868470736312146, "grad_norm": 1.4270133972167969, "learning_rate": 9.107805836304658e-06, "loss": 0.7573, "step": 6043 }, { "epoch": 1.0870268812370762, "grad_norm": 1.5262678861618042, "learning_rate": 9.107473756897976e-06, "loss": 0.7176, "step": 6044 }, { "epoch": 1.0872066888429381, "grad_norm": 1.5800410509109497, "learning_rate": 9.107141621758267e-06, "loss": 0.7801, "step": 6045 }, { "epoch": 1.0873864964487998, "grad_norm": 1.5016154050827026, "learning_rate": 9.106809430890033e-06, "loss": 0.8458, "step": 6046 }, { "epoch": 1.0875663040546615, "grad_norm": 1.5407509803771973, "learning_rate": 9.106477184297783e-06, "loss": 0.7906, "step": 6047 }, { "epoch": 1.0877461116605232, "grad_norm": 1.1311110258102417, "learning_rate": 9.106144881986029e-06, "loss": 0.9702, "step": 6048 }, { "epoch": 1.087925919266385, "grad_norm": 1.4258992671966553, "learning_rate": 9.105812523959275e-06, "loss": 0.776, "step": 6049 }, { "epoch": 1.0881057268722467, "grad_norm": 1.4342386722564697, "learning_rate": 9.10548011022203e-06, "loss": 0.7501, "step": 6050 }, { "epoch": 1.0882855344781084, "grad_norm": 1.0439413785934448, "learning_rate": 9.10514764077881e-06, "loss": 0.9577, "step": 6051 }, { "epoch": 1.08846534208397, "grad_norm": 1.4866284132003784, "learning_rate": 9.104815115634125e-06, "loss": 0.753, "step": 6052 }, { "epoch": 1.088645149689832, "grad_norm": 1.5363048315048218, "learning_rate": 9.10448253479248e-06, "loss": 0.6527, "step": 6053 }, { "epoch": 1.0888249572956936, "grad_norm": 1.5165692567825317, "learning_rate": 9.104149898258397e-06, "loss": 0.7518, "step": 6054 }, { "epoch": 1.0890047649015553, "grad_norm": 1.4231280088424683, "learning_rate": 9.103817206036383e-06, "loss": 0.7172, "step": 6055 }, { "epoch": 1.089184572507417, "grad_norm": 1.5856298208236694, "learning_rate": 9.103484458130954e-06, "loss": 0.7339, "step": 6056 }, { "epoch": 1.0893643801132789, "grad_norm": 1.4681661128997803, "learning_rate": 9.103151654546625e-06, "loss": 0.7528, "step": 6057 }, { "epoch": 1.0895441877191405, "grad_norm": 1.5503963232040405, "learning_rate": 9.102818795287912e-06, "loss": 0.7707, "step": 6058 }, { "epoch": 1.0897239953250022, "grad_norm": 1.523105502128601, "learning_rate": 9.102485880359334e-06, "loss": 0.7095, "step": 6059 }, { "epoch": 1.089903802930864, "grad_norm": 1.4033308029174805, "learning_rate": 9.102152909765403e-06, "loss": 0.6988, "step": 6060 }, { "epoch": 1.0900836105367258, "grad_norm": 1.5291708707809448, "learning_rate": 9.10181988351064e-06, "loss": 0.7415, "step": 6061 }, { "epoch": 1.0902634181425874, "grad_norm": 1.539015293121338, "learning_rate": 9.101486801599565e-06, "loss": 0.7783, "step": 6062 }, { "epoch": 1.090443225748449, "grad_norm": 1.4386671781539917, "learning_rate": 9.101153664036693e-06, "loss": 0.7452, "step": 6063 }, { "epoch": 1.090623033354311, "grad_norm": 1.438674807548523, "learning_rate": 9.100820470826548e-06, "loss": 0.7403, "step": 6064 }, { "epoch": 1.0908028409601727, "grad_norm": 1.4535516500473022, "learning_rate": 9.10048722197365e-06, "loss": 0.7682, "step": 6065 }, { "epoch": 1.0909826485660343, "grad_norm": 1.5267137289047241, "learning_rate": 9.100153917482522e-06, "loss": 0.7664, "step": 6066 }, { "epoch": 1.091162456171896, "grad_norm": 1.572049856185913, "learning_rate": 9.099820557357683e-06, "loss": 0.7284, "step": 6067 }, { "epoch": 1.091342263777758, "grad_norm": 1.5454732179641724, "learning_rate": 9.09948714160366e-06, "loss": 0.7171, "step": 6068 }, { "epoch": 1.0915220713836196, "grad_norm": 1.6913940906524658, "learning_rate": 9.099153670224976e-06, "loss": 0.8458, "step": 6069 }, { "epoch": 1.0917018789894812, "grad_norm": 1.6445802450180054, "learning_rate": 9.098820143226156e-06, "loss": 0.7323, "step": 6070 }, { "epoch": 1.091881686595343, "grad_norm": 1.279193639755249, "learning_rate": 9.098486560611724e-06, "loss": 0.9181, "step": 6071 }, { "epoch": 1.0920614942012048, "grad_norm": 1.5640287399291992, "learning_rate": 9.098152922386207e-06, "loss": 0.7445, "step": 6072 }, { "epoch": 1.0922413018070665, "grad_norm": 1.5849686861038208, "learning_rate": 9.097819228554133e-06, "loss": 0.7359, "step": 6073 }, { "epoch": 1.0924211094129281, "grad_norm": 1.4823298454284668, "learning_rate": 9.097485479120027e-06, "loss": 0.7027, "step": 6074 }, { "epoch": 1.0926009170187898, "grad_norm": 1.4889404773712158, "learning_rate": 9.09715167408842e-06, "loss": 0.7588, "step": 6075 }, { "epoch": 1.0927807246246517, "grad_norm": 1.8813793659210205, "learning_rate": 9.096817813463843e-06, "loss": 0.7746, "step": 6076 }, { "epoch": 1.0929605322305134, "grad_norm": 1.1650009155273438, "learning_rate": 9.09648389725082e-06, "loss": 0.9482, "step": 6077 }, { "epoch": 1.093140339836375, "grad_norm": 1.4379990100860596, "learning_rate": 9.09614992545389e-06, "loss": 0.7255, "step": 6078 }, { "epoch": 1.0933201474422367, "grad_norm": 1.4502543210983276, "learning_rate": 9.095815898077578e-06, "loss": 0.7315, "step": 6079 }, { "epoch": 1.0934999550480986, "grad_norm": 1.0917479991912842, "learning_rate": 9.09548181512642e-06, "loss": 0.9336, "step": 6080 }, { "epoch": 1.0936797626539603, "grad_norm": 1.1904906034469604, "learning_rate": 9.095147676604945e-06, "loss": 0.9572, "step": 6081 }, { "epoch": 1.093859570259822, "grad_norm": 1.622124195098877, "learning_rate": 9.09481348251769e-06, "loss": 0.7894, "step": 6082 }, { "epoch": 1.0940393778656836, "grad_norm": 1.5063045024871826, "learning_rate": 9.094479232869191e-06, "loss": 0.7286, "step": 6083 }, { "epoch": 1.0942191854715455, "grad_norm": 1.5352885723114014, "learning_rate": 9.094144927663979e-06, "loss": 0.7445, "step": 6084 }, { "epoch": 1.0943989930774072, "grad_norm": 1.0817543268203735, "learning_rate": 9.093810566906593e-06, "loss": 0.9438, "step": 6085 }, { "epoch": 1.0945788006832688, "grad_norm": 1.1022560596466064, "learning_rate": 9.09347615060157e-06, "loss": 0.9414, "step": 6086 }, { "epoch": 1.0947586082891307, "grad_norm": 1.4949954748153687, "learning_rate": 9.093141678753447e-06, "loss": 0.7746, "step": 6087 }, { "epoch": 1.0949384158949924, "grad_norm": 1.5584239959716797, "learning_rate": 9.092807151366763e-06, "loss": 0.8061, "step": 6088 }, { "epoch": 1.095118223500854, "grad_norm": 1.6127797365188599, "learning_rate": 9.092472568446054e-06, "loss": 0.6938, "step": 6089 }, { "epoch": 1.0952980311067158, "grad_norm": 2.5687344074249268, "learning_rate": 9.092137929995864e-06, "loss": 0.8178, "step": 6090 }, { "epoch": 1.0954778387125776, "grad_norm": 1.4839320182800293, "learning_rate": 9.091803236020731e-06, "loss": 0.739, "step": 6091 }, { "epoch": 1.0956576463184393, "grad_norm": 1.58079993724823, "learning_rate": 9.091468486525196e-06, "loss": 0.7813, "step": 6092 }, { "epoch": 1.095837453924301, "grad_norm": 1.543912649154663, "learning_rate": 9.091133681513802e-06, "loss": 0.7551, "step": 6093 }, { "epoch": 1.0960172615301627, "grad_norm": 1.4598984718322754, "learning_rate": 9.090798820991093e-06, "loss": 0.7431, "step": 6094 }, { "epoch": 1.0961970691360245, "grad_norm": 1.5154622793197632, "learning_rate": 9.090463904961613e-06, "loss": 0.7738, "step": 6095 }, { "epoch": 1.0963768767418862, "grad_norm": 1.6299717426300049, "learning_rate": 9.090128933429904e-06, "loss": 0.763, "step": 6096 }, { "epoch": 1.096556684347748, "grad_norm": 1.4696836471557617, "learning_rate": 9.089793906400512e-06, "loss": 0.7483, "step": 6097 }, { "epoch": 1.0967364919536096, "grad_norm": 1.8484233617782593, "learning_rate": 9.089458823877984e-06, "loss": 0.7602, "step": 6098 }, { "epoch": 1.0969162995594715, "grad_norm": 1.4260094165802002, "learning_rate": 9.089123685866866e-06, "loss": 0.6564, "step": 6099 }, { "epoch": 1.0970961071653331, "grad_norm": 1.4865375757217407, "learning_rate": 9.088788492371703e-06, "loss": 0.7366, "step": 6100 }, { "epoch": 1.0972759147711948, "grad_norm": 1.5469304323196411, "learning_rate": 9.088453243397046e-06, "loss": 0.7682, "step": 6101 }, { "epoch": 1.0974557223770565, "grad_norm": 1.451970100402832, "learning_rate": 9.088117938947444e-06, "loss": 0.7519, "step": 6102 }, { "epoch": 1.0976355299829184, "grad_norm": 1.5642396211624146, "learning_rate": 9.087782579027444e-06, "loss": 0.7692, "step": 6103 }, { "epoch": 1.09781533758878, "grad_norm": 1.4648489952087402, "learning_rate": 9.0874471636416e-06, "loss": 0.7616, "step": 6104 }, { "epoch": 1.0979951451946417, "grad_norm": 1.4670791625976562, "learning_rate": 9.08711169279446e-06, "loss": 0.745, "step": 6105 }, { "epoch": 1.0981749528005034, "grad_norm": 1.1330915689468384, "learning_rate": 9.086776166490577e-06, "loss": 0.9849, "step": 6106 }, { "epoch": 1.0983547604063653, "grad_norm": 1.5044593811035156, "learning_rate": 9.086440584734505e-06, "loss": 0.7555, "step": 6107 }, { "epoch": 1.098534568012227, "grad_norm": 1.4593557119369507, "learning_rate": 9.086104947530796e-06, "loss": 0.6734, "step": 6108 }, { "epoch": 1.0987143756180886, "grad_norm": 1.5073950290679932, "learning_rate": 9.085769254884003e-06, "loss": 0.7492, "step": 6109 }, { "epoch": 1.0988941832239503, "grad_norm": 1.6809836626052856, "learning_rate": 9.085433506798684e-06, "loss": 0.7372, "step": 6110 }, { "epoch": 1.0990739908298122, "grad_norm": 1.4873085021972656, "learning_rate": 9.085097703279393e-06, "loss": 0.7615, "step": 6111 }, { "epoch": 1.0992537984356738, "grad_norm": 1.5124965906143188, "learning_rate": 9.084761844330685e-06, "loss": 0.7525, "step": 6112 }, { "epoch": 1.0994336060415355, "grad_norm": 1.4883086681365967, "learning_rate": 9.08442592995712e-06, "loss": 0.7668, "step": 6113 }, { "epoch": 1.0996134136473974, "grad_norm": 1.4793658256530762, "learning_rate": 9.084089960163254e-06, "loss": 0.7755, "step": 6114 }, { "epoch": 1.099793221253259, "grad_norm": 1.4719622135162354, "learning_rate": 9.083753934953645e-06, "loss": 0.744, "step": 6115 }, { "epoch": 1.0999730288591207, "grad_norm": 1.4472579956054688, "learning_rate": 9.083417854332855e-06, "loss": 0.7394, "step": 6116 }, { "epoch": 1.1001528364649824, "grad_norm": 1.2377853393554688, "learning_rate": 9.083081718305441e-06, "loss": 0.9919, "step": 6117 }, { "epoch": 1.100332644070844, "grad_norm": 1.5204213857650757, "learning_rate": 9.082745526875967e-06, "loss": 0.7568, "step": 6118 }, { "epoch": 1.100512451676706, "grad_norm": 1.5416088104248047, "learning_rate": 9.082409280048994e-06, "loss": 0.8123, "step": 6119 }, { "epoch": 1.1006922592825676, "grad_norm": 1.420941948890686, "learning_rate": 9.082072977829082e-06, "loss": 0.7723, "step": 6120 }, { "epoch": 1.1008720668884293, "grad_norm": 1.0294055938720703, "learning_rate": 9.081736620220797e-06, "loss": 0.9638, "step": 6121 }, { "epoch": 1.1010518744942912, "grad_norm": 1.6543829441070557, "learning_rate": 9.081400207228702e-06, "loss": 0.7259, "step": 6122 }, { "epoch": 1.1012316821001529, "grad_norm": 1.5391769409179688, "learning_rate": 9.08106373885736e-06, "loss": 0.7963, "step": 6123 }, { "epoch": 1.1014114897060145, "grad_norm": 1.5862129926681519, "learning_rate": 9.08072721511134e-06, "loss": 0.7027, "step": 6124 }, { "epoch": 1.1015912973118762, "grad_norm": 1.423410415649414, "learning_rate": 9.080390635995205e-06, "loss": 0.7281, "step": 6125 }, { "epoch": 1.101771104917738, "grad_norm": 1.4760781526565552, "learning_rate": 9.080054001513523e-06, "loss": 0.74, "step": 6126 }, { "epoch": 1.1019509125235998, "grad_norm": 1.631041407585144, "learning_rate": 9.079717311670862e-06, "loss": 0.7269, "step": 6127 }, { "epoch": 1.1021307201294614, "grad_norm": 1.4194860458374023, "learning_rate": 9.079380566471791e-06, "loss": 0.7556, "step": 6128 }, { "epoch": 1.1023105277353231, "grad_norm": 1.9095942974090576, "learning_rate": 9.079043765920877e-06, "loss": 0.7299, "step": 6129 }, { "epoch": 1.102490335341185, "grad_norm": 1.13141930103302, "learning_rate": 9.078706910022693e-06, "loss": 0.9766, "step": 6130 }, { "epoch": 1.1026701429470467, "grad_norm": 1.4211678504943848, "learning_rate": 9.078369998781806e-06, "loss": 0.7003, "step": 6131 }, { "epoch": 1.1028499505529084, "grad_norm": 1.5506528615951538, "learning_rate": 9.07803303220279e-06, "loss": 0.7743, "step": 6132 }, { "epoch": 1.10302975815877, "grad_norm": 1.4323382377624512, "learning_rate": 9.077696010290219e-06, "loss": 0.7344, "step": 6133 }, { "epoch": 1.103209565764632, "grad_norm": 1.6623667478561401, "learning_rate": 9.077358933048663e-06, "loss": 0.7642, "step": 6134 }, { "epoch": 1.1033893733704936, "grad_norm": 2.0107080936431885, "learning_rate": 9.077021800482695e-06, "loss": 0.8398, "step": 6135 }, { "epoch": 1.1035691809763553, "grad_norm": 1.5146087408065796, "learning_rate": 9.076684612596891e-06, "loss": 0.6645, "step": 6136 }, { "epoch": 1.103748988582217, "grad_norm": 1.797719955444336, "learning_rate": 9.076347369395825e-06, "loss": 0.7287, "step": 6137 }, { "epoch": 1.1039287961880788, "grad_norm": 1.3854748010635376, "learning_rate": 9.076010070884076e-06, "loss": 0.672, "step": 6138 }, { "epoch": 1.1041086037939405, "grad_norm": 1.4850760698318481, "learning_rate": 9.075672717066218e-06, "loss": 0.7339, "step": 6139 }, { "epoch": 1.1042884113998022, "grad_norm": 1.5578566789627075, "learning_rate": 9.075335307946829e-06, "loss": 0.7435, "step": 6140 }, { "epoch": 1.104468219005664, "grad_norm": 1.4807720184326172, "learning_rate": 9.074997843530487e-06, "loss": 0.7512, "step": 6141 }, { "epoch": 1.1046480266115257, "grad_norm": 1.3926397562026978, "learning_rate": 9.074660323821772e-06, "loss": 0.7048, "step": 6142 }, { "epoch": 1.1048278342173874, "grad_norm": 1.4890224933624268, "learning_rate": 9.074322748825261e-06, "loss": 0.7884, "step": 6143 }, { "epoch": 1.105007641823249, "grad_norm": 1.6892757415771484, "learning_rate": 9.073985118545536e-06, "loss": 0.7658, "step": 6144 }, { "epoch": 1.1051874494291107, "grad_norm": 1.402949333190918, "learning_rate": 9.07364743298718e-06, "loss": 0.7151, "step": 6145 }, { "epoch": 1.1053672570349726, "grad_norm": 1.3909136056900024, "learning_rate": 9.073309692154775e-06, "loss": 0.7453, "step": 6146 }, { "epoch": 1.1055470646408343, "grad_norm": 1.5451405048370361, "learning_rate": 9.0729718960529e-06, "loss": 0.7646, "step": 6147 }, { "epoch": 1.105726872246696, "grad_norm": 1.4480122327804565, "learning_rate": 9.072634044686141e-06, "loss": 0.7843, "step": 6148 }, { "epoch": 1.1059066798525579, "grad_norm": 1.44106924533844, "learning_rate": 9.072296138059083e-06, "loss": 0.716, "step": 6149 }, { "epoch": 1.1060864874584195, "grad_norm": 1.1837023496627808, "learning_rate": 9.07195817617631e-06, "loss": 0.9422, "step": 6150 }, { "epoch": 1.1062662950642812, "grad_norm": 1.4831839799880981, "learning_rate": 9.071620159042407e-06, "loss": 0.7597, "step": 6151 }, { "epoch": 1.1064461026701429, "grad_norm": 1.4359227418899536, "learning_rate": 9.07128208666196e-06, "loss": 0.7801, "step": 6152 }, { "epoch": 1.1066259102760048, "grad_norm": 1.439986228942871, "learning_rate": 9.070943959039557e-06, "loss": 0.709, "step": 6153 }, { "epoch": 1.1068057178818664, "grad_norm": 1.3903629779815674, "learning_rate": 9.070605776179788e-06, "loss": 0.6877, "step": 6154 }, { "epoch": 1.106985525487728, "grad_norm": 1.623366355895996, "learning_rate": 9.07026753808724e-06, "loss": 0.73, "step": 6155 }, { "epoch": 1.1071653330935898, "grad_norm": 1.4986281394958496, "learning_rate": 9.0699292447665e-06, "loss": 0.722, "step": 6156 }, { "epoch": 1.1073451406994517, "grad_norm": 1.4477790594100952, "learning_rate": 9.06959089622216e-06, "loss": 0.746, "step": 6157 }, { "epoch": 1.1075249483053133, "grad_norm": 1.1720460653305054, "learning_rate": 9.069252492458813e-06, "loss": 0.9859, "step": 6158 }, { "epoch": 1.107704755911175, "grad_norm": 1.6012635231018066, "learning_rate": 9.06891403348105e-06, "loss": 0.7168, "step": 6159 }, { "epoch": 1.1078845635170367, "grad_norm": 1.6084249019622803, "learning_rate": 9.06857551929346e-06, "loss": 0.7373, "step": 6160 }, { "epoch": 1.1080643711228986, "grad_norm": 1.5063854455947876, "learning_rate": 9.06823694990064e-06, "loss": 0.6576, "step": 6161 }, { "epoch": 1.1082441787287602, "grad_norm": 1.5135785341262817, "learning_rate": 9.067898325307182e-06, "loss": 0.7947, "step": 6162 }, { "epoch": 1.108423986334622, "grad_norm": 1.5534595251083374, "learning_rate": 9.067559645517684e-06, "loss": 0.7843, "step": 6163 }, { "epoch": 1.1086037939404836, "grad_norm": 1.8850152492523193, "learning_rate": 9.067220910536735e-06, "loss": 0.7351, "step": 6164 }, { "epoch": 1.1087836015463455, "grad_norm": 1.4731059074401855, "learning_rate": 9.066882120368939e-06, "loss": 0.6995, "step": 6165 }, { "epoch": 1.1089634091522071, "grad_norm": 1.350075364112854, "learning_rate": 9.066543275018887e-06, "loss": 0.7577, "step": 6166 }, { "epoch": 1.1091432167580688, "grad_norm": 1.4971222877502441, "learning_rate": 9.066204374491178e-06, "loss": 0.7676, "step": 6167 }, { "epoch": 1.1093230243639307, "grad_norm": 1.5050435066223145, "learning_rate": 9.065865418790411e-06, "loss": 0.7424, "step": 6168 }, { "epoch": 1.1095028319697924, "grad_norm": 1.519970178604126, "learning_rate": 9.065526407921187e-06, "loss": 0.7329, "step": 6169 }, { "epoch": 1.109682639575654, "grad_norm": 1.0306124687194824, "learning_rate": 9.065187341888102e-06, "loss": 0.9613, "step": 6170 }, { "epoch": 1.1098624471815157, "grad_norm": 1.4014796018600464, "learning_rate": 9.06484822069576e-06, "loss": 0.7618, "step": 6171 }, { "epoch": 1.1100422547873774, "grad_norm": 1.5331897735595703, "learning_rate": 9.064509044348762e-06, "loss": 0.7548, "step": 6172 }, { "epoch": 1.1102220623932393, "grad_norm": 1.4873586893081665, "learning_rate": 9.064169812851709e-06, "loss": 0.6781, "step": 6173 }, { "epoch": 1.110401869999101, "grad_norm": 1.452191710472107, "learning_rate": 9.063830526209203e-06, "loss": 0.713, "step": 6174 }, { "epoch": 1.1105816776049626, "grad_norm": 1.4575031995773315, "learning_rate": 9.06349118442585e-06, "loss": 0.77, "step": 6175 }, { "epoch": 1.1107614852108245, "grad_norm": 1.164331078529358, "learning_rate": 9.063151787506254e-06, "loss": 0.976, "step": 6176 }, { "epoch": 1.1109412928166862, "grad_norm": 1.5596603155136108, "learning_rate": 9.062812335455019e-06, "loss": 0.774, "step": 6177 }, { "epoch": 1.1111211004225479, "grad_norm": 1.5925724506378174, "learning_rate": 9.062472828276751e-06, "loss": 0.75, "step": 6178 }, { "epoch": 1.1113009080284095, "grad_norm": 1.442568302154541, "learning_rate": 9.062133265976058e-06, "loss": 0.7406, "step": 6179 }, { "epoch": 1.1114807156342714, "grad_norm": 1.4786863327026367, "learning_rate": 9.061793648557547e-06, "loss": 0.7628, "step": 6180 }, { "epoch": 1.111660523240133, "grad_norm": 1.4004466533660889, "learning_rate": 9.061453976025826e-06, "loss": 0.7638, "step": 6181 }, { "epoch": 1.1118403308459948, "grad_norm": 1.3761652708053589, "learning_rate": 9.061114248385504e-06, "loss": 0.731, "step": 6182 }, { "epoch": 1.1120201384518564, "grad_norm": 1.5588593482971191, "learning_rate": 9.06077446564119e-06, "loss": 0.7558, "step": 6183 }, { "epoch": 1.1121999460577183, "grad_norm": 1.4440585374832153, "learning_rate": 9.060434627797493e-06, "loss": 0.7146, "step": 6184 }, { "epoch": 1.11237975366358, "grad_norm": 1.256726861000061, "learning_rate": 9.060094734859027e-06, "loss": 0.8537, "step": 6185 }, { "epoch": 1.1125595612694417, "grad_norm": 1.4484786987304688, "learning_rate": 9.059754786830404e-06, "loss": 0.7781, "step": 6186 }, { "epoch": 1.1127393688753033, "grad_norm": 1.4419506788253784, "learning_rate": 9.059414783716233e-06, "loss": 0.7155, "step": 6187 }, { "epoch": 1.1129191764811652, "grad_norm": 1.5230567455291748, "learning_rate": 9.059074725521133e-06, "loss": 0.7137, "step": 6188 }, { "epoch": 1.113098984087027, "grad_norm": 1.5796442031860352, "learning_rate": 9.058734612249714e-06, "loss": 0.7078, "step": 6189 }, { "epoch": 1.1132787916928886, "grad_norm": 1.537147879600525, "learning_rate": 9.058394443906591e-06, "loss": 0.7432, "step": 6190 }, { "epoch": 1.1134585992987502, "grad_norm": 1.431822657585144, "learning_rate": 9.058054220496381e-06, "loss": 0.7484, "step": 6191 }, { "epoch": 1.1136384069046121, "grad_norm": 1.5600882768630981, "learning_rate": 9.0577139420237e-06, "loss": 0.808, "step": 6192 }, { "epoch": 1.1138182145104738, "grad_norm": 1.496824026107788, "learning_rate": 9.057373608493165e-06, "loss": 0.7857, "step": 6193 }, { "epoch": 1.1139980221163355, "grad_norm": 1.4281673431396484, "learning_rate": 9.057033219909394e-06, "loss": 0.7405, "step": 6194 }, { "epoch": 1.1141778297221974, "grad_norm": 1.467221975326538, "learning_rate": 9.056692776277004e-06, "loss": 0.7374, "step": 6195 }, { "epoch": 1.114357637328059, "grad_norm": 1.5473181009292603, "learning_rate": 9.056352277600619e-06, "loss": 0.8148, "step": 6196 }, { "epoch": 1.1145374449339207, "grad_norm": 1.555580735206604, "learning_rate": 9.056011723884854e-06, "loss": 0.8189, "step": 6197 }, { "epoch": 1.1147172525397824, "grad_norm": 1.5845186710357666, "learning_rate": 9.055671115134333e-06, "loss": 0.738, "step": 6198 }, { "epoch": 1.114897060145644, "grad_norm": 1.468480110168457, "learning_rate": 9.055330451353676e-06, "loss": 0.7773, "step": 6199 }, { "epoch": 1.115076867751506, "grad_norm": 1.4220678806304932, "learning_rate": 9.054989732547507e-06, "loss": 0.7505, "step": 6200 }, { "epoch": 1.1152566753573676, "grad_norm": 1.4599740505218506, "learning_rate": 9.054648958720446e-06, "loss": 0.6971, "step": 6201 }, { "epoch": 1.1154364829632293, "grad_norm": 1.4629838466644287, "learning_rate": 9.054308129877121e-06, "loss": 0.7025, "step": 6202 }, { "epoch": 1.1156162905690912, "grad_norm": 1.3851491212844849, "learning_rate": 9.053967246022152e-06, "loss": 0.7266, "step": 6203 }, { "epoch": 1.1157960981749528, "grad_norm": 1.4394035339355469, "learning_rate": 9.053626307160171e-06, "loss": 0.7458, "step": 6204 }, { "epoch": 1.1159759057808145, "grad_norm": 1.5579394102096558, "learning_rate": 9.053285313295797e-06, "loss": 0.7651, "step": 6205 }, { "epoch": 1.1161557133866762, "grad_norm": 1.4948612451553345, "learning_rate": 9.052944264433659e-06, "loss": 0.7735, "step": 6206 }, { "epoch": 1.116335520992538, "grad_norm": 1.0658767223358154, "learning_rate": 9.052603160578385e-06, "loss": 0.9509, "step": 6207 }, { "epoch": 1.1165153285983997, "grad_norm": 1.4729645252227783, "learning_rate": 9.052262001734606e-06, "loss": 0.7611, "step": 6208 }, { "epoch": 1.1166951362042614, "grad_norm": 1.4292750358581543, "learning_rate": 9.051920787906948e-06, "loss": 0.6882, "step": 6209 }, { "epoch": 1.116874943810123, "grad_norm": 1.0843815803527832, "learning_rate": 9.051579519100043e-06, "loss": 0.9847, "step": 6210 }, { "epoch": 1.117054751415985, "grad_norm": 1.444725513458252, "learning_rate": 9.051238195318516e-06, "loss": 0.812, "step": 6211 }, { "epoch": 1.1172345590218467, "grad_norm": 1.416322112083435, "learning_rate": 9.050896816567006e-06, "loss": 0.7752, "step": 6212 }, { "epoch": 1.1174143666277083, "grad_norm": 1.5119426250457764, "learning_rate": 9.050555382850142e-06, "loss": 0.8332, "step": 6213 }, { "epoch": 1.11759417423357, "grad_norm": 1.0280126333236694, "learning_rate": 9.050213894172554e-06, "loss": 0.9603, "step": 6214 }, { "epoch": 1.1177739818394319, "grad_norm": 1.529288411140442, "learning_rate": 9.04987235053888e-06, "loss": 0.7548, "step": 6215 }, { "epoch": 1.1179537894452936, "grad_norm": 1.5558180809020996, "learning_rate": 9.04953075195375e-06, "loss": 0.6706, "step": 6216 }, { "epoch": 1.1181335970511552, "grad_norm": 1.5285543203353882, "learning_rate": 9.049189098421803e-06, "loss": 0.7516, "step": 6217 }, { "epoch": 1.118313404657017, "grad_norm": 1.4318524599075317, "learning_rate": 9.048847389947671e-06, "loss": 0.7292, "step": 6218 }, { "epoch": 1.1184932122628788, "grad_norm": 1.4825401306152344, "learning_rate": 9.048505626535994e-06, "loss": 0.7507, "step": 6219 }, { "epoch": 1.1186730198687405, "grad_norm": 1.4313709735870361, "learning_rate": 9.048163808191407e-06, "loss": 0.6557, "step": 6220 }, { "epoch": 1.1188528274746021, "grad_norm": 1.1441054344177246, "learning_rate": 9.04782193491855e-06, "loss": 0.9466, "step": 6221 }, { "epoch": 1.1190326350804638, "grad_norm": 1.5424671173095703, "learning_rate": 9.04748000672206e-06, "loss": 0.7589, "step": 6222 }, { "epoch": 1.1192124426863257, "grad_norm": 1.3886432647705078, "learning_rate": 9.047138023606577e-06, "loss": 0.7477, "step": 6223 }, { "epoch": 1.1193922502921874, "grad_norm": 1.591591715812683, "learning_rate": 9.046795985576742e-06, "loss": 0.6988, "step": 6224 }, { "epoch": 1.119572057898049, "grad_norm": 1.1390740871429443, "learning_rate": 9.046453892637195e-06, "loss": 0.9625, "step": 6225 }, { "epoch": 1.1197518655039107, "grad_norm": 1.5221495628356934, "learning_rate": 9.046111744792579e-06, "loss": 0.7664, "step": 6226 }, { "epoch": 1.1199316731097726, "grad_norm": 1.0874884128570557, "learning_rate": 9.045769542047533e-06, "loss": 0.982, "step": 6227 }, { "epoch": 1.1201114807156343, "grad_norm": 0.9955385327339172, "learning_rate": 9.045427284406706e-06, "loss": 0.9193, "step": 6228 }, { "epoch": 1.120291288321496, "grad_norm": 1.3562878370285034, "learning_rate": 9.045084971874738e-06, "loss": 0.6917, "step": 6229 }, { "epoch": 1.1204710959273578, "grad_norm": 1.5155621767044067, "learning_rate": 9.044742604456274e-06, "loss": 0.746, "step": 6230 }, { "epoch": 1.1206509035332195, "grad_norm": 1.6253082752227783, "learning_rate": 9.044400182155961e-06, "loss": 0.7229, "step": 6231 }, { "epoch": 1.1208307111390812, "grad_norm": 1.382166862487793, "learning_rate": 9.044057704978444e-06, "loss": 0.7474, "step": 6232 }, { "epoch": 1.1210105187449428, "grad_norm": 1.572378158569336, "learning_rate": 9.04371517292837e-06, "loss": 0.7656, "step": 6233 }, { "epoch": 1.1211903263508047, "grad_norm": 1.4248296022415161, "learning_rate": 9.043372586010387e-06, "loss": 0.7164, "step": 6234 }, { "epoch": 1.1213701339566664, "grad_norm": 3.2466623783111572, "learning_rate": 9.043029944229143e-06, "loss": 0.7387, "step": 6235 }, { "epoch": 1.121549941562528, "grad_norm": 1.5326793193817139, "learning_rate": 9.042687247589289e-06, "loss": 0.7236, "step": 6236 }, { "epoch": 1.1217297491683897, "grad_norm": 1.4700710773468018, "learning_rate": 9.042344496095473e-06, "loss": 0.7331, "step": 6237 }, { "epoch": 1.1219095567742516, "grad_norm": 1.5038671493530273, "learning_rate": 9.042001689752346e-06, "loss": 0.7442, "step": 6238 }, { "epoch": 1.1220893643801133, "grad_norm": 1.2409435510635376, "learning_rate": 9.04165882856456e-06, "loss": 0.9899, "step": 6239 }, { "epoch": 1.122269171985975, "grad_norm": 1.5764340162277222, "learning_rate": 9.041315912536768e-06, "loss": 0.6959, "step": 6240 }, { "epoch": 1.1224489795918366, "grad_norm": 2.6884806156158447, "learning_rate": 9.040972941673621e-06, "loss": 0.7492, "step": 6241 }, { "epoch": 1.1226287871976985, "grad_norm": 1.5440956354141235, "learning_rate": 9.040629915979771e-06, "loss": 0.7479, "step": 6242 }, { "epoch": 1.1228085948035602, "grad_norm": 1.5118331909179688, "learning_rate": 9.040286835459877e-06, "loss": 0.7832, "step": 6243 }, { "epoch": 1.1229884024094219, "grad_norm": 1.9514106512069702, "learning_rate": 9.039943700118593e-06, "loss": 0.7378, "step": 6244 }, { "epoch": 1.1231682100152836, "grad_norm": 1.4719797372817993, "learning_rate": 9.039600509960572e-06, "loss": 0.6959, "step": 6245 }, { "epoch": 1.1233480176211454, "grad_norm": 1.4794472455978394, "learning_rate": 9.039257264990475e-06, "loss": 0.7297, "step": 6246 }, { "epoch": 1.1235278252270071, "grad_norm": 1.498910665512085, "learning_rate": 9.038913965212956e-06, "loss": 0.7264, "step": 6247 }, { "epoch": 1.1237076328328688, "grad_norm": 1.426519751548767, "learning_rate": 9.038570610632674e-06, "loss": 0.7673, "step": 6248 }, { "epoch": 1.1238874404387305, "grad_norm": 1.5036193132400513, "learning_rate": 9.038227201254286e-06, "loss": 0.7435, "step": 6249 }, { "epoch": 1.1240672480445923, "grad_norm": 1.6816463470458984, "learning_rate": 9.037883737082455e-06, "loss": 0.7398, "step": 6250 }, { "epoch": 1.124247055650454, "grad_norm": 1.2602418661117554, "learning_rate": 9.03754021812184e-06, "loss": 0.8988, "step": 6251 }, { "epoch": 1.1244268632563157, "grad_norm": 1.578059196472168, "learning_rate": 9.037196644377104e-06, "loss": 0.7049, "step": 6252 }, { "epoch": 1.1246066708621774, "grad_norm": 2.9537088871002197, "learning_rate": 9.036853015852904e-06, "loss": 0.739, "step": 6253 }, { "epoch": 1.1247864784680393, "grad_norm": 1.492850422859192, "learning_rate": 9.036509332553907e-06, "loss": 0.8193, "step": 6254 }, { "epoch": 1.124966286073901, "grad_norm": 1.0725213289260864, "learning_rate": 9.036165594484774e-06, "loss": 0.9718, "step": 6255 }, { "epoch": 1.1251460936797626, "grad_norm": 1.4724301099777222, "learning_rate": 9.03582180165017e-06, "loss": 0.7216, "step": 6256 }, { "epoch": 1.1253259012856245, "grad_norm": 1.3082433938980103, "learning_rate": 9.035477954054761e-06, "loss": 0.7364, "step": 6257 }, { "epoch": 1.1255057088914862, "grad_norm": 1.5885933637619019, "learning_rate": 9.03513405170321e-06, "loss": 0.7243, "step": 6258 }, { "epoch": 1.1256855164973478, "grad_norm": 3.642507791519165, "learning_rate": 9.034790094600185e-06, "loss": 0.7318, "step": 6259 }, { "epoch": 1.1258653241032095, "grad_norm": 1.1205602884292603, "learning_rate": 9.034446082750352e-06, "loss": 0.996, "step": 6260 }, { "epoch": 1.1260451317090714, "grad_norm": 1.528465747833252, "learning_rate": 9.034102016158381e-06, "loss": 0.8428, "step": 6261 }, { "epoch": 1.126224939314933, "grad_norm": 1.5867797136306763, "learning_rate": 9.033757894828937e-06, "loss": 0.7062, "step": 6262 }, { "epoch": 1.1264047469207947, "grad_norm": 1.5987987518310547, "learning_rate": 9.033413718766693e-06, "loss": 0.7828, "step": 6263 }, { "epoch": 1.1265845545266564, "grad_norm": 1.3918139934539795, "learning_rate": 9.033069487976316e-06, "loss": 0.6896, "step": 6264 }, { "epoch": 1.1267643621325183, "grad_norm": 1.6949121952056885, "learning_rate": 9.032725202462478e-06, "loss": 0.6986, "step": 6265 }, { "epoch": 1.12694416973838, "grad_norm": 1.4344017505645752, "learning_rate": 9.03238086222985e-06, "loss": 0.7089, "step": 6266 }, { "epoch": 1.1271239773442416, "grad_norm": 1.5739866495132446, "learning_rate": 9.032036467283106e-06, "loss": 0.78, "step": 6267 }, { "epoch": 1.1273037849501033, "grad_norm": 1.4668585062026978, "learning_rate": 9.031692017626917e-06, "loss": 0.7568, "step": 6268 }, { "epoch": 1.1274835925559652, "grad_norm": 1.4841407537460327, "learning_rate": 9.031347513265958e-06, "loss": 0.7406, "step": 6269 }, { "epoch": 1.1276634001618269, "grad_norm": 1.575467586517334, "learning_rate": 9.031002954204901e-06, "loss": 0.7054, "step": 6270 }, { "epoch": 1.1278432077676885, "grad_norm": 1.5396089553833008, "learning_rate": 9.030658340448427e-06, "loss": 0.7591, "step": 6271 }, { "epoch": 1.1280230153735502, "grad_norm": 1.3648223876953125, "learning_rate": 9.030313672001205e-06, "loss": 0.7157, "step": 6272 }, { "epoch": 1.128202822979412, "grad_norm": 1.4954092502593994, "learning_rate": 9.029968948867916e-06, "loss": 0.7702, "step": 6273 }, { "epoch": 1.1283826305852738, "grad_norm": 1.4374781847000122, "learning_rate": 9.029624171053235e-06, "loss": 0.7416, "step": 6274 }, { "epoch": 1.1285624381911354, "grad_norm": 1.3757529258728027, "learning_rate": 9.029279338561843e-06, "loss": 0.7132, "step": 6275 }, { "epoch": 1.1287422457969973, "grad_norm": 1.4500606060028076, "learning_rate": 9.028934451398415e-06, "loss": 0.7459, "step": 6276 }, { "epoch": 1.128922053402859, "grad_norm": 1.395751714706421, "learning_rate": 9.028589509567635e-06, "loss": 0.6987, "step": 6277 }, { "epoch": 1.1291018610087207, "grad_norm": 1.509857416152954, "learning_rate": 9.028244513074182e-06, "loss": 0.8066, "step": 6278 }, { "epoch": 1.1292816686145823, "grad_norm": 1.5733020305633545, "learning_rate": 9.027899461922734e-06, "loss": 0.7461, "step": 6279 }, { "epoch": 1.129461476220444, "grad_norm": 1.666446328163147, "learning_rate": 9.027554356117978e-06, "loss": 0.8285, "step": 6280 }, { "epoch": 1.129641283826306, "grad_norm": 1.22947096824646, "learning_rate": 9.027209195664592e-06, "loss": 0.9815, "step": 6281 }, { "epoch": 1.1298210914321676, "grad_norm": 1.4249510765075684, "learning_rate": 9.026863980567265e-06, "loss": 0.7366, "step": 6282 }, { "epoch": 1.1300008990380292, "grad_norm": 1.4160364866256714, "learning_rate": 9.026518710830674e-06, "loss": 0.7123, "step": 6283 }, { "epoch": 1.1301807066438911, "grad_norm": 1.517980694770813, "learning_rate": 9.026173386459508e-06, "loss": 0.719, "step": 6284 }, { "epoch": 1.1303605142497528, "grad_norm": 1.4816462993621826, "learning_rate": 9.025828007458453e-06, "loss": 0.7596, "step": 6285 }, { "epoch": 1.1305403218556145, "grad_norm": 1.487781047821045, "learning_rate": 9.025482573832193e-06, "loss": 0.7324, "step": 6286 }, { "epoch": 1.1307201294614762, "grad_norm": 1.5498325824737549, "learning_rate": 9.025137085585417e-06, "loss": 0.7681, "step": 6287 }, { "epoch": 1.1308999370673378, "grad_norm": 1.5134016275405884, "learning_rate": 9.024791542722814e-06, "loss": 0.6941, "step": 6288 }, { "epoch": 1.1310797446731997, "grad_norm": 1.426063060760498, "learning_rate": 9.02444594524907e-06, "loss": 0.7099, "step": 6289 }, { "epoch": 1.1312595522790614, "grad_norm": 1.4735502004623413, "learning_rate": 9.024100293168874e-06, "loss": 0.7547, "step": 6290 }, { "epoch": 1.131439359884923, "grad_norm": 1.40668523311615, "learning_rate": 9.023754586486916e-06, "loss": 0.8114, "step": 6291 }, { "epoch": 1.131619167490785, "grad_norm": 1.584762454032898, "learning_rate": 9.02340882520789e-06, "loss": 0.7939, "step": 6292 }, { "epoch": 1.1317989750966466, "grad_norm": 1.4465893507003784, "learning_rate": 9.023063009336487e-06, "loss": 0.7377, "step": 6293 }, { "epoch": 1.1319787827025083, "grad_norm": 1.3326529264450073, "learning_rate": 9.022717138877397e-06, "loss": 0.7711, "step": 6294 }, { "epoch": 1.13215859030837, "grad_norm": 1.4564849138259888, "learning_rate": 9.022371213835313e-06, "loss": 0.7432, "step": 6295 }, { "epoch": 1.1323383979142319, "grad_norm": 1.5166972875595093, "learning_rate": 9.022025234214928e-06, "loss": 0.6705, "step": 6296 }, { "epoch": 1.1325182055200935, "grad_norm": 1.494554877281189, "learning_rate": 9.02167920002094e-06, "loss": 0.7984, "step": 6297 }, { "epoch": 1.1326980131259552, "grad_norm": 1.2057157754898071, "learning_rate": 9.021333111258042e-06, "loss": 0.9211, "step": 6298 }, { "epoch": 1.1328778207318169, "grad_norm": 1.7019000053405762, "learning_rate": 9.02098696793093e-06, "loss": 0.7208, "step": 6299 }, { "epoch": 1.1330576283376788, "grad_norm": 1.4760260581970215, "learning_rate": 9.0206407700443e-06, "loss": 0.7342, "step": 6300 }, { "epoch": 1.1332374359435404, "grad_norm": 1.6165964603424072, "learning_rate": 9.020294517602853e-06, "loss": 0.7967, "step": 6301 }, { "epoch": 1.133417243549402, "grad_norm": 1.517875075340271, "learning_rate": 9.01994821061128e-06, "loss": 0.7543, "step": 6302 }, { "epoch": 1.133597051155264, "grad_norm": 1.5405638217926025, "learning_rate": 9.019601849074288e-06, "loss": 0.741, "step": 6303 }, { "epoch": 1.1337768587611257, "grad_norm": 1.1781654357910156, "learning_rate": 9.019255432996574e-06, "loss": 0.9951, "step": 6304 }, { "epoch": 1.1339566663669873, "grad_norm": 1.6149601936340332, "learning_rate": 9.018908962382835e-06, "loss": 0.7456, "step": 6305 }, { "epoch": 1.134136473972849, "grad_norm": 1.045628309249878, "learning_rate": 9.018562437237777e-06, "loss": 0.987, "step": 6306 }, { "epoch": 1.1343162815787107, "grad_norm": 1.1838105916976929, "learning_rate": 9.018215857566097e-06, "loss": 0.9463, "step": 6307 }, { "epoch": 1.1344960891845726, "grad_norm": 1.415712594985962, "learning_rate": 9.017869223372503e-06, "loss": 0.6673, "step": 6308 }, { "epoch": 1.1346758967904342, "grad_norm": 1.6075446605682373, "learning_rate": 9.017522534661694e-06, "loss": 0.7194, "step": 6309 }, { "epoch": 1.134855704396296, "grad_norm": 1.4752095937728882, "learning_rate": 9.017175791438376e-06, "loss": 0.6835, "step": 6310 }, { "epoch": 1.1350355120021578, "grad_norm": 1.5694079399108887, "learning_rate": 9.016828993707254e-06, "loss": 0.6892, "step": 6311 }, { "epoch": 1.1352153196080195, "grad_norm": 1.2483851909637451, "learning_rate": 9.016482141473032e-06, "loss": 0.9614, "step": 6312 }, { "epoch": 1.1353951272138811, "grad_norm": 1.427740216255188, "learning_rate": 9.016135234740418e-06, "loss": 0.7722, "step": 6313 }, { "epoch": 1.1355749348197428, "grad_norm": 1.4149354696273804, "learning_rate": 9.01578827351412e-06, "loss": 0.7495, "step": 6314 }, { "epoch": 1.1357547424256045, "grad_norm": 1.4413256645202637, "learning_rate": 9.015441257798842e-06, "loss": 0.7191, "step": 6315 }, { "epoch": 1.1359345500314664, "grad_norm": 1.209676742553711, "learning_rate": 9.015094187599297e-06, "loss": 0.9534, "step": 6316 }, { "epoch": 1.136114357637328, "grad_norm": 1.736029863357544, "learning_rate": 9.014747062920191e-06, "loss": 0.7595, "step": 6317 }, { "epoch": 1.1362941652431897, "grad_norm": 1.527138352394104, "learning_rate": 9.014399883766235e-06, "loss": 0.7185, "step": 6318 }, { "epoch": 1.1364739728490516, "grad_norm": 1.6005123853683472, "learning_rate": 9.014052650142142e-06, "loss": 0.772, "step": 6319 }, { "epoch": 1.1366537804549133, "grad_norm": 1.549475908279419, "learning_rate": 9.01370536205262e-06, "loss": 0.7583, "step": 6320 }, { "epoch": 1.136833588060775, "grad_norm": 1.4783954620361328, "learning_rate": 9.013358019502382e-06, "loss": 0.7409, "step": 6321 }, { "epoch": 1.1370133956666366, "grad_norm": 1.4979958534240723, "learning_rate": 9.013010622496145e-06, "loss": 0.7639, "step": 6322 }, { "epoch": 1.1371932032724985, "grad_norm": 1.5043976306915283, "learning_rate": 9.012663171038617e-06, "loss": 0.7689, "step": 6323 }, { "epoch": 1.1373730108783602, "grad_norm": 1.5762308835983276, "learning_rate": 9.012315665134515e-06, "loss": 0.7954, "step": 6324 }, { "epoch": 1.1375528184842219, "grad_norm": 1.5798472166061401, "learning_rate": 9.011968104788554e-06, "loss": 0.775, "step": 6325 }, { "epoch": 1.1377326260900835, "grad_norm": 1.3673338890075684, "learning_rate": 9.01162049000545e-06, "loss": 0.6792, "step": 6326 }, { "epoch": 1.1379124336959454, "grad_norm": 1.0737022161483765, "learning_rate": 9.01127282078992e-06, "loss": 0.9582, "step": 6327 }, { "epoch": 1.138092241301807, "grad_norm": 1.4922387599945068, "learning_rate": 9.010925097146682e-06, "loss": 0.6607, "step": 6328 }, { "epoch": 1.1382720489076688, "grad_norm": 1.064526081085205, "learning_rate": 9.010577319080452e-06, "loss": 0.9297, "step": 6329 }, { "epoch": 1.1384518565135306, "grad_norm": 1.5564275979995728, "learning_rate": 9.010229486595952e-06, "loss": 0.7487, "step": 6330 }, { "epoch": 1.1386316641193923, "grad_norm": 1.3876316547393799, "learning_rate": 9.009881599697898e-06, "loss": 0.7006, "step": 6331 }, { "epoch": 1.138811471725254, "grad_norm": 1.4533450603485107, "learning_rate": 9.009533658391013e-06, "loss": 0.7529, "step": 6332 }, { "epoch": 1.1389912793311157, "grad_norm": 1.532545804977417, "learning_rate": 9.009185662680018e-06, "loss": 0.7679, "step": 6333 }, { "epoch": 1.1391710869369773, "grad_norm": 1.1606694459915161, "learning_rate": 9.008837612569632e-06, "loss": 0.9643, "step": 6334 }, { "epoch": 1.1393508945428392, "grad_norm": 1.4949246644973755, "learning_rate": 9.008489508064582e-06, "loss": 0.7494, "step": 6335 }, { "epoch": 1.139530702148701, "grad_norm": 1.7089658975601196, "learning_rate": 9.008141349169588e-06, "loss": 0.7532, "step": 6336 }, { "epoch": 1.1397105097545626, "grad_norm": 1.4821298122406006, "learning_rate": 9.007793135889375e-06, "loss": 0.7255, "step": 6337 }, { "epoch": 1.1398903173604245, "grad_norm": 1.5189917087554932, "learning_rate": 9.00744486822867e-06, "loss": 0.7818, "step": 6338 }, { "epoch": 1.1400701249662861, "grad_norm": 1.4256407022476196, "learning_rate": 9.007096546192194e-06, "loss": 0.7772, "step": 6339 }, { "epoch": 1.1402499325721478, "grad_norm": 1.4663987159729004, "learning_rate": 9.006748169784675e-06, "loss": 0.7755, "step": 6340 }, { "epoch": 1.1404297401780095, "grad_norm": 1.0551996231079102, "learning_rate": 9.006399739010842e-06, "loss": 0.9877, "step": 6341 }, { "epoch": 1.1406095477838711, "grad_norm": 1.5508226156234741, "learning_rate": 9.006051253875421e-06, "loss": 0.7415, "step": 6342 }, { "epoch": 1.140789355389733, "grad_norm": 1.3328473567962646, "learning_rate": 9.005702714383142e-06, "loss": 0.6497, "step": 6343 }, { "epoch": 1.1409691629955947, "grad_norm": 1.5724514722824097, "learning_rate": 9.005354120538732e-06, "loss": 0.7692, "step": 6344 }, { "epoch": 1.1411489706014564, "grad_norm": 1.5037415027618408, "learning_rate": 9.005005472346923e-06, "loss": 0.7625, "step": 6345 }, { "epoch": 1.1413287782073183, "grad_norm": 1.0965710878372192, "learning_rate": 9.004656769812445e-06, "loss": 0.9711, "step": 6346 }, { "epoch": 1.14150858581318, "grad_norm": 1.480493426322937, "learning_rate": 9.004308012940029e-06, "loss": 0.725, "step": 6347 }, { "epoch": 1.1416883934190416, "grad_norm": 1.566947340965271, "learning_rate": 9.003959201734408e-06, "loss": 0.8178, "step": 6348 }, { "epoch": 1.1418682010249033, "grad_norm": 1.697932481765747, "learning_rate": 9.003610336200315e-06, "loss": 0.7791, "step": 6349 }, { "epoch": 1.1420480086307652, "grad_norm": 1.5423221588134766, "learning_rate": 9.003261416342481e-06, "loss": 0.7133, "step": 6350 }, { "epoch": 1.1422278162366268, "grad_norm": 1.4988852739334106, "learning_rate": 9.002912442165643e-06, "loss": 0.7025, "step": 6351 }, { "epoch": 1.1424076238424885, "grad_norm": 1.4462140798568726, "learning_rate": 9.002563413674537e-06, "loss": 0.7195, "step": 6352 }, { "epoch": 1.1425874314483502, "grad_norm": 1.4544931650161743, "learning_rate": 9.002214330873895e-06, "loss": 0.6988, "step": 6353 }, { "epoch": 1.142767239054212, "grad_norm": 1.3593909740447998, "learning_rate": 9.001865193768458e-06, "loss": 0.751, "step": 6354 }, { "epoch": 1.1429470466600737, "grad_norm": 1.3536213636398315, "learning_rate": 9.00151600236296e-06, "loss": 0.691, "step": 6355 }, { "epoch": 1.1431268542659354, "grad_norm": 1.5394580364227295, "learning_rate": 9.001166756662141e-06, "loss": 0.7943, "step": 6356 }, { "epoch": 1.143306661871797, "grad_norm": 1.1580870151519775, "learning_rate": 9.00081745667074e-06, "loss": 0.9599, "step": 6357 }, { "epoch": 1.143486469477659, "grad_norm": 1.5195425748825073, "learning_rate": 9.000468102393494e-06, "loss": 0.7542, "step": 6358 }, { "epoch": 1.1436662770835206, "grad_norm": 1.4347048997879028, "learning_rate": 9.000118693835146e-06, "loss": 0.7079, "step": 6359 }, { "epoch": 1.1438460846893823, "grad_norm": 1.514960765838623, "learning_rate": 8.999769231000435e-06, "loss": 0.7429, "step": 6360 }, { "epoch": 1.144025892295244, "grad_norm": 1.453700304031372, "learning_rate": 8.999419713894106e-06, "loss": 0.758, "step": 6361 }, { "epoch": 1.1442056999011059, "grad_norm": 1.4497156143188477, "learning_rate": 8.999070142520898e-06, "loss": 0.7165, "step": 6362 }, { "epoch": 1.1443855075069675, "grad_norm": 1.152388334274292, "learning_rate": 8.998720516885555e-06, "loss": 0.9768, "step": 6363 }, { "epoch": 1.1445653151128292, "grad_norm": 1.549386978149414, "learning_rate": 8.998370836992821e-06, "loss": 0.7396, "step": 6364 }, { "epoch": 1.144745122718691, "grad_norm": 1.759982705116272, "learning_rate": 8.998021102847444e-06, "loss": 0.7117, "step": 6365 }, { "epoch": 1.1449249303245528, "grad_norm": 1.6085608005523682, "learning_rate": 8.997671314454164e-06, "loss": 0.7884, "step": 6366 }, { "epoch": 1.1451047379304145, "grad_norm": 1.6849887371063232, "learning_rate": 8.99732147181773e-06, "loss": 0.6921, "step": 6367 }, { "epoch": 1.1452845455362761, "grad_norm": 1.5332591533660889, "learning_rate": 8.996971574942887e-06, "loss": 0.7764, "step": 6368 }, { "epoch": 1.1454643531421378, "grad_norm": 1.488213062286377, "learning_rate": 8.996621623834387e-06, "loss": 0.7585, "step": 6369 }, { "epoch": 1.1456441607479997, "grad_norm": 1.4324613809585571, "learning_rate": 8.996271618496976e-06, "loss": 0.7268, "step": 6370 }, { "epoch": 1.1458239683538614, "grad_norm": 1.4700508117675781, "learning_rate": 8.9959215589354e-06, "loss": 0.723, "step": 6371 }, { "epoch": 1.146003775959723, "grad_norm": 1.616408348083496, "learning_rate": 8.995571445154414e-06, "loss": 0.7677, "step": 6372 }, { "epoch": 1.146183583565585, "grad_norm": 1.5138301849365234, "learning_rate": 8.995221277158766e-06, "loss": 0.7244, "step": 6373 }, { "epoch": 1.1463633911714466, "grad_norm": 1.603481411933899, "learning_rate": 8.994871054953207e-06, "loss": 0.8006, "step": 6374 }, { "epoch": 1.1465431987773083, "grad_norm": 1.5575319528579712, "learning_rate": 8.99452077854249e-06, "loss": 0.7562, "step": 6375 }, { "epoch": 1.14672300638317, "grad_norm": 1.581910490989685, "learning_rate": 8.994170447931367e-06, "loss": 0.7587, "step": 6376 }, { "epoch": 1.1469028139890318, "grad_norm": 1.534547209739685, "learning_rate": 8.993820063124592e-06, "loss": 0.7972, "step": 6377 }, { "epoch": 1.1470826215948935, "grad_norm": 1.5971364974975586, "learning_rate": 8.99346962412692e-06, "loss": 0.6981, "step": 6378 }, { "epoch": 1.1472624292007552, "grad_norm": 1.6731902360916138, "learning_rate": 8.993119130943103e-06, "loss": 0.7653, "step": 6379 }, { "epoch": 1.1474422368066168, "grad_norm": 1.0813857316970825, "learning_rate": 8.992768583577902e-06, "loss": 0.9884, "step": 6380 }, { "epoch": 1.1476220444124787, "grad_norm": 1.1106051206588745, "learning_rate": 8.992417982036067e-06, "loss": 0.9399, "step": 6381 }, { "epoch": 1.1478018520183404, "grad_norm": 1.5840046405792236, "learning_rate": 8.992067326322363e-06, "loss": 0.7766, "step": 6382 }, { "epoch": 1.147981659624202, "grad_norm": 1.5869715213775635, "learning_rate": 8.991716616441539e-06, "loss": 0.7426, "step": 6383 }, { "epoch": 1.1481614672300637, "grad_norm": 1.447065830230713, "learning_rate": 8.99136585239836e-06, "loss": 0.7922, "step": 6384 }, { "epoch": 1.1483412748359256, "grad_norm": 1.479609727859497, "learning_rate": 8.991015034197585e-06, "loss": 0.6938, "step": 6385 }, { "epoch": 1.1485210824417873, "grad_norm": 1.5305298566818237, "learning_rate": 8.990664161843971e-06, "loss": 0.7622, "step": 6386 }, { "epoch": 1.148700890047649, "grad_norm": 1.4695146083831787, "learning_rate": 8.99031323534228e-06, "loss": 0.8463, "step": 6387 }, { "epoch": 1.1488806976535106, "grad_norm": 1.5582877397537231, "learning_rate": 8.989962254697276e-06, "loss": 0.7898, "step": 6388 }, { "epoch": 1.1490605052593725, "grad_norm": 1.459946870803833, "learning_rate": 8.989611219913719e-06, "loss": 0.7655, "step": 6389 }, { "epoch": 1.1492403128652342, "grad_norm": 1.3213319778442383, "learning_rate": 8.989260130996372e-06, "loss": 0.9665, "step": 6390 }, { "epoch": 1.1494201204710959, "grad_norm": 1.5497796535491943, "learning_rate": 8.98890898795e-06, "loss": 0.7784, "step": 6391 }, { "epoch": 1.1495999280769578, "grad_norm": 1.3993417024612427, "learning_rate": 8.988557790779366e-06, "loss": 0.6986, "step": 6392 }, { "epoch": 1.1497797356828194, "grad_norm": 1.3853216171264648, "learning_rate": 8.988206539489238e-06, "loss": 0.7414, "step": 6393 }, { "epoch": 1.149959543288681, "grad_norm": 1.4633437395095825, "learning_rate": 8.98785523408438e-06, "loss": 0.76, "step": 6394 }, { "epoch": 1.1501393508945428, "grad_norm": 1.4897743463516235, "learning_rate": 8.987503874569558e-06, "loss": 0.6925, "step": 6395 }, { "epoch": 1.1503191585004044, "grad_norm": 1.5350232124328613, "learning_rate": 8.987152460949543e-06, "loss": 0.7668, "step": 6396 }, { "epoch": 1.1504989661062663, "grad_norm": 1.5200618505477905, "learning_rate": 8.986800993229098e-06, "loss": 0.7382, "step": 6397 }, { "epoch": 1.150678773712128, "grad_norm": 1.3539098501205444, "learning_rate": 8.986449471412995e-06, "loss": 0.7645, "step": 6398 }, { "epoch": 1.1508585813179897, "grad_norm": 1.1297677755355835, "learning_rate": 8.986097895506006e-06, "loss": 0.9586, "step": 6399 }, { "epoch": 1.1510383889238516, "grad_norm": 1.7270989418029785, "learning_rate": 8.985746265512896e-06, "loss": 0.7859, "step": 6400 }, { "epoch": 1.1512181965297132, "grad_norm": 1.478249192237854, "learning_rate": 8.98539458143844e-06, "loss": 0.7132, "step": 6401 }, { "epoch": 1.151398004135575, "grad_norm": 1.5121362209320068, "learning_rate": 8.98504284328741e-06, "loss": 0.7672, "step": 6402 }, { "epoch": 1.1515778117414366, "grad_norm": 1.5938773155212402, "learning_rate": 8.984691051064576e-06, "loss": 0.7333, "step": 6403 }, { "epoch": 1.1517576193472985, "grad_norm": 1.383040189743042, "learning_rate": 8.984339204774714e-06, "loss": 0.7332, "step": 6404 }, { "epoch": 1.1519374269531601, "grad_norm": 1.4211781024932861, "learning_rate": 8.983987304422596e-06, "loss": 0.7408, "step": 6405 }, { "epoch": 1.1521172345590218, "grad_norm": 1.5844331979751587, "learning_rate": 8.983635350012998e-06, "loss": 0.8092, "step": 6406 }, { "epoch": 1.1522970421648835, "grad_norm": 1.4613136053085327, "learning_rate": 8.983283341550696e-06, "loss": 0.7782, "step": 6407 }, { "epoch": 1.1524768497707454, "grad_norm": 1.3887203931808472, "learning_rate": 8.982931279040466e-06, "loss": 0.7851, "step": 6408 }, { "epoch": 1.152656657376607, "grad_norm": 1.9697954654693604, "learning_rate": 8.982579162487084e-06, "loss": 0.7721, "step": 6409 }, { "epoch": 1.1528364649824687, "grad_norm": 1.409052848815918, "learning_rate": 8.982226991895327e-06, "loss": 0.7089, "step": 6410 }, { "epoch": 1.1530162725883304, "grad_norm": 1.443352222442627, "learning_rate": 8.981874767269977e-06, "loss": 0.7007, "step": 6411 }, { "epoch": 1.1531960801941923, "grad_norm": 1.523354411125183, "learning_rate": 8.98152248861581e-06, "loss": 0.7604, "step": 6412 }, { "epoch": 1.153375887800054, "grad_norm": 1.532335638999939, "learning_rate": 8.981170155937608e-06, "loss": 0.7578, "step": 6413 }, { "epoch": 1.1535556954059156, "grad_norm": 1.4754304885864258, "learning_rate": 8.98081776924015e-06, "loss": 0.7256, "step": 6414 }, { "epoch": 1.1537355030117773, "grad_norm": 1.5276395082473755, "learning_rate": 8.98046532852822e-06, "loss": 0.7479, "step": 6415 }, { "epoch": 1.1539153106176392, "grad_norm": 1.7323393821716309, "learning_rate": 8.980112833806597e-06, "loss": 0.7834, "step": 6416 }, { "epoch": 1.1540951182235009, "grad_norm": 1.4957669973373413, "learning_rate": 8.979760285080066e-06, "loss": 0.7548, "step": 6417 }, { "epoch": 1.1542749258293625, "grad_norm": 2.4623847007751465, "learning_rate": 8.97940768235341e-06, "loss": 0.7243, "step": 6418 }, { "epoch": 1.1544547334352244, "grad_norm": 1.4736359119415283, "learning_rate": 8.97905502563141e-06, "loss": 0.738, "step": 6419 }, { "epoch": 1.154634541041086, "grad_norm": 1.5063014030456543, "learning_rate": 8.978702314918859e-06, "loss": 0.7979, "step": 6420 }, { "epoch": 1.1548143486469478, "grad_norm": 1.7705843448638916, "learning_rate": 8.978349550220535e-06, "loss": 0.7779, "step": 6421 }, { "epoch": 1.1549941562528094, "grad_norm": 1.1035112142562866, "learning_rate": 8.97799673154123e-06, "loss": 0.9299, "step": 6422 }, { "epoch": 1.155173963858671, "grad_norm": 1.1643965244293213, "learning_rate": 8.977643858885728e-06, "loss": 0.9726, "step": 6423 }, { "epoch": 1.155353771464533, "grad_norm": 1.4687880277633667, "learning_rate": 8.977290932258818e-06, "loss": 0.7753, "step": 6424 }, { "epoch": 1.1555335790703947, "grad_norm": 1.4199472665786743, "learning_rate": 8.976937951665289e-06, "loss": 0.6564, "step": 6425 }, { "epoch": 1.1557133866762563, "grad_norm": 1.5644477605819702, "learning_rate": 8.976584917109929e-06, "loss": 0.8113, "step": 6426 }, { "epoch": 1.1558931942821182, "grad_norm": 1.6113569736480713, "learning_rate": 8.976231828597531e-06, "loss": 0.7385, "step": 6427 }, { "epoch": 1.15607300188798, "grad_norm": 1.6754951477050781, "learning_rate": 8.975878686132884e-06, "loss": 0.7459, "step": 6428 }, { "epoch": 1.1562528094938416, "grad_norm": 1.4719396829605103, "learning_rate": 8.97552548972078e-06, "loss": 0.7568, "step": 6429 }, { "epoch": 1.1564326170997032, "grad_norm": 1.5181996822357178, "learning_rate": 8.975172239366012e-06, "loss": 0.697, "step": 6430 }, { "epoch": 1.1566124247055651, "grad_norm": 1.5038026571273804, "learning_rate": 8.974818935073372e-06, "loss": 0.7235, "step": 6431 }, { "epoch": 1.1567922323114268, "grad_norm": 1.5822889804840088, "learning_rate": 8.974465576847655e-06, "loss": 0.728, "step": 6432 }, { "epoch": 1.1569720399172885, "grad_norm": 1.5563563108444214, "learning_rate": 8.974112164693656e-06, "loss": 0.7023, "step": 6433 }, { "epoch": 1.1571518475231501, "grad_norm": 1.1462855339050293, "learning_rate": 8.973758698616168e-06, "loss": 1.0109, "step": 6434 }, { "epoch": 1.157331655129012, "grad_norm": 1.7359225749969482, "learning_rate": 8.973405178619989e-06, "loss": 0.7431, "step": 6435 }, { "epoch": 1.1575114627348737, "grad_norm": 1.5932167768478394, "learning_rate": 8.973051604709918e-06, "loss": 0.8254, "step": 6436 }, { "epoch": 1.1576912703407354, "grad_norm": 1.532837152481079, "learning_rate": 8.972697976890745e-06, "loss": 0.7801, "step": 6437 }, { "epoch": 1.157871077946597, "grad_norm": 1.4404722452163696, "learning_rate": 8.972344295167276e-06, "loss": 0.7601, "step": 6438 }, { "epoch": 1.158050885552459, "grad_norm": 1.1178492307662964, "learning_rate": 8.971990559544308e-06, "loss": 0.9862, "step": 6439 }, { "epoch": 1.1582306931583206, "grad_norm": 1.3863366842269897, "learning_rate": 8.971636770026638e-06, "loss": 0.7244, "step": 6440 }, { "epoch": 1.1584105007641823, "grad_norm": 1.4021798372268677, "learning_rate": 8.97128292661907e-06, "loss": 0.7485, "step": 6441 }, { "epoch": 1.158590308370044, "grad_norm": 1.4632866382598877, "learning_rate": 8.970929029326402e-06, "loss": 0.7627, "step": 6442 }, { "epoch": 1.1587701159759058, "grad_norm": 1.0930688381195068, "learning_rate": 8.970575078153438e-06, "loss": 0.9421, "step": 6443 }, { "epoch": 1.1589499235817675, "grad_norm": 1.4482998847961426, "learning_rate": 8.97022107310498e-06, "loss": 0.664, "step": 6444 }, { "epoch": 1.1591297311876292, "grad_norm": 1.475741982460022, "learning_rate": 8.969867014185832e-06, "loss": 0.7482, "step": 6445 }, { "epoch": 1.159309538793491, "grad_norm": 1.443737268447876, "learning_rate": 8.969512901400798e-06, "loss": 0.738, "step": 6446 }, { "epoch": 1.1594893463993527, "grad_norm": 1.5233372449874878, "learning_rate": 8.969158734754682e-06, "loss": 0.6995, "step": 6447 }, { "epoch": 1.1596691540052144, "grad_norm": 1.4841737747192383, "learning_rate": 8.96880451425229e-06, "loss": 0.8066, "step": 6448 }, { "epoch": 1.159848961611076, "grad_norm": 1.4382072687149048, "learning_rate": 8.968450239898427e-06, "loss": 0.7128, "step": 6449 }, { "epoch": 1.1600287692169378, "grad_norm": 1.648695468902588, "learning_rate": 8.968095911697903e-06, "loss": 0.7382, "step": 6450 }, { "epoch": 1.1602085768227997, "grad_norm": 1.5904351472854614, "learning_rate": 8.967741529655525e-06, "loss": 0.7085, "step": 6451 }, { "epoch": 1.1603883844286613, "grad_norm": 1.4839104413986206, "learning_rate": 8.9673870937761e-06, "loss": 0.7499, "step": 6452 }, { "epoch": 1.160568192034523, "grad_norm": 1.463578224182129, "learning_rate": 8.967032604064436e-06, "loss": 0.7354, "step": 6453 }, { "epoch": 1.1607479996403849, "grad_norm": 1.462020754814148, "learning_rate": 8.966678060525347e-06, "loss": 0.7616, "step": 6454 }, { "epoch": 1.1609278072462466, "grad_norm": 1.4865301847457886, "learning_rate": 8.96632346316364e-06, "loss": 0.7111, "step": 6455 }, { "epoch": 1.1611076148521082, "grad_norm": 1.2316060066223145, "learning_rate": 8.96596881198413e-06, "loss": 0.9569, "step": 6456 }, { "epoch": 1.16128742245797, "grad_norm": 1.6287230253219604, "learning_rate": 8.965614106991624e-06, "loss": 0.7321, "step": 6457 }, { "epoch": 1.1614672300638318, "grad_norm": 1.0901273488998413, "learning_rate": 8.96525934819094e-06, "loss": 0.9785, "step": 6458 }, { "epoch": 1.1616470376696935, "grad_norm": 1.52635657787323, "learning_rate": 8.964904535586888e-06, "loss": 0.7297, "step": 6459 }, { "epoch": 1.1618268452755551, "grad_norm": 1.4775880575180054, "learning_rate": 8.964549669184286e-06, "loss": 0.7447, "step": 6460 }, { "epoch": 1.1620066528814168, "grad_norm": 1.4416303634643555, "learning_rate": 8.964194748987948e-06, "loss": 0.7697, "step": 6461 }, { "epoch": 1.1621864604872787, "grad_norm": 1.462375283241272, "learning_rate": 8.963839775002687e-06, "loss": 0.7762, "step": 6462 }, { "epoch": 1.1623662680931404, "grad_norm": 1.4649354219436646, "learning_rate": 8.963484747233322e-06, "loss": 0.7513, "step": 6463 }, { "epoch": 1.162546075699002, "grad_norm": 1.5597448348999023, "learning_rate": 8.963129665684669e-06, "loss": 0.8002, "step": 6464 }, { "epoch": 1.1627258833048637, "grad_norm": 1.4062803983688354, "learning_rate": 8.962774530361547e-06, "loss": 0.743, "step": 6465 }, { "epoch": 1.1629056909107256, "grad_norm": 1.4137769937515259, "learning_rate": 8.962419341268773e-06, "loss": 0.7436, "step": 6466 }, { "epoch": 1.1630854985165873, "grad_norm": 1.4883010387420654, "learning_rate": 8.96206409841117e-06, "loss": 0.6994, "step": 6467 }, { "epoch": 1.163265306122449, "grad_norm": 1.3663578033447266, "learning_rate": 8.961708801793554e-06, "loss": 0.9399, "step": 6468 }, { "epoch": 1.1634451137283106, "grad_norm": 1.154111385345459, "learning_rate": 8.96135345142075e-06, "loss": 0.9335, "step": 6469 }, { "epoch": 1.1636249213341725, "grad_norm": 1.5168696641921997, "learning_rate": 8.960998047297575e-06, "loss": 0.7915, "step": 6470 }, { "epoch": 1.1638047289400342, "grad_norm": 1.0338951349258423, "learning_rate": 8.960642589428856e-06, "loss": 0.9223, "step": 6471 }, { "epoch": 1.1639845365458958, "grad_norm": 1.6020283699035645, "learning_rate": 8.960287077819411e-06, "loss": 0.8061, "step": 6472 }, { "epoch": 1.1641643441517577, "grad_norm": 1.5704867839813232, "learning_rate": 8.95993151247407e-06, "loss": 0.7599, "step": 6473 }, { "epoch": 1.1643441517576194, "grad_norm": 1.5210720300674438, "learning_rate": 8.959575893397653e-06, "loss": 0.7669, "step": 6474 }, { "epoch": 1.164523959363481, "grad_norm": 1.759217619895935, "learning_rate": 8.959220220594988e-06, "loss": 0.7583, "step": 6475 }, { "epoch": 1.1647037669693427, "grad_norm": 1.1224690675735474, "learning_rate": 8.958864494070898e-06, "loss": 0.9786, "step": 6476 }, { "epoch": 1.1648835745752044, "grad_norm": 1.4301815032958984, "learning_rate": 8.958508713830212e-06, "loss": 0.6895, "step": 6477 }, { "epoch": 1.1650633821810663, "grad_norm": 1.5165250301361084, "learning_rate": 8.958152879877756e-06, "loss": 0.7663, "step": 6478 }, { "epoch": 1.165243189786928, "grad_norm": 1.5430775880813599, "learning_rate": 8.95779699221836e-06, "loss": 0.7195, "step": 6479 }, { "epoch": 1.1654229973927897, "grad_norm": 1.517203450202942, "learning_rate": 8.957441050856851e-06, "loss": 0.8466, "step": 6480 }, { "epoch": 1.1656028049986515, "grad_norm": 1.5423306226730347, "learning_rate": 8.95708505579806e-06, "loss": 0.6967, "step": 6481 }, { "epoch": 1.1657826126045132, "grad_norm": 1.4121513366699219, "learning_rate": 8.956729007046819e-06, "loss": 0.6917, "step": 6482 }, { "epoch": 1.1659624202103749, "grad_norm": 1.5025116205215454, "learning_rate": 8.956372904607955e-06, "loss": 0.7442, "step": 6483 }, { "epoch": 1.1661422278162366, "grad_norm": 1.5187016725540161, "learning_rate": 8.956016748486302e-06, "loss": 0.7714, "step": 6484 }, { "epoch": 1.1663220354220984, "grad_norm": 1.3682340383529663, "learning_rate": 8.955660538686693e-06, "loss": 0.6563, "step": 6485 }, { "epoch": 1.1665018430279601, "grad_norm": 1.5272777080535889, "learning_rate": 8.955304275213962e-06, "loss": 0.7698, "step": 6486 }, { "epoch": 1.1666816506338218, "grad_norm": 1.5104188919067383, "learning_rate": 8.95494795807294e-06, "loss": 0.755, "step": 6487 }, { "epoch": 1.1668614582396835, "grad_norm": 1.786109447479248, "learning_rate": 8.954591587268465e-06, "loss": 0.7861, "step": 6488 }, { "epoch": 1.1670412658455454, "grad_norm": 1.5391632318496704, "learning_rate": 8.95423516280537e-06, "loss": 0.7827, "step": 6489 }, { "epoch": 1.167221073451407, "grad_norm": 1.4975991249084473, "learning_rate": 8.953878684688492e-06, "loss": 0.7842, "step": 6490 }, { "epoch": 1.1674008810572687, "grad_norm": 1.4300808906555176, "learning_rate": 8.953522152922671e-06, "loss": 0.7347, "step": 6491 }, { "epoch": 1.1675806886631304, "grad_norm": 1.2034339904785156, "learning_rate": 8.95316556751274e-06, "loss": 0.9922, "step": 6492 }, { "epoch": 1.1677604962689923, "grad_norm": 1.6311107873916626, "learning_rate": 8.952808928463539e-06, "loss": 0.777, "step": 6493 }, { "epoch": 1.167940303874854, "grad_norm": 1.535439372062683, "learning_rate": 8.95245223577991e-06, "loss": 0.6487, "step": 6494 }, { "epoch": 1.1681201114807156, "grad_norm": 1.500845193862915, "learning_rate": 8.952095489466687e-06, "loss": 0.8273, "step": 6495 }, { "epoch": 1.1682999190865773, "grad_norm": 1.479170799255371, "learning_rate": 8.951738689528716e-06, "loss": 0.6995, "step": 6496 }, { "epoch": 1.1684797266924392, "grad_norm": 1.5384619235992432, "learning_rate": 8.951381835970834e-06, "loss": 0.7278, "step": 6497 }, { "epoch": 1.1686595342983008, "grad_norm": 1.6464072465896606, "learning_rate": 8.951024928797887e-06, "loss": 0.7906, "step": 6498 }, { "epoch": 1.1688393419041625, "grad_norm": 1.5951217412948608, "learning_rate": 8.950667968014716e-06, "loss": 0.8086, "step": 6499 }, { "epoch": 1.1690191495100244, "grad_norm": 1.427729845046997, "learning_rate": 8.950310953626164e-06, "loss": 0.6714, "step": 6500 }, { "epoch": 1.1690191495100244, "eval_loss": 0.8116524815559387, "eval_runtime": 148.6462, "eval_samples_per_second": 96.753, "eval_steps_per_second": 1.514, "step": 6500 }, { "epoch": 1.169198957115886, "grad_norm": 1.572485089302063, "learning_rate": 8.949953885637076e-06, "loss": 0.7808, "step": 6501 }, { "epoch": 1.1693787647217477, "grad_norm": 1.5069389343261719, "learning_rate": 8.949596764052296e-06, "loss": 0.7487, "step": 6502 }, { "epoch": 1.1695585723276094, "grad_norm": 1.027882695198059, "learning_rate": 8.949239588876672e-06, "loss": 0.9685, "step": 6503 }, { "epoch": 1.169738379933471, "grad_norm": 1.4307336807250977, "learning_rate": 8.948882360115047e-06, "loss": 0.7285, "step": 6504 }, { "epoch": 1.169918187539333, "grad_norm": 1.3898361921310425, "learning_rate": 8.94852507777227e-06, "loss": 0.7178, "step": 6505 }, { "epoch": 1.1700979951451946, "grad_norm": 1.483737587928772, "learning_rate": 8.948167741853188e-06, "loss": 0.7728, "step": 6506 }, { "epoch": 1.1702778027510563, "grad_norm": 1.4409065246582031, "learning_rate": 8.947810352362653e-06, "loss": 0.7149, "step": 6507 }, { "epoch": 1.1704576103569182, "grad_norm": 1.4791942834854126, "learning_rate": 8.94745290930551e-06, "loss": 0.7347, "step": 6508 }, { "epoch": 1.1706374179627799, "grad_norm": 1.537038803100586, "learning_rate": 8.947095412686611e-06, "loss": 0.7897, "step": 6509 }, { "epoch": 1.1708172255686415, "grad_norm": 1.0675833225250244, "learning_rate": 8.946737862510805e-06, "loss": 0.9696, "step": 6510 }, { "epoch": 1.1709970331745032, "grad_norm": 1.4447758197784424, "learning_rate": 8.946380258782945e-06, "loss": 0.7707, "step": 6511 }, { "epoch": 1.171176840780365, "grad_norm": 1.4620431661605835, "learning_rate": 8.946022601507885e-06, "loss": 0.7584, "step": 6512 }, { "epoch": 1.1713566483862268, "grad_norm": 1.6993253231048584, "learning_rate": 8.945664890690475e-06, "loss": 0.778, "step": 6513 }, { "epoch": 1.1715364559920884, "grad_norm": 1.476387858390808, "learning_rate": 8.94530712633557e-06, "loss": 0.7328, "step": 6514 }, { "epoch": 1.1717162635979501, "grad_norm": 1.5236213207244873, "learning_rate": 8.944949308448024e-06, "loss": 0.7206, "step": 6515 }, { "epoch": 1.171896071203812, "grad_norm": 1.1160601377487183, "learning_rate": 8.94459143703269e-06, "loss": 0.972, "step": 6516 }, { "epoch": 1.1720758788096737, "grad_norm": 1.5624617338180542, "learning_rate": 8.94423351209443e-06, "loss": 0.7413, "step": 6517 }, { "epoch": 1.1722556864155353, "grad_norm": 1.4048669338226318, "learning_rate": 8.943875533638093e-06, "loss": 0.7753, "step": 6518 }, { "epoch": 1.172435494021397, "grad_norm": 1.4711663722991943, "learning_rate": 8.943517501668541e-06, "loss": 0.7372, "step": 6519 }, { "epoch": 1.172615301627259, "grad_norm": 1.5233043432235718, "learning_rate": 8.943159416190632e-06, "loss": 0.731, "step": 6520 }, { "epoch": 1.1727951092331206, "grad_norm": 1.578395128250122, "learning_rate": 8.942801277209223e-06, "loss": 0.7619, "step": 6521 }, { "epoch": 1.1729749168389823, "grad_norm": 1.4601988792419434, "learning_rate": 8.942443084729174e-06, "loss": 0.7931, "step": 6522 }, { "epoch": 1.173154724444844, "grad_norm": 1.506011724472046, "learning_rate": 8.942084838755346e-06, "loss": 0.7392, "step": 6523 }, { "epoch": 1.1733345320507058, "grad_norm": 1.6898012161254883, "learning_rate": 8.941726539292598e-06, "loss": 0.6931, "step": 6524 }, { "epoch": 1.1735143396565675, "grad_norm": 1.6043784618377686, "learning_rate": 8.941368186345793e-06, "loss": 0.7403, "step": 6525 }, { "epoch": 1.1736941472624292, "grad_norm": 1.6624865531921387, "learning_rate": 8.941009779919795e-06, "loss": 0.7093, "step": 6526 }, { "epoch": 1.173873954868291, "grad_norm": 1.6214474439620972, "learning_rate": 8.940651320019464e-06, "loss": 0.7088, "step": 6527 }, { "epoch": 1.1740537624741527, "grad_norm": 1.421806812286377, "learning_rate": 8.940292806649667e-06, "loss": 0.7079, "step": 6528 }, { "epoch": 1.1742335700800144, "grad_norm": 1.3562722206115723, "learning_rate": 8.939934239815265e-06, "loss": 0.6477, "step": 6529 }, { "epoch": 1.174413377685876, "grad_norm": 1.423161268234253, "learning_rate": 8.939575619521126e-06, "loss": 0.6952, "step": 6530 }, { "epoch": 1.1745931852917377, "grad_norm": 1.4338221549987793, "learning_rate": 8.939216945772116e-06, "loss": 0.746, "step": 6531 }, { "epoch": 1.1747729928975996, "grad_norm": 1.4408001899719238, "learning_rate": 8.938858218573098e-06, "loss": 0.772, "step": 6532 }, { "epoch": 1.1749528005034613, "grad_norm": 1.5764158964157104, "learning_rate": 8.938499437928944e-06, "loss": 0.7302, "step": 6533 }, { "epoch": 1.175132608109323, "grad_norm": 1.502866506576538, "learning_rate": 8.93814060384452e-06, "loss": 0.7503, "step": 6534 }, { "epoch": 1.1753124157151849, "grad_norm": 1.394081473350525, "learning_rate": 8.937781716324697e-06, "loss": 0.6903, "step": 6535 }, { "epoch": 1.1754922233210465, "grad_norm": 1.4431071281433105, "learning_rate": 8.937422775374343e-06, "loss": 0.7486, "step": 6536 }, { "epoch": 1.1756720309269082, "grad_norm": 1.1130073070526123, "learning_rate": 8.937063780998326e-06, "loss": 0.9162, "step": 6537 }, { "epoch": 1.1758518385327699, "grad_norm": 1.5571649074554443, "learning_rate": 8.93670473320152e-06, "loss": 0.7722, "step": 6538 }, { "epoch": 1.1760316461386318, "grad_norm": 1.5505712032318115, "learning_rate": 8.9363456319888e-06, "loss": 0.7367, "step": 6539 }, { "epoch": 1.1762114537444934, "grad_norm": 1.5343716144561768, "learning_rate": 8.93598647736503e-06, "loss": 0.758, "step": 6540 }, { "epoch": 1.176391261350355, "grad_norm": 1.6709312200546265, "learning_rate": 8.93562726933509e-06, "loss": 0.7732, "step": 6541 }, { "epoch": 1.1765710689562168, "grad_norm": 1.5177371501922607, "learning_rate": 8.93526800790385e-06, "loss": 0.7363, "step": 6542 }, { "epoch": 1.1767508765620787, "grad_norm": 1.6441725492477417, "learning_rate": 8.93490869307619e-06, "loss": 0.7687, "step": 6543 }, { "epoch": 1.1769306841679403, "grad_norm": 1.4852020740509033, "learning_rate": 8.934549324856981e-06, "loss": 0.7006, "step": 6544 }, { "epoch": 1.177110491773802, "grad_norm": 1.489181637763977, "learning_rate": 8.9341899032511e-06, "loss": 0.7696, "step": 6545 }, { "epoch": 1.1772902993796637, "grad_norm": 1.0713571310043335, "learning_rate": 8.933830428263424e-06, "loss": 0.9685, "step": 6546 }, { "epoch": 1.1774701069855256, "grad_norm": 1.605700969696045, "learning_rate": 8.933470899898831e-06, "loss": 0.7719, "step": 6547 }, { "epoch": 1.1776499145913872, "grad_norm": 1.5247141122817993, "learning_rate": 8.9331113181622e-06, "loss": 0.6901, "step": 6548 }, { "epoch": 1.177829722197249, "grad_norm": 2.0617780685424805, "learning_rate": 8.932751683058407e-06, "loss": 0.7295, "step": 6549 }, { "epoch": 1.1780095298031106, "grad_norm": 1.5194154977798462, "learning_rate": 8.932391994592336e-06, "loss": 0.7414, "step": 6550 }, { "epoch": 1.1781893374089725, "grad_norm": 1.2434918880462646, "learning_rate": 8.932032252768864e-06, "loss": 0.9369, "step": 6551 }, { "epoch": 1.1783691450148341, "grad_norm": 1.497511386871338, "learning_rate": 8.931672457592875e-06, "loss": 0.735, "step": 6552 }, { "epoch": 1.1785489526206958, "grad_norm": 1.6207093000411987, "learning_rate": 8.931312609069249e-06, "loss": 0.656, "step": 6553 }, { "epoch": 1.1787287602265577, "grad_norm": 1.4749457836151123, "learning_rate": 8.93095270720287e-06, "loss": 0.8187, "step": 6554 }, { "epoch": 1.1789085678324194, "grad_norm": 1.0636839866638184, "learning_rate": 8.93059275199862e-06, "loss": 0.9324, "step": 6555 }, { "epoch": 1.179088375438281, "grad_norm": 1.5735355615615845, "learning_rate": 8.930232743461384e-06, "loss": 0.7307, "step": 6556 }, { "epoch": 1.1792681830441427, "grad_norm": 1.4191210269927979, "learning_rate": 8.929872681596048e-06, "loss": 0.6957, "step": 6557 }, { "epoch": 1.1794479906500044, "grad_norm": 1.42540442943573, "learning_rate": 8.929512566407494e-06, "loss": 0.7344, "step": 6558 }, { "epoch": 1.1796277982558663, "grad_norm": 1.4831023216247559, "learning_rate": 8.929152397900611e-06, "loss": 0.6839, "step": 6559 }, { "epoch": 1.179807605861728, "grad_norm": 1.100088357925415, "learning_rate": 8.928792176080287e-06, "loss": 0.9867, "step": 6560 }, { "epoch": 1.1799874134675896, "grad_norm": 1.5814368724822998, "learning_rate": 8.928431900951406e-06, "loss": 0.7219, "step": 6561 }, { "epoch": 1.1801672210734515, "grad_norm": 1.377379298210144, "learning_rate": 8.928071572518862e-06, "loss": 0.7624, "step": 6562 }, { "epoch": 1.1803470286793132, "grad_norm": 1.1103512048721313, "learning_rate": 8.927711190787538e-06, "loss": 0.9475, "step": 6563 }, { "epoch": 1.1805268362851749, "grad_norm": 1.531182885169983, "learning_rate": 8.927350755762327e-06, "loss": 0.7265, "step": 6564 }, { "epoch": 1.1807066438910365, "grad_norm": 1.5107378959655762, "learning_rate": 8.926990267448121e-06, "loss": 0.7691, "step": 6565 }, { "epoch": 1.1808864514968984, "grad_norm": 1.538317084312439, "learning_rate": 8.92662972584981e-06, "loss": 0.711, "step": 6566 }, { "epoch": 1.18106625910276, "grad_norm": 1.5056846141815186, "learning_rate": 8.926269130972285e-06, "loss": 0.7586, "step": 6567 }, { "epoch": 1.1812460667086218, "grad_norm": 1.0561772584915161, "learning_rate": 8.92590848282044e-06, "loss": 0.9812, "step": 6568 }, { "epoch": 1.1814258743144834, "grad_norm": 1.267275333404541, "learning_rate": 8.925547781399166e-06, "loss": 0.9654, "step": 6569 }, { "epoch": 1.1816056819203453, "grad_norm": 1.527882695198059, "learning_rate": 8.925187026713363e-06, "loss": 0.805, "step": 6570 }, { "epoch": 1.181785489526207, "grad_norm": 1.4734153747558594, "learning_rate": 8.92482621876792e-06, "loss": 0.7021, "step": 6571 }, { "epoch": 1.1819652971320687, "grad_norm": 1.103821039199829, "learning_rate": 8.924465357567737e-06, "loss": 0.9623, "step": 6572 }, { "epoch": 1.1821451047379303, "grad_norm": 1.4567844867706299, "learning_rate": 8.924104443117708e-06, "loss": 0.7138, "step": 6573 }, { "epoch": 1.1823249123437922, "grad_norm": 1.4840980768203735, "learning_rate": 8.923743475422729e-06, "loss": 0.7239, "step": 6574 }, { "epoch": 1.182504719949654, "grad_norm": 1.4740090370178223, "learning_rate": 8.9233824544877e-06, "loss": 0.6954, "step": 6575 }, { "epoch": 1.1826845275555156, "grad_norm": 1.4707354307174683, "learning_rate": 8.92302138031752e-06, "loss": 0.7228, "step": 6576 }, { "epoch": 1.1828643351613772, "grad_norm": 1.0773311853408813, "learning_rate": 8.922660252917088e-06, "loss": 0.9606, "step": 6577 }, { "epoch": 1.1830441427672391, "grad_norm": 1.510880470275879, "learning_rate": 8.922299072291302e-06, "loss": 0.7333, "step": 6578 }, { "epoch": 1.1832239503731008, "grad_norm": 1.469735860824585, "learning_rate": 8.921937838445064e-06, "loss": 0.7509, "step": 6579 }, { "epoch": 1.1834037579789625, "grad_norm": 1.6734040975570679, "learning_rate": 8.921576551383277e-06, "loss": 0.773, "step": 6580 }, { "epoch": 1.1835835655848244, "grad_norm": 1.323660969734192, "learning_rate": 8.92121521111084e-06, "loss": 0.6593, "step": 6581 }, { "epoch": 1.183763373190686, "grad_norm": 1.5754328966140747, "learning_rate": 8.920853817632662e-06, "loss": 0.8073, "step": 6582 }, { "epoch": 1.1839431807965477, "grad_norm": 1.445918321609497, "learning_rate": 8.920492370953638e-06, "loss": 0.7541, "step": 6583 }, { "epoch": 1.1841229884024094, "grad_norm": 1.5015816688537598, "learning_rate": 8.920130871078678e-06, "loss": 0.7613, "step": 6584 }, { "epoch": 1.184302796008271, "grad_norm": 1.4206864833831787, "learning_rate": 8.919769318012685e-06, "loss": 0.7221, "step": 6585 }, { "epoch": 1.184482603614133, "grad_norm": 1.4336222410202026, "learning_rate": 8.919407711760568e-06, "loss": 0.7321, "step": 6586 }, { "epoch": 1.1846624112199946, "grad_norm": 1.5288100242614746, "learning_rate": 8.919046052327229e-06, "loss": 0.6833, "step": 6587 }, { "epoch": 1.1848422188258563, "grad_norm": 1.5584328174591064, "learning_rate": 8.918684339717577e-06, "loss": 0.7795, "step": 6588 }, { "epoch": 1.1850220264317182, "grad_norm": 1.0469601154327393, "learning_rate": 8.918322573936524e-06, "loss": 0.9609, "step": 6589 }, { "epoch": 1.1852018340375798, "grad_norm": 1.080870509147644, "learning_rate": 8.917960754988973e-06, "loss": 0.9622, "step": 6590 }, { "epoch": 1.1853816416434415, "grad_norm": 1.4466960430145264, "learning_rate": 8.917598882879834e-06, "loss": 0.6862, "step": 6591 }, { "epoch": 1.1855614492493032, "grad_norm": 1.485069751739502, "learning_rate": 8.91723695761402e-06, "loss": 0.7865, "step": 6592 }, { "epoch": 1.1857412568551648, "grad_norm": 1.4179141521453857, "learning_rate": 8.91687497919644e-06, "loss": 0.7342, "step": 6593 }, { "epoch": 1.1859210644610267, "grad_norm": 1.4659978151321411, "learning_rate": 8.916512947632006e-06, "loss": 0.7123, "step": 6594 }, { "epoch": 1.1861008720668884, "grad_norm": 1.5445780754089355, "learning_rate": 8.91615086292563e-06, "loss": 0.7289, "step": 6595 }, { "epoch": 1.18628067967275, "grad_norm": 1.5266718864440918, "learning_rate": 8.915788725082226e-06, "loss": 0.6801, "step": 6596 }, { "epoch": 1.186460487278612, "grad_norm": 1.6105806827545166, "learning_rate": 8.915426534106705e-06, "loss": 0.7438, "step": 6597 }, { "epoch": 1.1866402948844736, "grad_norm": 1.4368903636932373, "learning_rate": 8.915064290003986e-06, "loss": 0.7819, "step": 6598 }, { "epoch": 1.1868201024903353, "grad_norm": 1.581687092781067, "learning_rate": 8.914701992778981e-06, "loss": 0.984, "step": 6599 }, { "epoch": 1.186999910096197, "grad_norm": 1.7971137762069702, "learning_rate": 8.914339642436606e-06, "loss": 0.7645, "step": 6600 }, { "epoch": 1.1871797177020589, "grad_norm": 1.5537354946136475, "learning_rate": 8.91397723898178e-06, "loss": 0.7487, "step": 6601 }, { "epoch": 1.1873595253079205, "grad_norm": 1.5394917726516724, "learning_rate": 8.913614782419416e-06, "loss": 0.7813, "step": 6602 }, { "epoch": 1.1875393329137822, "grad_norm": 1.500401258468628, "learning_rate": 8.913252272754437e-06, "loss": 0.7455, "step": 6603 }, { "epoch": 1.187719140519644, "grad_norm": 1.7750773429870605, "learning_rate": 8.912889709991758e-06, "loss": 0.7774, "step": 6604 }, { "epoch": 1.1878989481255058, "grad_norm": 1.0142027139663696, "learning_rate": 8.9125270941363e-06, "loss": 0.9394, "step": 6605 }, { "epoch": 1.1880787557313675, "grad_norm": 1.5537489652633667, "learning_rate": 8.912164425192983e-06, "loss": 0.6617, "step": 6606 }, { "epoch": 1.1882585633372291, "grad_norm": 1.5360678434371948, "learning_rate": 8.911801703166728e-06, "loss": 0.7421, "step": 6607 }, { "epoch": 1.188438370943091, "grad_norm": 1.9368717670440674, "learning_rate": 8.911438928062457e-06, "loss": 0.7778, "step": 6608 }, { "epoch": 1.1886181785489527, "grad_norm": 1.5042541027069092, "learning_rate": 8.911076099885093e-06, "loss": 0.7542, "step": 6609 }, { "epoch": 1.1887979861548144, "grad_norm": 1.4452745914459229, "learning_rate": 8.910713218639556e-06, "loss": 0.6921, "step": 6610 }, { "epoch": 1.188977793760676, "grad_norm": 1.412623405456543, "learning_rate": 8.910350284330773e-06, "loss": 0.7094, "step": 6611 }, { "epoch": 1.1891576013665377, "grad_norm": 1.4031047821044922, "learning_rate": 8.909987296963668e-06, "loss": 0.7365, "step": 6612 }, { "epoch": 1.1893374089723996, "grad_norm": 1.5080337524414062, "learning_rate": 8.909624256543165e-06, "loss": 0.7594, "step": 6613 }, { "epoch": 1.1895172165782613, "grad_norm": 1.4278172254562378, "learning_rate": 8.909261163074193e-06, "loss": 0.7064, "step": 6614 }, { "epoch": 1.189697024184123, "grad_norm": 1.5126093626022339, "learning_rate": 8.908898016561674e-06, "loss": 0.7857, "step": 6615 }, { "epoch": 1.1898768317899848, "grad_norm": 1.6098315715789795, "learning_rate": 8.90853481701054e-06, "loss": 0.7683, "step": 6616 }, { "epoch": 1.1900566393958465, "grad_norm": 1.5179834365844727, "learning_rate": 8.908171564425715e-06, "loss": 0.7495, "step": 6617 }, { "epoch": 1.1902364470017082, "grad_norm": 1.4943962097167969, "learning_rate": 8.907808258812132e-06, "loss": 0.6981, "step": 6618 }, { "epoch": 1.1904162546075698, "grad_norm": 2.7938408851623535, "learning_rate": 8.907444900174716e-06, "loss": 0.6606, "step": 6619 }, { "epoch": 1.1905960622134315, "grad_norm": 1.1755913496017456, "learning_rate": 8.907081488518402e-06, "loss": 0.9752, "step": 6620 }, { "epoch": 1.1907758698192934, "grad_norm": 1.5384820699691772, "learning_rate": 8.906718023848118e-06, "loss": 0.741, "step": 6621 }, { "epoch": 1.190955677425155, "grad_norm": 1.3705816268920898, "learning_rate": 8.906354506168796e-06, "loss": 0.7851, "step": 6622 }, { "epoch": 1.1911354850310167, "grad_norm": 1.4989871978759766, "learning_rate": 8.905990935485369e-06, "loss": 0.753, "step": 6623 }, { "epoch": 1.1913152926368786, "grad_norm": 1.441838264465332, "learning_rate": 8.90562731180277e-06, "loss": 0.7498, "step": 6624 }, { "epoch": 1.1914951002427403, "grad_norm": 1.465008020401001, "learning_rate": 8.905263635125934e-06, "loss": 0.7561, "step": 6625 }, { "epoch": 1.191674907848602, "grad_norm": 1.4829248189926147, "learning_rate": 8.904899905459797e-06, "loss": 0.748, "step": 6626 }, { "epoch": 1.1918547154544636, "grad_norm": 1.6871200799942017, "learning_rate": 8.904536122809289e-06, "loss": 0.7114, "step": 6627 }, { "epoch": 1.1920345230603255, "grad_norm": 1.4670357704162598, "learning_rate": 8.904172287179348e-06, "loss": 0.6953, "step": 6628 }, { "epoch": 1.1922143306661872, "grad_norm": 1.652880311012268, "learning_rate": 8.903808398574914e-06, "loss": 0.7708, "step": 6629 }, { "epoch": 1.1923941382720489, "grad_norm": 1.4387580156326294, "learning_rate": 8.903444457000923e-06, "loss": 0.7189, "step": 6630 }, { "epoch": 1.1925739458779105, "grad_norm": 1.5915212631225586, "learning_rate": 8.90308046246231e-06, "loss": 0.7613, "step": 6631 }, { "epoch": 1.1927537534837724, "grad_norm": 1.4225248098373413, "learning_rate": 8.902716414964018e-06, "loss": 0.7282, "step": 6632 }, { "epoch": 1.192933561089634, "grad_norm": 1.4112697839736938, "learning_rate": 8.902352314510985e-06, "loss": 0.7137, "step": 6633 }, { "epoch": 1.1931133686954958, "grad_norm": 1.3667311668395996, "learning_rate": 8.901988161108153e-06, "loss": 0.7138, "step": 6634 }, { "epoch": 1.1932931763013577, "grad_norm": 1.6033531427383423, "learning_rate": 8.90162395476046e-06, "loss": 0.7654, "step": 6635 }, { "epoch": 1.1934729839072193, "grad_norm": 1.4007940292358398, "learning_rate": 8.90125969547285e-06, "loss": 0.7184, "step": 6636 }, { "epoch": 1.193652791513081, "grad_norm": 1.567846655845642, "learning_rate": 8.900895383250265e-06, "loss": 0.7673, "step": 6637 }, { "epoch": 1.1938325991189427, "grad_norm": 1.628895878791809, "learning_rate": 8.900531018097647e-06, "loss": 0.7135, "step": 6638 }, { "epoch": 1.1940124067248044, "grad_norm": 1.2420158386230469, "learning_rate": 8.90016660001994e-06, "loss": 1.0, "step": 6639 }, { "epoch": 1.1941922143306662, "grad_norm": 1.4978032112121582, "learning_rate": 8.899802129022093e-06, "loss": 0.7749, "step": 6640 }, { "epoch": 1.194372021936528, "grad_norm": 1.427410364151001, "learning_rate": 8.899437605109048e-06, "loss": 0.749, "step": 6641 }, { "epoch": 1.1945518295423896, "grad_norm": 1.096980333328247, "learning_rate": 8.89907302828575e-06, "loss": 0.9295, "step": 6642 }, { "epoch": 1.1947316371482515, "grad_norm": 1.4929003715515137, "learning_rate": 8.898708398557147e-06, "loss": 0.8186, "step": 6643 }, { "epoch": 1.1949114447541132, "grad_norm": 1.530582308769226, "learning_rate": 8.898343715928187e-06, "loss": 0.7383, "step": 6644 }, { "epoch": 1.1950912523599748, "grad_norm": 1.666019320487976, "learning_rate": 8.897978980403816e-06, "loss": 0.7914, "step": 6645 }, { "epoch": 1.1952710599658365, "grad_norm": 1.6278001070022583, "learning_rate": 8.897614191988989e-06, "loss": 0.7872, "step": 6646 }, { "epoch": 1.1954508675716982, "grad_norm": 1.4084111452102661, "learning_rate": 8.897249350688648e-06, "loss": 0.7117, "step": 6647 }, { "epoch": 1.19563067517756, "grad_norm": 1.6227060556411743, "learning_rate": 8.896884456507749e-06, "loss": 0.7273, "step": 6648 }, { "epoch": 1.1958104827834217, "grad_norm": 1.1684207916259766, "learning_rate": 8.89651950945124e-06, "loss": 0.9313, "step": 6649 }, { "epoch": 1.1959902903892834, "grad_norm": 1.5141535997390747, "learning_rate": 8.896154509524076e-06, "loss": 0.7981, "step": 6650 }, { "epoch": 1.1961700979951453, "grad_norm": 1.4815444946289062, "learning_rate": 8.895789456731206e-06, "loss": 0.7666, "step": 6651 }, { "epoch": 1.196349905601007, "grad_norm": 1.5426236391067505, "learning_rate": 8.895424351077584e-06, "loss": 0.7676, "step": 6652 }, { "epoch": 1.1965297132068686, "grad_norm": 1.0286519527435303, "learning_rate": 8.895059192568165e-06, "loss": 0.9548, "step": 6653 }, { "epoch": 1.1967095208127303, "grad_norm": 1.6546614170074463, "learning_rate": 8.894693981207905e-06, "loss": 0.7254, "step": 6654 }, { "epoch": 1.1968893284185922, "grad_norm": 1.091495394706726, "learning_rate": 8.894328717001757e-06, "loss": 0.9467, "step": 6655 }, { "epoch": 1.1970691360244539, "grad_norm": 1.2013680934906006, "learning_rate": 8.893963399954679e-06, "loss": 0.9066, "step": 6656 }, { "epoch": 1.1972489436303155, "grad_norm": 1.5227559804916382, "learning_rate": 8.893598030071628e-06, "loss": 0.8184, "step": 6657 }, { "epoch": 1.1974287512361772, "grad_norm": 1.4594755172729492, "learning_rate": 8.893232607357559e-06, "loss": 0.6846, "step": 6658 }, { "epoch": 1.197608558842039, "grad_norm": 1.3967499732971191, "learning_rate": 8.892867131817433e-06, "loss": 0.7082, "step": 6659 }, { "epoch": 1.1977883664479008, "grad_norm": 1.5622637271881104, "learning_rate": 8.892501603456207e-06, "loss": 0.7582, "step": 6660 }, { "epoch": 1.1979681740537624, "grad_norm": 1.688849925994873, "learning_rate": 8.892136022278843e-06, "loss": 0.7998, "step": 6661 }, { "epoch": 1.198147981659624, "grad_norm": 1.4524023532867432, "learning_rate": 8.891770388290298e-06, "loss": 0.7167, "step": 6662 }, { "epoch": 1.198327789265486, "grad_norm": 1.2835899591445923, "learning_rate": 8.891404701495538e-06, "loss": 0.9493, "step": 6663 }, { "epoch": 1.1985075968713477, "grad_norm": 1.5640302896499634, "learning_rate": 8.891038961899521e-06, "loss": 0.746, "step": 6664 }, { "epoch": 1.1986874044772093, "grad_norm": 1.5550462007522583, "learning_rate": 8.89067316950721e-06, "loss": 0.7562, "step": 6665 }, { "epoch": 1.198867212083071, "grad_norm": 1.49855637550354, "learning_rate": 8.89030732432357e-06, "loss": 0.7627, "step": 6666 }, { "epoch": 1.199047019688933, "grad_norm": 1.4780577421188354, "learning_rate": 8.889941426353566e-06, "loss": 0.7405, "step": 6667 }, { "epoch": 1.1992268272947946, "grad_norm": 1.5325771570205688, "learning_rate": 8.889575475602158e-06, "loss": 0.7557, "step": 6668 }, { "epoch": 1.1994066349006562, "grad_norm": 1.508410096168518, "learning_rate": 8.889209472074315e-06, "loss": 0.7544, "step": 6669 }, { "epoch": 1.1995864425065181, "grad_norm": 1.480505108833313, "learning_rate": 8.888843415775004e-06, "loss": 0.745, "step": 6670 }, { "epoch": 1.1997662501123798, "grad_norm": 1.563260555267334, "learning_rate": 8.88847730670919e-06, "loss": 0.7873, "step": 6671 }, { "epoch": 1.1999460577182415, "grad_norm": 1.460632085800171, "learning_rate": 8.888111144881842e-06, "loss": 0.7375, "step": 6672 }, { "epoch": 1.2001258653241031, "grad_norm": 1.5091944932937622, "learning_rate": 8.887744930297926e-06, "loss": 0.7195, "step": 6673 }, { "epoch": 1.2003056729299648, "grad_norm": 1.5146538019180298, "learning_rate": 8.887378662962414e-06, "loss": 0.7533, "step": 6674 }, { "epoch": 1.2004854805358267, "grad_norm": 1.097155213356018, "learning_rate": 8.887012342880273e-06, "loss": 0.9727, "step": 6675 }, { "epoch": 1.2006652881416884, "grad_norm": 1.7446739673614502, "learning_rate": 8.886645970056475e-06, "loss": 0.7232, "step": 6676 }, { "epoch": 1.20084509574755, "grad_norm": 1.5954362154006958, "learning_rate": 8.88627954449599e-06, "loss": 0.6936, "step": 6677 }, { "epoch": 1.201024903353412, "grad_norm": 1.4867151975631714, "learning_rate": 8.885913066203793e-06, "loss": 0.6911, "step": 6678 }, { "epoch": 1.2012047109592736, "grad_norm": 1.5778570175170898, "learning_rate": 8.885546535184853e-06, "loss": 0.7963, "step": 6679 }, { "epoch": 1.2013845185651353, "grad_norm": 1.5127400159835815, "learning_rate": 8.885179951444146e-06, "loss": 0.7529, "step": 6680 }, { "epoch": 1.201564326170997, "grad_norm": 1.5448527336120605, "learning_rate": 8.884813314986644e-06, "loss": 0.7769, "step": 6681 }, { "epoch": 1.2017441337768588, "grad_norm": 1.671718955039978, "learning_rate": 8.884446625817325e-06, "loss": 0.7819, "step": 6682 }, { "epoch": 1.2019239413827205, "grad_norm": 1.1740505695343018, "learning_rate": 8.884079883941159e-06, "loss": 0.9307, "step": 6683 }, { "epoch": 1.2021037489885822, "grad_norm": 1.5792481899261475, "learning_rate": 8.883713089363128e-06, "loss": 0.7533, "step": 6684 }, { "epoch": 1.2022835565944439, "grad_norm": 1.511915922164917, "learning_rate": 8.883346242088204e-06, "loss": 0.7253, "step": 6685 }, { "epoch": 1.2024633642003058, "grad_norm": 1.549994707107544, "learning_rate": 8.88297934212137e-06, "loss": 0.7247, "step": 6686 }, { "epoch": 1.2026431718061674, "grad_norm": 1.4563853740692139, "learning_rate": 8.882612389467599e-06, "loss": 0.7476, "step": 6687 }, { "epoch": 1.202822979412029, "grad_norm": 1.4102686643600464, "learning_rate": 8.882245384131872e-06, "loss": 0.8103, "step": 6688 }, { "epoch": 1.2030027870178908, "grad_norm": 1.5074893236160278, "learning_rate": 8.88187832611917e-06, "loss": 0.7814, "step": 6689 }, { "epoch": 1.2031825946237527, "grad_norm": 1.5131992101669312, "learning_rate": 8.881511215434473e-06, "loss": 0.8007, "step": 6690 }, { "epoch": 1.2033624022296143, "grad_norm": 1.5020544528961182, "learning_rate": 8.881144052082762e-06, "loss": 0.7317, "step": 6691 }, { "epoch": 1.203542209835476, "grad_norm": 1.15437912940979, "learning_rate": 8.880776836069018e-06, "loss": 0.9693, "step": 6692 }, { "epoch": 1.2037220174413377, "grad_norm": 1.1810144186019897, "learning_rate": 8.880409567398225e-06, "loss": 0.9252, "step": 6693 }, { "epoch": 1.2039018250471996, "grad_norm": 1.4636403322219849, "learning_rate": 8.880042246075366e-06, "loss": 0.7258, "step": 6694 }, { "epoch": 1.2040816326530612, "grad_norm": 1.5668437480926514, "learning_rate": 8.879674872105424e-06, "loss": 0.7145, "step": 6695 }, { "epoch": 1.204261440258923, "grad_norm": 1.584791898727417, "learning_rate": 8.879307445493386e-06, "loss": 0.7761, "step": 6696 }, { "epoch": 1.2044412478647848, "grad_norm": 1.4611271619796753, "learning_rate": 8.878939966244236e-06, "loss": 0.7436, "step": 6697 }, { "epoch": 1.2046210554706465, "grad_norm": 1.492060661315918, "learning_rate": 8.87857243436296e-06, "loss": 0.723, "step": 6698 }, { "epoch": 1.2048008630765081, "grad_norm": 1.4251837730407715, "learning_rate": 8.878204849854543e-06, "loss": 0.7017, "step": 6699 }, { "epoch": 1.2049806706823698, "grad_norm": 1.0983052253723145, "learning_rate": 8.877837212723976e-06, "loss": 0.9861, "step": 6700 }, { "epoch": 1.2051604782882315, "grad_norm": 1.48482346534729, "learning_rate": 8.877469522976247e-06, "loss": 0.7239, "step": 6701 }, { "epoch": 1.2053402858940934, "grad_norm": 1.46824312210083, "learning_rate": 8.877101780616346e-06, "loss": 0.7019, "step": 6702 }, { "epoch": 1.205520093499955, "grad_norm": 1.1309024095535278, "learning_rate": 8.87673398564926e-06, "loss": 0.9471, "step": 6703 }, { "epoch": 1.2056999011058167, "grad_norm": 1.3895801305770874, "learning_rate": 8.87636613807998e-06, "loss": 0.6826, "step": 6704 }, { "epoch": 1.2058797087116786, "grad_norm": 1.5117430686950684, "learning_rate": 8.875998237913498e-06, "loss": 0.7616, "step": 6705 }, { "epoch": 1.2060595163175403, "grad_norm": 1.4812296628952026, "learning_rate": 8.875630285154806e-06, "loss": 0.7148, "step": 6706 }, { "epoch": 1.206239323923402, "grad_norm": 1.4946701526641846, "learning_rate": 8.875262279808897e-06, "loss": 0.7355, "step": 6707 }, { "epoch": 1.2064191315292636, "grad_norm": 1.458320140838623, "learning_rate": 8.874894221880762e-06, "loss": 0.734, "step": 6708 }, { "epoch": 1.2065989391351255, "grad_norm": 1.5144678354263306, "learning_rate": 8.874526111375397e-06, "loss": 0.7658, "step": 6709 }, { "epoch": 1.2067787467409872, "grad_norm": 1.6396113634109497, "learning_rate": 8.874157948297797e-06, "loss": 0.6861, "step": 6710 }, { "epoch": 1.2069585543468488, "grad_norm": 1.5654664039611816, "learning_rate": 8.873789732652958e-06, "loss": 0.702, "step": 6711 }, { "epoch": 1.2071383619527105, "grad_norm": 1.436140775680542, "learning_rate": 8.873421464445874e-06, "loss": 0.763, "step": 6712 }, { "epoch": 1.2073181695585724, "grad_norm": 1.5228289365768433, "learning_rate": 8.873053143681544e-06, "loss": 0.7426, "step": 6713 }, { "epoch": 1.207497977164434, "grad_norm": 1.4137144088745117, "learning_rate": 8.872684770364965e-06, "loss": 0.7488, "step": 6714 }, { "epoch": 1.2076777847702957, "grad_norm": 1.521942377090454, "learning_rate": 8.872316344501136e-06, "loss": 0.8303, "step": 6715 }, { "epoch": 1.2078575923761574, "grad_norm": 1.682030439376831, "learning_rate": 8.871947866095054e-06, "loss": 0.7582, "step": 6716 }, { "epoch": 1.2080373999820193, "grad_norm": 1.5186196565628052, "learning_rate": 8.871579335151719e-06, "loss": 0.7556, "step": 6717 }, { "epoch": 1.208217207587881, "grad_norm": 1.580851435661316, "learning_rate": 8.871210751676134e-06, "loss": 0.7719, "step": 6718 }, { "epoch": 1.2083970151937427, "grad_norm": 1.4462642669677734, "learning_rate": 8.870842115673297e-06, "loss": 0.6928, "step": 6719 }, { "epoch": 1.2085768227996043, "grad_norm": 1.4345531463623047, "learning_rate": 8.870473427148214e-06, "loss": 0.7773, "step": 6720 }, { "epoch": 1.2087566304054662, "grad_norm": 1.4882808923721313, "learning_rate": 8.870104686105884e-06, "loss": 0.7017, "step": 6721 }, { "epoch": 1.2089364380113279, "grad_norm": 1.4073289632797241, "learning_rate": 8.869735892551312e-06, "loss": 0.7357, "step": 6722 }, { "epoch": 1.2091162456171896, "grad_norm": 1.4122247695922852, "learning_rate": 8.869367046489498e-06, "loss": 0.7628, "step": 6723 }, { "epoch": 1.2092960532230514, "grad_norm": 1.434175968170166, "learning_rate": 8.868998147925455e-06, "loss": 0.8004, "step": 6724 }, { "epoch": 1.2094758608289131, "grad_norm": 1.5439488887786865, "learning_rate": 8.868629196864182e-06, "loss": 0.7216, "step": 6725 }, { "epoch": 1.2096556684347748, "grad_norm": 1.5306899547576904, "learning_rate": 8.868260193310688e-06, "loss": 0.7755, "step": 6726 }, { "epoch": 1.2098354760406365, "grad_norm": 1.5229347944259644, "learning_rate": 8.867891137269977e-06, "loss": 0.7067, "step": 6727 }, { "epoch": 1.2100152836464981, "grad_norm": 1.5337986946105957, "learning_rate": 8.86752202874706e-06, "loss": 0.7763, "step": 6728 }, { "epoch": 1.21019509125236, "grad_norm": 1.4435944557189941, "learning_rate": 8.867152867746942e-06, "loss": 0.7747, "step": 6729 }, { "epoch": 1.2103748988582217, "grad_norm": 1.1134462356567383, "learning_rate": 8.866783654274635e-06, "loss": 0.9886, "step": 6730 }, { "epoch": 1.2105547064640834, "grad_norm": 1.4911550283432007, "learning_rate": 8.866414388335147e-06, "loss": 0.7829, "step": 6731 }, { "epoch": 1.2107345140699453, "grad_norm": 1.5766971111297607, "learning_rate": 8.86604506993349e-06, "loss": 0.7336, "step": 6732 }, { "epoch": 1.210914321675807, "grad_norm": 1.5706957578659058, "learning_rate": 8.865675699074674e-06, "loss": 0.762, "step": 6733 }, { "epoch": 1.2110941292816686, "grad_norm": 1.4219728708267212, "learning_rate": 8.865306275763712e-06, "loss": 0.6977, "step": 6734 }, { "epoch": 1.2112739368875303, "grad_norm": 1.5025928020477295, "learning_rate": 8.864936800005614e-06, "loss": 0.6985, "step": 6735 }, { "epoch": 1.2114537444933922, "grad_norm": 1.5314701795578003, "learning_rate": 8.864567271805395e-06, "loss": 0.7803, "step": 6736 }, { "epoch": 1.2116335520992538, "grad_norm": 1.4852973222732544, "learning_rate": 8.864197691168069e-06, "loss": 0.7098, "step": 6737 }, { "epoch": 1.2118133597051155, "grad_norm": 1.6003673076629639, "learning_rate": 8.863828058098652e-06, "loss": 0.7596, "step": 6738 }, { "epoch": 1.2119931673109772, "grad_norm": 1.2821053266525269, "learning_rate": 8.863458372602156e-06, "loss": 0.9275, "step": 6739 }, { "epoch": 1.212172974916839, "grad_norm": 1.4358805418014526, "learning_rate": 8.8630886346836e-06, "loss": 0.7376, "step": 6740 }, { "epoch": 1.2123527825227007, "grad_norm": 1.2069752216339111, "learning_rate": 8.862718844348002e-06, "loss": 0.9573, "step": 6741 }, { "epoch": 1.2125325901285624, "grad_norm": 1.5042473077774048, "learning_rate": 8.862349001600376e-06, "loss": 0.7701, "step": 6742 }, { "epoch": 1.212712397734424, "grad_norm": 1.5488245487213135, "learning_rate": 8.861979106445741e-06, "loss": 0.7349, "step": 6743 }, { "epoch": 1.212892205340286, "grad_norm": 1.5705482959747314, "learning_rate": 8.86160915888912e-06, "loss": 0.7768, "step": 6744 }, { "epoch": 1.2130720129461476, "grad_norm": 1.5274412631988525, "learning_rate": 8.861239158935527e-06, "loss": 0.7178, "step": 6745 }, { "epoch": 1.2132518205520093, "grad_norm": 1.1967869997024536, "learning_rate": 8.860869106589986e-06, "loss": 0.9716, "step": 6746 }, { "epoch": 1.213431628157871, "grad_norm": 1.3749196529388428, "learning_rate": 8.860499001857516e-06, "loss": 0.7393, "step": 6747 }, { "epoch": 1.2136114357637329, "grad_norm": 1.6910864114761353, "learning_rate": 8.860128844743143e-06, "loss": 0.713, "step": 6748 }, { "epoch": 1.2137912433695945, "grad_norm": 1.4774835109710693, "learning_rate": 8.859758635251884e-06, "loss": 0.7278, "step": 6749 }, { "epoch": 1.2139710509754562, "grad_norm": 1.4855245351791382, "learning_rate": 8.859388373388765e-06, "loss": 0.7423, "step": 6750 }, { "epoch": 1.214150858581318, "grad_norm": 1.4910273551940918, "learning_rate": 8.85901805915881e-06, "loss": 0.7123, "step": 6751 }, { "epoch": 1.2143306661871798, "grad_norm": 1.5861133337020874, "learning_rate": 8.858647692567045e-06, "loss": 0.774, "step": 6752 }, { "epoch": 1.2145104737930414, "grad_norm": 1.4352505207061768, "learning_rate": 8.858277273618493e-06, "loss": 0.7305, "step": 6753 }, { "epoch": 1.2146902813989031, "grad_norm": 1.4238300323486328, "learning_rate": 8.857906802318181e-06, "loss": 0.7172, "step": 6754 }, { "epoch": 1.2148700890047648, "grad_norm": 1.4431337118148804, "learning_rate": 8.857536278671136e-06, "loss": 0.7497, "step": 6755 }, { "epoch": 1.2150498966106267, "grad_norm": 1.4330896139144897, "learning_rate": 8.857165702682385e-06, "loss": 0.7514, "step": 6756 }, { "epoch": 1.2152297042164883, "grad_norm": 1.4231349229812622, "learning_rate": 8.856795074356956e-06, "loss": 0.7054, "step": 6757 }, { "epoch": 1.21540951182235, "grad_norm": 1.492047905921936, "learning_rate": 8.856424393699878e-06, "loss": 0.748, "step": 6758 }, { "epoch": 1.215589319428212, "grad_norm": 1.110585331916809, "learning_rate": 8.856053660716183e-06, "loss": 0.9341, "step": 6759 }, { "epoch": 1.2157691270340736, "grad_norm": 1.4642361402511597, "learning_rate": 8.855682875410899e-06, "loss": 0.7622, "step": 6760 }, { "epoch": 1.2159489346399353, "grad_norm": 1.4862343072891235, "learning_rate": 8.855312037789056e-06, "loss": 0.791, "step": 6761 }, { "epoch": 1.216128742245797, "grad_norm": 1.545258641242981, "learning_rate": 8.854941147855689e-06, "loss": 0.7454, "step": 6762 }, { "epoch": 1.2163085498516588, "grad_norm": 1.4804245233535767, "learning_rate": 8.85457020561583e-06, "loss": 0.7691, "step": 6763 }, { "epoch": 1.2164883574575205, "grad_norm": 1.4362894296646118, "learning_rate": 8.854199211074508e-06, "loss": 0.6936, "step": 6764 }, { "epoch": 1.2166681650633822, "grad_norm": 1.4327361583709717, "learning_rate": 8.853828164236761e-06, "loss": 0.7619, "step": 6765 }, { "epoch": 1.2168479726692438, "grad_norm": 1.4188710451126099, "learning_rate": 8.853457065107623e-06, "loss": 0.69, "step": 6766 }, { "epoch": 1.2170277802751057, "grad_norm": 1.1863007545471191, "learning_rate": 8.853085913692128e-06, "loss": 0.9908, "step": 6767 }, { "epoch": 1.2172075878809674, "grad_norm": 1.1576502323150635, "learning_rate": 8.852714709995314e-06, "loss": 0.947, "step": 6768 }, { "epoch": 1.217387395486829, "grad_norm": 1.5988881587982178, "learning_rate": 8.852343454022217e-06, "loss": 0.7375, "step": 6769 }, { "epoch": 1.2175672030926907, "grad_norm": 1.6488512754440308, "learning_rate": 8.851972145777873e-06, "loss": 0.7451, "step": 6770 }, { "epoch": 1.2177470106985526, "grad_norm": 1.5334303379058838, "learning_rate": 8.851600785267322e-06, "loss": 0.7516, "step": 6771 }, { "epoch": 1.2179268183044143, "grad_norm": 1.4951527118682861, "learning_rate": 8.851229372495602e-06, "loss": 0.7453, "step": 6772 }, { "epoch": 1.218106625910276, "grad_norm": 1.4481239318847656, "learning_rate": 8.850857907467753e-06, "loss": 0.7379, "step": 6773 }, { "epoch": 1.2182864335161376, "grad_norm": 1.5520882606506348, "learning_rate": 8.850486390188813e-06, "loss": 0.6858, "step": 6774 }, { "epoch": 1.2184662411219995, "grad_norm": 1.3731849193572998, "learning_rate": 8.850114820663828e-06, "loss": 0.7085, "step": 6775 }, { "epoch": 1.2186460487278612, "grad_norm": 1.1659294366836548, "learning_rate": 8.849743198897836e-06, "loss": 0.926, "step": 6776 }, { "epoch": 1.2188258563337229, "grad_norm": 1.0976548194885254, "learning_rate": 8.84937152489588e-06, "loss": 0.9671, "step": 6777 }, { "epoch": 1.2190056639395848, "grad_norm": 1.423638939857483, "learning_rate": 8.848999798663002e-06, "loss": 0.6686, "step": 6778 }, { "epoch": 1.2191854715454464, "grad_norm": 1.3969062566757202, "learning_rate": 8.848628020204248e-06, "loss": 0.76, "step": 6779 }, { "epoch": 1.219365279151308, "grad_norm": 1.5515178442001343, "learning_rate": 8.848256189524661e-06, "loss": 0.8046, "step": 6780 }, { "epoch": 1.2195450867571698, "grad_norm": 1.3439759016036987, "learning_rate": 8.847884306629288e-06, "loss": 0.7654, "step": 6781 }, { "epoch": 1.2197248943630314, "grad_norm": 1.4611546993255615, "learning_rate": 8.847512371523175e-06, "loss": 0.7161, "step": 6782 }, { "epoch": 1.2199047019688933, "grad_norm": 1.3409658670425415, "learning_rate": 8.847140384211366e-06, "loss": 0.9785, "step": 6783 }, { "epoch": 1.220084509574755, "grad_norm": 1.35678231716156, "learning_rate": 8.84676834469891e-06, "loss": 0.6774, "step": 6784 }, { "epoch": 1.2202643171806167, "grad_norm": 1.4082831144332886, "learning_rate": 8.846396252990857e-06, "loss": 0.7556, "step": 6785 }, { "epoch": 1.2204441247864786, "grad_norm": 1.5711556673049927, "learning_rate": 8.84602410909225e-06, "loss": 0.8221, "step": 6786 }, { "epoch": 1.2206239323923402, "grad_norm": 1.4032971858978271, "learning_rate": 8.845651913008145e-06, "loss": 0.7047, "step": 6787 }, { "epoch": 1.220803739998202, "grad_norm": 1.6025882959365845, "learning_rate": 8.845279664743589e-06, "loss": 0.7484, "step": 6788 }, { "epoch": 1.2209835476040636, "grad_norm": 1.4082525968551636, "learning_rate": 8.844907364303634e-06, "loss": 0.6849, "step": 6789 }, { "epoch": 1.2211633552099255, "grad_norm": 1.5404127836227417, "learning_rate": 8.844535011693331e-06, "loss": 0.7716, "step": 6790 }, { "epoch": 1.2213431628157871, "grad_norm": 1.5138332843780518, "learning_rate": 8.844162606917731e-06, "loss": 0.7075, "step": 6791 }, { "epoch": 1.2215229704216488, "grad_norm": 1.437190055847168, "learning_rate": 8.84379014998189e-06, "loss": 0.7584, "step": 6792 }, { "epoch": 1.2217027780275105, "grad_norm": 1.517525553703308, "learning_rate": 8.84341764089086e-06, "loss": 0.7428, "step": 6793 }, { "epoch": 1.2218825856333724, "grad_norm": 1.4665182828903198, "learning_rate": 8.843045079649696e-06, "loss": 0.7494, "step": 6794 }, { "epoch": 1.222062393239234, "grad_norm": 1.4568270444869995, "learning_rate": 8.842672466263453e-06, "loss": 0.6585, "step": 6795 }, { "epoch": 1.2222422008450957, "grad_norm": 1.5129724740982056, "learning_rate": 8.842299800737185e-06, "loss": 0.7592, "step": 6796 }, { "epoch": 1.2224220084509574, "grad_norm": 1.56190025806427, "learning_rate": 8.841927083075951e-06, "loss": 0.7311, "step": 6797 }, { "epoch": 1.2226018160568193, "grad_norm": 1.4550212621688843, "learning_rate": 8.84155431328481e-06, "loss": 0.8227, "step": 6798 }, { "epoch": 1.222781623662681, "grad_norm": 1.1449614763259888, "learning_rate": 8.841181491368814e-06, "loss": 0.9539, "step": 6799 }, { "epoch": 1.2229614312685426, "grad_norm": 1.5033878087997437, "learning_rate": 8.840808617333028e-06, "loss": 0.7959, "step": 6800 }, { "epoch": 1.2231412388744043, "grad_norm": 1.5947011709213257, "learning_rate": 8.840435691182507e-06, "loss": 0.7298, "step": 6801 }, { "epoch": 1.2233210464802662, "grad_norm": 1.2478326559066772, "learning_rate": 8.840062712922314e-06, "loss": 0.9825, "step": 6802 }, { "epoch": 1.2235008540861279, "grad_norm": 1.4323010444641113, "learning_rate": 8.839689682557508e-06, "loss": 0.7176, "step": 6803 }, { "epoch": 1.2236806616919895, "grad_norm": 1.490984320640564, "learning_rate": 8.83931660009315e-06, "loss": 0.7093, "step": 6804 }, { "epoch": 1.2238604692978514, "grad_norm": 1.4638248682022095, "learning_rate": 8.838943465534307e-06, "loss": 0.7099, "step": 6805 }, { "epoch": 1.224040276903713, "grad_norm": 1.5072367191314697, "learning_rate": 8.838570278886037e-06, "loss": 0.7791, "step": 6806 }, { "epoch": 1.2242200845095748, "grad_norm": 1.5588680505752563, "learning_rate": 8.838197040153403e-06, "loss": 0.8253, "step": 6807 }, { "epoch": 1.2243998921154364, "grad_norm": 1.4764454364776611, "learning_rate": 8.837823749341473e-06, "loss": 0.7012, "step": 6808 }, { "epoch": 1.224579699721298, "grad_norm": 1.420458436012268, "learning_rate": 8.83745040645531e-06, "loss": 0.7226, "step": 6809 }, { "epoch": 1.22475950732716, "grad_norm": 1.0213589668273926, "learning_rate": 8.837077011499981e-06, "loss": 0.936, "step": 6810 }, { "epoch": 1.2249393149330217, "grad_norm": 1.470903754234314, "learning_rate": 8.836703564480552e-06, "loss": 0.794, "step": 6811 }, { "epoch": 1.2251191225388833, "grad_norm": 0.9776678681373596, "learning_rate": 8.836330065402088e-06, "loss": 0.9497, "step": 6812 }, { "epoch": 1.2252989301447452, "grad_norm": 1.573195457458496, "learning_rate": 8.83595651426966e-06, "loss": 0.7668, "step": 6813 }, { "epoch": 1.225478737750607, "grad_norm": 1.5333794355392456, "learning_rate": 8.835582911088335e-06, "loss": 0.7422, "step": 6814 }, { "epoch": 1.2256585453564686, "grad_norm": 1.677449107170105, "learning_rate": 8.835209255863182e-06, "loss": 0.8051, "step": 6815 }, { "epoch": 1.2258383529623302, "grad_norm": 1.188025712966919, "learning_rate": 8.834835548599274e-06, "loss": 0.962, "step": 6816 }, { "epoch": 1.2260181605681921, "grad_norm": 1.5624092817306519, "learning_rate": 8.834461789301678e-06, "loss": 0.7148, "step": 6817 }, { "epoch": 1.2261979681740538, "grad_norm": 1.40103018283844, "learning_rate": 8.834087977975467e-06, "loss": 0.7726, "step": 6818 }, { "epoch": 1.2263777757799155, "grad_norm": 1.3302010297775269, "learning_rate": 8.833714114625713e-06, "loss": 0.9859, "step": 6819 }, { "epoch": 1.2265575833857771, "grad_norm": 1.5249403715133667, "learning_rate": 8.833340199257489e-06, "loss": 0.7232, "step": 6820 }, { "epoch": 1.226737390991639, "grad_norm": 1.16160249710083, "learning_rate": 8.832966231875868e-06, "loss": 0.9572, "step": 6821 }, { "epoch": 1.2269171985975007, "grad_norm": 1.4670984745025635, "learning_rate": 8.832592212485925e-06, "loss": 0.7541, "step": 6822 }, { "epoch": 1.2270970062033624, "grad_norm": 1.154819369316101, "learning_rate": 8.832218141092734e-06, "loss": 0.9632, "step": 6823 }, { "epoch": 1.227276813809224, "grad_norm": 1.353586196899414, "learning_rate": 8.831844017701372e-06, "loss": 0.7154, "step": 6824 }, { "epoch": 1.227456621415086, "grad_norm": 1.5467175245285034, "learning_rate": 8.831469842316914e-06, "loss": 0.7597, "step": 6825 }, { "epoch": 1.2276364290209476, "grad_norm": 1.1884620189666748, "learning_rate": 8.831095614944438e-06, "loss": 0.9687, "step": 6826 }, { "epoch": 1.2278162366268093, "grad_norm": 1.5447814464569092, "learning_rate": 8.830721335589022e-06, "loss": 0.814, "step": 6827 }, { "epoch": 1.227996044232671, "grad_norm": 1.5246667861938477, "learning_rate": 8.830347004255742e-06, "loss": 0.7536, "step": 6828 }, { "epoch": 1.2281758518385328, "grad_norm": 1.583030104637146, "learning_rate": 8.829972620949681e-06, "loss": 0.8268, "step": 6829 }, { "epoch": 1.2283556594443945, "grad_norm": 1.541405439376831, "learning_rate": 8.829598185675916e-06, "loss": 0.7573, "step": 6830 }, { "epoch": 1.2285354670502562, "grad_norm": 1.4708333015441895, "learning_rate": 8.829223698439529e-06, "loss": 0.7112, "step": 6831 }, { "epoch": 1.228715274656118, "grad_norm": 1.559511423110962, "learning_rate": 8.8288491592456e-06, "loss": 0.7842, "step": 6832 }, { "epoch": 1.2288950822619797, "grad_norm": 1.142159104347229, "learning_rate": 8.828474568099212e-06, "loss": 0.9707, "step": 6833 }, { "epoch": 1.2290748898678414, "grad_norm": 1.525680661201477, "learning_rate": 8.828099925005449e-06, "loss": 0.7716, "step": 6834 }, { "epoch": 1.229254697473703, "grad_norm": 1.5611498355865479, "learning_rate": 8.827725229969393e-06, "loss": 0.7331, "step": 6835 }, { "epoch": 1.2294345050795648, "grad_norm": 1.449902892112732, "learning_rate": 8.827350482996126e-06, "loss": 0.7361, "step": 6836 }, { "epoch": 1.2296143126854266, "grad_norm": 1.5360842943191528, "learning_rate": 8.826975684090736e-06, "loss": 0.773, "step": 6837 }, { "epoch": 1.2297941202912883, "grad_norm": 1.4582624435424805, "learning_rate": 8.826600833258307e-06, "loss": 0.7503, "step": 6838 }, { "epoch": 1.22997392789715, "grad_norm": 1.2007900476455688, "learning_rate": 8.826225930503926e-06, "loss": 0.9692, "step": 6839 }, { "epoch": 1.2301537355030119, "grad_norm": 1.5990262031555176, "learning_rate": 8.825850975832682e-06, "loss": 0.7149, "step": 6840 }, { "epoch": 1.2303335431088736, "grad_norm": 1.844692587852478, "learning_rate": 8.825475969249658e-06, "loss": 0.7369, "step": 6841 }, { "epoch": 1.2305133507147352, "grad_norm": 1.504225730895996, "learning_rate": 8.825100910759945e-06, "loss": 0.7068, "step": 6842 }, { "epoch": 1.230693158320597, "grad_norm": 1.4727946519851685, "learning_rate": 8.824725800368632e-06, "loss": 0.7836, "step": 6843 }, { "epoch": 1.2308729659264588, "grad_norm": 1.576256275177002, "learning_rate": 8.824350638080808e-06, "loss": 0.7738, "step": 6844 }, { "epoch": 1.2310527735323205, "grad_norm": 1.5589277744293213, "learning_rate": 8.823975423901562e-06, "loss": 0.6833, "step": 6845 }, { "epoch": 1.2312325811381821, "grad_norm": 1.5870959758758545, "learning_rate": 8.82360015783599e-06, "loss": 0.837, "step": 6846 }, { "epoch": 1.2314123887440438, "grad_norm": 1.9108877182006836, "learning_rate": 8.823224839889181e-06, "loss": 0.6996, "step": 6847 }, { "epoch": 1.2315921963499057, "grad_norm": 1.3542261123657227, "learning_rate": 8.822849470066227e-06, "loss": 0.7391, "step": 6848 }, { "epoch": 1.2317720039557674, "grad_norm": 1.284582257270813, "learning_rate": 8.82247404837222e-06, "loss": 0.9836, "step": 6849 }, { "epoch": 1.231951811561629, "grad_norm": 1.5532546043395996, "learning_rate": 8.82209857481226e-06, "loss": 0.7795, "step": 6850 }, { "epoch": 1.2321316191674907, "grad_norm": 1.465989112854004, "learning_rate": 8.821723049391433e-06, "loss": 0.7436, "step": 6851 }, { "epoch": 1.2323114267733526, "grad_norm": 1.4134727716445923, "learning_rate": 8.82134747211484e-06, "loss": 0.7535, "step": 6852 }, { "epoch": 1.2324912343792143, "grad_norm": 1.3938645124435425, "learning_rate": 8.820971842987577e-06, "loss": 0.6807, "step": 6853 }, { "epoch": 1.232671041985076, "grad_norm": 1.479687213897705, "learning_rate": 8.820596162014739e-06, "loss": 0.712, "step": 6854 }, { "epoch": 1.2328508495909376, "grad_norm": 1.4815014600753784, "learning_rate": 8.820220429201425e-06, "loss": 0.7374, "step": 6855 }, { "epoch": 1.2330306571967995, "grad_norm": 1.4864344596862793, "learning_rate": 8.81984464455273e-06, "loss": 0.7621, "step": 6856 }, { "epoch": 1.2332104648026612, "grad_norm": 1.4459667205810547, "learning_rate": 8.819468808073758e-06, "loss": 0.7916, "step": 6857 }, { "epoch": 1.2333902724085228, "grad_norm": 1.62680983543396, "learning_rate": 8.819092919769606e-06, "loss": 0.7428, "step": 6858 }, { "epoch": 1.2335700800143847, "grad_norm": 1.3870829343795776, "learning_rate": 8.818716979645372e-06, "loss": 0.6865, "step": 6859 }, { "epoch": 1.2337498876202464, "grad_norm": 1.6564041376113892, "learning_rate": 8.81834098770616e-06, "loss": 0.8058, "step": 6860 }, { "epoch": 1.233929695226108, "grad_norm": 1.0648307800292969, "learning_rate": 8.817964943957073e-06, "loss": 0.8948, "step": 6861 }, { "epoch": 1.2341095028319697, "grad_norm": 1.5272136926651, "learning_rate": 8.817588848403208e-06, "loss": 0.7975, "step": 6862 }, { "epoch": 1.2342893104378314, "grad_norm": 1.5413683652877808, "learning_rate": 8.817212701049675e-06, "loss": 0.7536, "step": 6863 }, { "epoch": 1.2344691180436933, "grad_norm": 1.0086696147918701, "learning_rate": 8.816836501901574e-06, "loss": 0.9298, "step": 6864 }, { "epoch": 1.234648925649555, "grad_norm": 1.1300861835479736, "learning_rate": 8.816460250964007e-06, "loss": 0.9351, "step": 6865 }, { "epoch": 1.2348287332554166, "grad_norm": 1.1402167081832886, "learning_rate": 8.816083948242085e-06, "loss": 0.9558, "step": 6866 }, { "epoch": 1.2350085408612785, "grad_norm": 1.7279645204544067, "learning_rate": 8.815707593740909e-06, "loss": 0.7657, "step": 6867 }, { "epoch": 1.2351883484671402, "grad_norm": 1.4196131229400635, "learning_rate": 8.81533118746559e-06, "loss": 0.6663, "step": 6868 }, { "epoch": 1.2353681560730019, "grad_norm": 1.6312651634216309, "learning_rate": 8.81495472942123e-06, "loss": 0.7951, "step": 6869 }, { "epoch": 1.2355479636788635, "grad_norm": 1.1881139278411865, "learning_rate": 8.814578219612941e-06, "loss": 0.934, "step": 6870 }, { "epoch": 1.2357277712847254, "grad_norm": 1.5134214162826538, "learning_rate": 8.814201658045833e-06, "loss": 0.7511, "step": 6871 }, { "epoch": 1.235907578890587, "grad_norm": 1.416900873184204, "learning_rate": 8.81382504472501e-06, "loss": 0.7144, "step": 6872 }, { "epoch": 1.2360873864964488, "grad_norm": 4.1249470710754395, "learning_rate": 8.813448379655589e-06, "loss": 0.7205, "step": 6873 }, { "epoch": 1.2362671941023105, "grad_norm": 1.5995911359786987, "learning_rate": 8.813071662842674e-06, "loss": 0.7309, "step": 6874 }, { "epoch": 1.2364470017081723, "grad_norm": 1.517118215560913, "learning_rate": 8.812694894291383e-06, "loss": 0.7889, "step": 6875 }, { "epoch": 1.236626809314034, "grad_norm": 1.3803349733352661, "learning_rate": 8.812318074006823e-06, "loss": 0.7067, "step": 6876 }, { "epoch": 1.2368066169198957, "grad_norm": 1.4973756074905396, "learning_rate": 8.811941201994107e-06, "loss": 0.7357, "step": 6877 }, { "epoch": 1.2369864245257574, "grad_norm": 1.4442998170852661, "learning_rate": 8.811564278258355e-06, "loss": 0.7908, "step": 6878 }, { "epoch": 1.2371662321316192, "grad_norm": 1.452430009841919, "learning_rate": 8.811187302804674e-06, "loss": 0.7849, "step": 6879 }, { "epoch": 1.237346039737481, "grad_norm": 1.378718376159668, "learning_rate": 8.810810275638183e-06, "loss": 0.7776, "step": 6880 }, { "epoch": 1.2375258473433426, "grad_norm": 1.478705883026123, "learning_rate": 8.810433196763997e-06, "loss": 0.7404, "step": 6881 }, { "epoch": 1.2377056549492043, "grad_norm": 1.4783031940460205, "learning_rate": 8.810056066187231e-06, "loss": 0.7771, "step": 6882 }, { "epoch": 1.2378854625550662, "grad_norm": 1.5470812320709229, "learning_rate": 8.809678883913007e-06, "loss": 0.8164, "step": 6883 }, { "epoch": 1.2380652701609278, "grad_norm": 1.5390270948410034, "learning_rate": 8.809301649946436e-06, "loss": 0.7588, "step": 6884 }, { "epoch": 1.2382450777667895, "grad_norm": 1.2390320301055908, "learning_rate": 8.808924364292642e-06, "loss": 0.9544, "step": 6885 }, { "epoch": 1.2384248853726514, "grad_norm": 1.4829736948013306, "learning_rate": 8.80854702695674e-06, "loss": 0.6982, "step": 6886 }, { "epoch": 1.238604692978513, "grad_norm": 1.4639980792999268, "learning_rate": 8.808169637943854e-06, "loss": 0.7592, "step": 6887 }, { "epoch": 1.2387845005843747, "grad_norm": 1.463702917098999, "learning_rate": 8.807792197259102e-06, "loss": 0.7253, "step": 6888 }, { "epoch": 1.2389643081902364, "grad_norm": 1.415663480758667, "learning_rate": 8.807414704907607e-06, "loss": 0.7951, "step": 6889 }, { "epoch": 1.239144115796098, "grad_norm": 1.574397325515747, "learning_rate": 8.80703716089449e-06, "loss": 0.8163, "step": 6890 }, { "epoch": 1.23932392340196, "grad_norm": 1.4819077253341675, "learning_rate": 8.806659565224873e-06, "loss": 0.7377, "step": 6891 }, { "epoch": 1.2395037310078216, "grad_norm": 1.0927245616912842, "learning_rate": 8.806281917903881e-06, "loss": 0.9519, "step": 6892 }, { "epoch": 1.2396835386136833, "grad_norm": 1.4338842630386353, "learning_rate": 8.805904218936639e-06, "loss": 0.7376, "step": 6893 }, { "epoch": 1.2398633462195452, "grad_norm": 1.4504902362823486, "learning_rate": 8.805526468328269e-06, "loss": 0.7444, "step": 6894 }, { "epoch": 1.2400431538254069, "grad_norm": 1.418641448020935, "learning_rate": 8.8051486660839e-06, "loss": 0.786, "step": 6895 }, { "epoch": 1.2402229614312685, "grad_norm": 1.4825555086135864, "learning_rate": 8.804770812208655e-06, "loss": 0.6736, "step": 6896 }, { "epoch": 1.2404027690371302, "grad_norm": 1.4675298929214478, "learning_rate": 8.804392906707663e-06, "loss": 0.8002, "step": 6897 }, { "epoch": 1.2405825766429919, "grad_norm": 1.428773045539856, "learning_rate": 8.804014949586051e-06, "loss": 0.7165, "step": 6898 }, { "epoch": 1.2407623842488538, "grad_norm": 1.4169118404388428, "learning_rate": 8.803636940848948e-06, "loss": 0.6825, "step": 6899 }, { "epoch": 1.2409421918547154, "grad_norm": 1.4769001007080078, "learning_rate": 8.803258880501482e-06, "loss": 0.7621, "step": 6900 }, { "epoch": 1.241121999460577, "grad_norm": 1.3621805906295776, "learning_rate": 8.802880768548782e-06, "loss": 0.6817, "step": 6901 }, { "epoch": 1.241301807066439, "grad_norm": 1.4417204856872559, "learning_rate": 8.802502604995983e-06, "loss": 0.7925, "step": 6902 }, { "epoch": 1.2414816146723007, "grad_norm": 1.4941781759262085, "learning_rate": 8.80212438984821e-06, "loss": 0.7575, "step": 6903 }, { "epoch": 1.2416614222781623, "grad_norm": 1.0646812915802002, "learning_rate": 8.801746123110601e-06, "loss": 0.9601, "step": 6904 }, { "epoch": 1.241841229884024, "grad_norm": 1.1712760925292969, "learning_rate": 8.801367804788283e-06, "loss": 0.9289, "step": 6905 }, { "epoch": 1.242021037489886, "grad_norm": 1.3928626775741577, "learning_rate": 8.800989434886393e-06, "loss": 0.7228, "step": 6906 }, { "epoch": 1.2422008450957476, "grad_norm": 1.470590591430664, "learning_rate": 8.800611013410065e-06, "loss": 0.749, "step": 6907 }, { "epoch": 1.2423806527016092, "grad_norm": 1.5377662181854248, "learning_rate": 8.80023254036443e-06, "loss": 0.7217, "step": 6908 }, { "epoch": 1.242560460307471, "grad_norm": 1.0669125318527222, "learning_rate": 8.799854015754626e-06, "loss": 0.9666, "step": 6909 }, { "epoch": 1.2427402679133328, "grad_norm": 1.601212501525879, "learning_rate": 8.79947543958579e-06, "loss": 0.7689, "step": 6910 }, { "epoch": 1.2429200755191945, "grad_norm": 1.741461992263794, "learning_rate": 8.799096811863058e-06, "loss": 0.8137, "step": 6911 }, { "epoch": 1.2430998831250561, "grad_norm": 1.1146974563598633, "learning_rate": 8.798718132591566e-06, "loss": 0.9309, "step": 6912 }, { "epoch": 1.243279690730918, "grad_norm": 1.4936240911483765, "learning_rate": 8.798339401776455e-06, "loss": 0.696, "step": 6913 }, { "epoch": 1.2434594983367797, "grad_norm": 1.5306986570358276, "learning_rate": 8.79796061942286e-06, "loss": 0.8179, "step": 6914 }, { "epoch": 1.2436393059426414, "grad_norm": 1.5144025087356567, "learning_rate": 8.797581785535924e-06, "loss": 0.6886, "step": 6915 }, { "epoch": 1.243819113548503, "grad_norm": 1.7267683744430542, "learning_rate": 8.797202900120786e-06, "loss": 0.8125, "step": 6916 }, { "epoch": 1.2439989211543647, "grad_norm": 1.4502047300338745, "learning_rate": 8.796823963182589e-06, "loss": 0.7439, "step": 6917 }, { "epoch": 1.2441787287602266, "grad_norm": 1.384324073791504, "learning_rate": 8.79644497472647e-06, "loss": 0.7506, "step": 6918 }, { "epoch": 1.2443585363660883, "grad_norm": 1.447700023651123, "learning_rate": 8.796065934757576e-06, "loss": 0.7861, "step": 6919 }, { "epoch": 1.24453834397195, "grad_norm": 1.533782720565796, "learning_rate": 8.795686843281048e-06, "loss": 0.7813, "step": 6920 }, { "epoch": 1.2447181515778118, "grad_norm": 1.4650641679763794, "learning_rate": 8.795307700302029e-06, "loss": 0.7231, "step": 6921 }, { "epoch": 1.2448979591836735, "grad_norm": 1.0949292182922363, "learning_rate": 8.794928505825666e-06, "loss": 0.9178, "step": 6922 }, { "epoch": 1.2450777667895352, "grad_norm": 1.4439294338226318, "learning_rate": 8.794549259857102e-06, "loss": 0.7445, "step": 6923 }, { "epoch": 1.2452575743953969, "grad_norm": 1.5343577861785889, "learning_rate": 8.794169962401482e-06, "loss": 0.7779, "step": 6924 }, { "epoch": 1.2454373820012585, "grad_norm": 1.5110325813293457, "learning_rate": 8.793790613463956e-06, "loss": 0.7362, "step": 6925 }, { "epoch": 1.2456171896071204, "grad_norm": 1.5164936780929565, "learning_rate": 8.793411213049667e-06, "loss": 0.7759, "step": 6926 }, { "epoch": 1.245796997212982, "grad_norm": 1.0636985301971436, "learning_rate": 8.793031761163768e-06, "loss": 0.9684, "step": 6927 }, { "epoch": 1.2459768048188438, "grad_norm": 1.0210952758789062, "learning_rate": 8.792652257811403e-06, "loss": 0.9679, "step": 6928 }, { "epoch": 1.2461566124247057, "grad_norm": 1.5629613399505615, "learning_rate": 8.792272702997724e-06, "loss": 0.7206, "step": 6929 }, { "epoch": 1.2463364200305673, "grad_norm": 1.4837135076522827, "learning_rate": 8.791893096727882e-06, "loss": 0.7147, "step": 6930 }, { "epoch": 1.246516227636429, "grad_norm": 1.4912818670272827, "learning_rate": 8.791513439007025e-06, "loss": 0.7313, "step": 6931 }, { "epoch": 1.2466960352422907, "grad_norm": 1.505829930305481, "learning_rate": 8.791133729840304e-06, "loss": 0.7245, "step": 6932 }, { "epoch": 1.2468758428481526, "grad_norm": 1.6363332271575928, "learning_rate": 8.790753969232875e-06, "loss": 0.7701, "step": 6933 }, { "epoch": 1.2470556504540142, "grad_norm": 1.5466278791427612, "learning_rate": 8.790374157189888e-06, "loss": 0.6835, "step": 6934 }, { "epoch": 1.247235458059876, "grad_norm": 1.46638023853302, "learning_rate": 8.789994293716497e-06, "loss": 0.6844, "step": 6935 }, { "epoch": 1.2474152656657376, "grad_norm": 1.3379747867584229, "learning_rate": 8.789614378817855e-06, "loss": 0.6825, "step": 6936 }, { "epoch": 1.2475950732715995, "grad_norm": 1.2824335098266602, "learning_rate": 8.78923441249912e-06, "loss": 0.9763, "step": 6937 }, { "epoch": 1.2477748808774611, "grad_norm": 1.412570595741272, "learning_rate": 8.788854394765447e-06, "loss": 0.6885, "step": 6938 }, { "epoch": 1.2479546884833228, "grad_norm": 1.4864660501480103, "learning_rate": 8.788474325621989e-06, "loss": 0.7621, "step": 6939 }, { "epoch": 1.2481344960891847, "grad_norm": 1.1520472764968872, "learning_rate": 8.788094205073907e-06, "loss": 0.9507, "step": 6940 }, { "epoch": 1.2483143036950464, "grad_norm": 1.4477444887161255, "learning_rate": 8.787714033126356e-06, "loss": 0.7305, "step": 6941 }, { "epoch": 1.248494111300908, "grad_norm": 1.1691240072250366, "learning_rate": 8.787333809784497e-06, "loss": 0.9111, "step": 6942 }, { "epoch": 1.2486739189067697, "grad_norm": 1.3355746269226074, "learning_rate": 8.786953535053486e-06, "loss": 0.6697, "step": 6943 }, { "epoch": 1.2488537265126314, "grad_norm": 1.540342092514038, "learning_rate": 8.786573208938485e-06, "loss": 0.8157, "step": 6944 }, { "epoch": 1.2490335341184933, "grad_norm": 1.6055952310562134, "learning_rate": 8.786192831444655e-06, "loss": 0.7049, "step": 6945 }, { "epoch": 1.249213341724355, "grad_norm": 1.5538955926895142, "learning_rate": 8.785812402577156e-06, "loss": 0.7892, "step": 6946 }, { "epoch": 1.2493931493302166, "grad_norm": 1.4272905588150024, "learning_rate": 8.78543192234115e-06, "loss": 0.6727, "step": 6947 }, { "epoch": 1.2495729569360785, "grad_norm": 1.4605928659439087, "learning_rate": 8.7850513907418e-06, "loss": 0.7551, "step": 6948 }, { "epoch": 1.2497527645419402, "grad_norm": 1.4738095998764038, "learning_rate": 8.784670807784268e-06, "loss": 0.7312, "step": 6949 }, { "epoch": 1.2499325721478018, "grad_norm": 1.2134045362472534, "learning_rate": 8.784290173473722e-06, "loss": 0.9454, "step": 6950 }, { "epoch": 1.2501123797536635, "grad_norm": 1.3736252784729004, "learning_rate": 8.783909487815321e-06, "loss": 0.7597, "step": 6951 }, { "epoch": 1.2502921873595252, "grad_norm": 1.2757542133331299, "learning_rate": 8.783528750814234e-06, "loss": 0.9212, "step": 6952 }, { "epoch": 1.250471994965387, "grad_norm": 1.7285586595535278, "learning_rate": 8.783147962475626e-06, "loss": 0.7055, "step": 6953 }, { "epoch": 1.2506518025712487, "grad_norm": 1.5502458810806274, "learning_rate": 8.782767122804664e-06, "loss": 0.7555, "step": 6954 }, { "epoch": 1.2508316101771104, "grad_norm": 1.3801723718643188, "learning_rate": 8.782386231806518e-06, "loss": 0.7, "step": 6955 }, { "epoch": 1.2510114177829723, "grad_norm": 1.4570891857147217, "learning_rate": 8.782005289486353e-06, "loss": 0.7285, "step": 6956 }, { "epoch": 1.251191225388834, "grad_norm": 1.4227863550186157, "learning_rate": 8.781624295849337e-06, "loss": 0.7375, "step": 6957 }, { "epoch": 1.2513710329946957, "grad_norm": 1.446489691734314, "learning_rate": 8.781243250900642e-06, "loss": 0.7631, "step": 6958 }, { "epoch": 1.2515508406005573, "grad_norm": 1.532546877861023, "learning_rate": 8.780862154645438e-06, "loss": 0.7815, "step": 6959 }, { "epoch": 1.251730648206419, "grad_norm": 1.3720049858093262, "learning_rate": 8.780481007088895e-06, "loss": 0.7482, "step": 6960 }, { "epoch": 1.2519104558122809, "grad_norm": 1.4220473766326904, "learning_rate": 8.780099808236185e-06, "loss": 0.7094, "step": 6961 }, { "epoch": 1.2520902634181426, "grad_norm": 1.4445366859436035, "learning_rate": 8.779718558092483e-06, "loss": 0.6683, "step": 6962 }, { "epoch": 1.2522700710240042, "grad_norm": 1.5250920057296753, "learning_rate": 8.779337256662957e-06, "loss": 0.7589, "step": 6963 }, { "epoch": 1.2524498786298661, "grad_norm": 1.6235179901123047, "learning_rate": 8.778955903952784e-06, "loss": 0.8022, "step": 6964 }, { "epoch": 1.2526296862357278, "grad_norm": 1.4864474534988403, "learning_rate": 8.778574499967138e-06, "loss": 0.7084, "step": 6965 }, { "epoch": 1.2528094938415895, "grad_norm": 1.2282356023788452, "learning_rate": 8.778193044711194e-06, "loss": 0.9949, "step": 6966 }, { "epoch": 1.2529893014474514, "grad_norm": 1.4489775896072388, "learning_rate": 8.777811538190128e-06, "loss": 0.714, "step": 6967 }, { "epoch": 1.253169109053313, "grad_norm": 1.7544987201690674, "learning_rate": 8.777429980409118e-06, "loss": 0.7132, "step": 6968 }, { "epoch": 1.2533489166591747, "grad_norm": 1.4514164924621582, "learning_rate": 8.777048371373338e-06, "loss": 0.7182, "step": 6969 }, { "epoch": 1.2535287242650364, "grad_norm": 1.5375252962112427, "learning_rate": 8.776666711087966e-06, "loss": 0.7744, "step": 6970 }, { "epoch": 1.253708531870898, "grad_norm": 1.6216003894805908, "learning_rate": 8.776284999558186e-06, "loss": 0.7443, "step": 6971 }, { "epoch": 1.25388833947676, "grad_norm": 1.1472023725509644, "learning_rate": 8.775903236789172e-06, "loss": 0.9465, "step": 6972 }, { "epoch": 1.2540681470826216, "grad_norm": 1.525593638420105, "learning_rate": 8.775521422786104e-06, "loss": 0.7874, "step": 6973 }, { "epoch": 1.2542479546884833, "grad_norm": 1.5269569158554077, "learning_rate": 8.775139557554166e-06, "loss": 0.7248, "step": 6974 }, { "epoch": 1.2544277622943452, "grad_norm": 1.0938283205032349, "learning_rate": 8.774757641098536e-06, "loss": 0.9055, "step": 6975 }, { "epoch": 1.2546075699002068, "grad_norm": 1.5675023794174194, "learning_rate": 8.7743756734244e-06, "loss": 0.7431, "step": 6976 }, { "epoch": 1.2547873775060685, "grad_norm": 1.5681838989257812, "learning_rate": 8.773993654536938e-06, "loss": 0.7694, "step": 6977 }, { "epoch": 1.2549671851119302, "grad_norm": 1.6692193746566772, "learning_rate": 8.773611584441333e-06, "loss": 0.764, "step": 6978 }, { "epoch": 1.2551469927177918, "grad_norm": 1.6299113035202026, "learning_rate": 8.773229463142772e-06, "loss": 0.7965, "step": 6979 }, { "epoch": 1.2553268003236537, "grad_norm": 1.4993815422058105, "learning_rate": 8.772847290646437e-06, "loss": 0.7325, "step": 6980 }, { "epoch": 1.2555066079295154, "grad_norm": 1.5393863916397095, "learning_rate": 8.772465066957514e-06, "loss": 0.8288, "step": 6981 }, { "epoch": 1.255686415535377, "grad_norm": 1.5638582706451416, "learning_rate": 8.772082792081191e-06, "loss": 0.7299, "step": 6982 }, { "epoch": 1.255866223141239, "grad_norm": 1.579562783241272, "learning_rate": 8.771700466022655e-06, "loss": 0.7616, "step": 6983 }, { "epoch": 1.2560460307471006, "grad_norm": 1.443215012550354, "learning_rate": 8.77131808878709e-06, "loss": 0.7089, "step": 6984 }, { "epoch": 1.2562258383529623, "grad_norm": 1.5044797658920288, "learning_rate": 8.77093566037969e-06, "loss": 0.7398, "step": 6985 }, { "epoch": 1.256405645958824, "grad_norm": 1.6078652143478394, "learning_rate": 8.77055318080564e-06, "loss": 0.777, "step": 6986 }, { "epoch": 1.2565854535646857, "grad_norm": 1.3786653280258179, "learning_rate": 8.77017065007013e-06, "loss": 0.7004, "step": 6987 }, { "epoch": 1.2567652611705475, "grad_norm": 1.4579055309295654, "learning_rate": 8.769788068178352e-06, "loss": 0.7371, "step": 6988 }, { "epoch": 1.2569450687764092, "grad_norm": 1.4731242656707764, "learning_rate": 8.769405435135497e-06, "loss": 0.6695, "step": 6989 }, { "epoch": 1.2571248763822709, "grad_norm": 1.43850576877594, "learning_rate": 8.769022750946753e-06, "loss": 0.7351, "step": 6990 }, { "epoch": 1.2573046839881328, "grad_norm": 1.549481749534607, "learning_rate": 8.76864001561732e-06, "loss": 0.7222, "step": 6991 }, { "epoch": 1.2574844915939944, "grad_norm": 1.5922431945800781, "learning_rate": 8.768257229152385e-06, "loss": 0.7223, "step": 6992 }, { "epoch": 1.2576642991998561, "grad_norm": 1.460545301437378, "learning_rate": 8.767874391557145e-06, "loss": 0.7146, "step": 6993 }, { "epoch": 1.257844106805718, "grad_norm": 1.4764207601547241, "learning_rate": 8.767491502836792e-06, "loss": 0.674, "step": 6994 }, { "epoch": 1.2580239144115797, "grad_norm": 1.4604213237762451, "learning_rate": 8.767108562996523e-06, "loss": 0.6667, "step": 6995 }, { "epoch": 1.2582037220174414, "grad_norm": 1.4118425846099854, "learning_rate": 8.766725572041535e-06, "loss": 0.7229, "step": 6996 }, { "epoch": 1.258383529623303, "grad_norm": 1.5851980447769165, "learning_rate": 8.766342529977022e-06, "loss": 0.7525, "step": 6997 }, { "epoch": 1.2585633372291647, "grad_norm": 1.8133856058120728, "learning_rate": 8.765959436808182e-06, "loss": 0.6788, "step": 6998 }, { "epoch": 1.2587431448350266, "grad_norm": 1.1639405488967896, "learning_rate": 8.765576292540217e-06, "loss": 0.9349, "step": 6999 }, { "epoch": 1.2589229524408883, "grad_norm": 1.0529422760009766, "learning_rate": 8.76519309717832e-06, "loss": 0.9886, "step": 7000 }, { "epoch": 1.2589229524408883, "eval_loss": 0.808860719203949, "eval_runtime": 150.6798, "eval_samples_per_second": 95.447, "eval_steps_per_second": 1.493, "step": 7000 }, { "epoch": 1.25910276004675, "grad_norm": 1.5297857522964478, "learning_rate": 8.764809850727694e-06, "loss": 0.7265, "step": 7001 }, { "epoch": 1.2592825676526118, "grad_norm": 1.5137004852294922, "learning_rate": 8.764426553193538e-06, "loss": 0.7696, "step": 7002 }, { "epoch": 1.2594623752584735, "grad_norm": 1.5855956077575684, "learning_rate": 8.764043204581053e-06, "loss": 0.7816, "step": 7003 }, { "epoch": 1.2596421828643352, "grad_norm": 1.1941883563995361, "learning_rate": 8.763659804895442e-06, "loss": 0.9682, "step": 7004 }, { "epoch": 1.2598219904701968, "grad_norm": 1.4235929250717163, "learning_rate": 8.763276354141904e-06, "loss": 0.7753, "step": 7005 }, { "epoch": 1.2600017980760585, "grad_norm": 1.1104602813720703, "learning_rate": 8.762892852325645e-06, "loss": 0.9732, "step": 7006 }, { "epoch": 1.2601816056819204, "grad_norm": 1.440443515777588, "learning_rate": 8.762509299451867e-06, "loss": 0.7488, "step": 7007 }, { "epoch": 1.260361413287782, "grad_norm": 1.0467451810836792, "learning_rate": 8.762125695525774e-06, "loss": 0.955, "step": 7008 }, { "epoch": 1.2605412208936437, "grad_norm": 1.3622127771377563, "learning_rate": 8.761742040552572e-06, "loss": 0.738, "step": 7009 }, { "epoch": 1.2607210284995056, "grad_norm": 1.4143391847610474, "learning_rate": 8.761358334537469e-06, "loss": 0.7088, "step": 7010 }, { "epoch": 1.2609008361053673, "grad_norm": 1.54497492313385, "learning_rate": 8.760974577485666e-06, "loss": 0.6853, "step": 7011 }, { "epoch": 1.261080643711229, "grad_norm": 1.43655526638031, "learning_rate": 8.760590769402372e-06, "loss": 0.8024, "step": 7012 }, { "epoch": 1.2612604513170906, "grad_norm": 1.5548301935195923, "learning_rate": 8.7602069102928e-06, "loss": 0.7028, "step": 7013 }, { "epoch": 1.2614402589229523, "grad_norm": 1.2349846363067627, "learning_rate": 8.759823000162151e-06, "loss": 0.9531, "step": 7014 }, { "epoch": 1.2616200665288142, "grad_norm": 1.5046508312225342, "learning_rate": 8.759439039015638e-06, "loss": 0.7397, "step": 7015 }, { "epoch": 1.2617998741346759, "grad_norm": 1.6245766878128052, "learning_rate": 8.75905502685847e-06, "loss": 0.7629, "step": 7016 }, { "epoch": 1.2619796817405375, "grad_norm": 1.068637490272522, "learning_rate": 8.758670963695857e-06, "loss": 1.0066, "step": 7017 }, { "epoch": 1.2621594893463994, "grad_norm": 1.4727450609207153, "learning_rate": 8.758286849533011e-06, "loss": 0.6875, "step": 7018 }, { "epoch": 1.262339296952261, "grad_norm": 1.4914053678512573, "learning_rate": 8.757902684375145e-06, "loss": 0.6911, "step": 7019 }, { "epoch": 1.2625191045581228, "grad_norm": 1.4676228761672974, "learning_rate": 8.75751846822747e-06, "loss": 0.7398, "step": 7020 }, { "epoch": 1.2626989121639847, "grad_norm": 1.5359615087509155, "learning_rate": 8.757134201095199e-06, "loss": 0.7994, "step": 7021 }, { "epoch": 1.2628787197698463, "grad_norm": 1.4185484647750854, "learning_rate": 8.756749882983549e-06, "loss": 0.7686, "step": 7022 }, { "epoch": 1.263058527375708, "grad_norm": 1.406294584274292, "learning_rate": 8.756365513897729e-06, "loss": 0.6964, "step": 7023 }, { "epoch": 1.2632383349815697, "grad_norm": 1.4913843870162964, "learning_rate": 8.75598109384296e-06, "loss": 0.7564, "step": 7024 }, { "epoch": 1.2634181425874313, "grad_norm": 1.4411108493804932, "learning_rate": 8.755596622824456e-06, "loss": 0.7952, "step": 7025 }, { "epoch": 1.2635979501932932, "grad_norm": 1.168362021446228, "learning_rate": 8.755212100847433e-06, "loss": 0.9497, "step": 7026 }, { "epoch": 1.263777757799155, "grad_norm": 1.4928113222122192, "learning_rate": 8.75482752791711e-06, "loss": 0.7312, "step": 7027 }, { "epoch": 1.2639575654050166, "grad_norm": 1.1499810218811035, "learning_rate": 8.754442904038702e-06, "loss": 0.9247, "step": 7028 }, { "epoch": 1.2641373730108785, "grad_norm": 1.4409065246582031, "learning_rate": 8.754058229217432e-06, "loss": 0.7211, "step": 7029 }, { "epoch": 1.2643171806167401, "grad_norm": 1.1213746070861816, "learning_rate": 8.753673503458518e-06, "loss": 0.9574, "step": 7030 }, { "epoch": 1.2644969882226018, "grad_norm": 1.4302794933319092, "learning_rate": 8.75328872676718e-06, "loss": 0.6858, "step": 7031 }, { "epoch": 1.2646767958284635, "grad_norm": 1.551822304725647, "learning_rate": 8.752903899148639e-06, "loss": 0.7138, "step": 7032 }, { "epoch": 1.2648566034343252, "grad_norm": 1.485642671585083, "learning_rate": 8.752519020608115e-06, "loss": 0.7847, "step": 7033 }, { "epoch": 1.265036411040187, "grad_norm": 1.0261430740356445, "learning_rate": 8.752134091150832e-06, "loss": 0.9861, "step": 7034 }, { "epoch": 1.2652162186460487, "grad_norm": 1.505645751953125, "learning_rate": 8.751749110782013e-06, "loss": 0.7322, "step": 7035 }, { "epoch": 1.2653960262519104, "grad_norm": 1.088435411453247, "learning_rate": 8.751364079506882e-06, "loss": 0.9301, "step": 7036 }, { "epoch": 1.2655758338577723, "grad_norm": 1.422646403312683, "learning_rate": 8.750978997330661e-06, "loss": 0.6997, "step": 7037 }, { "epoch": 1.265755641463634, "grad_norm": 1.5624500513076782, "learning_rate": 8.750593864258578e-06, "loss": 0.7066, "step": 7038 }, { "epoch": 1.2659354490694956, "grad_norm": 1.4797247648239136, "learning_rate": 8.750208680295858e-06, "loss": 0.7519, "step": 7039 }, { "epoch": 1.2661152566753573, "grad_norm": 1.4206597805023193, "learning_rate": 8.749823445447725e-06, "loss": 0.7204, "step": 7040 }, { "epoch": 1.266295064281219, "grad_norm": 1.4945588111877441, "learning_rate": 8.74943815971941e-06, "loss": 0.7265, "step": 7041 }, { "epoch": 1.2664748718870809, "grad_norm": 1.4393445253372192, "learning_rate": 8.74905282311614e-06, "loss": 0.7456, "step": 7042 }, { "epoch": 1.2666546794929425, "grad_norm": 1.5289496183395386, "learning_rate": 8.74866743564314e-06, "loss": 0.7216, "step": 7043 }, { "epoch": 1.2668344870988042, "grad_norm": 1.7421308755874634, "learning_rate": 8.748281997305644e-06, "loss": 0.7524, "step": 7044 }, { "epoch": 1.267014294704666, "grad_norm": 1.4419114589691162, "learning_rate": 8.747896508108877e-06, "loss": 0.7273, "step": 7045 }, { "epoch": 1.2671941023105278, "grad_norm": 1.456900715827942, "learning_rate": 8.747510968058073e-06, "loss": 0.7892, "step": 7046 }, { "epoch": 1.2673739099163894, "grad_norm": 1.772830843925476, "learning_rate": 8.747125377158463e-06, "loss": 0.7794, "step": 7047 }, { "epoch": 1.2675537175222513, "grad_norm": 1.4666603803634644, "learning_rate": 8.746739735415278e-06, "loss": 0.7291, "step": 7048 }, { "epoch": 1.267733525128113, "grad_norm": 1.456113338470459, "learning_rate": 8.746354042833752e-06, "loss": 0.6961, "step": 7049 }, { "epoch": 1.2679133327339747, "grad_norm": 1.4708633422851562, "learning_rate": 8.745968299419116e-06, "loss": 0.685, "step": 7050 }, { "epoch": 1.2680931403398363, "grad_norm": 1.5241693258285522, "learning_rate": 8.745582505176607e-06, "loss": 0.7613, "step": 7051 }, { "epoch": 1.268272947945698, "grad_norm": 1.6436847448349, "learning_rate": 8.745196660111456e-06, "loss": 0.7709, "step": 7052 }, { "epoch": 1.26845275555156, "grad_norm": 1.4377213716506958, "learning_rate": 8.7448107642289e-06, "loss": 0.705, "step": 7053 }, { "epoch": 1.2686325631574216, "grad_norm": 1.6725176572799683, "learning_rate": 8.744424817534179e-06, "loss": 0.7626, "step": 7054 }, { "epoch": 1.2688123707632832, "grad_norm": 1.3991323709487915, "learning_rate": 8.744038820032524e-06, "loss": 0.7234, "step": 7055 }, { "epoch": 1.2689921783691451, "grad_norm": 1.4906342029571533, "learning_rate": 8.743652771729176e-06, "loss": 0.7188, "step": 7056 }, { "epoch": 1.2691719859750068, "grad_norm": 1.3999689817428589, "learning_rate": 8.743266672629372e-06, "loss": 0.7349, "step": 7057 }, { "epoch": 1.2693517935808685, "grad_norm": 1.4068009853363037, "learning_rate": 8.742880522738351e-06, "loss": 0.7116, "step": 7058 }, { "epoch": 1.2695316011867301, "grad_norm": 1.4423633813858032, "learning_rate": 8.74249432206135e-06, "loss": 0.6977, "step": 7059 }, { "epoch": 1.2697114087925918, "grad_norm": 1.4956022500991821, "learning_rate": 8.742108070603614e-06, "loss": 0.7578, "step": 7060 }, { "epoch": 1.2698912163984537, "grad_norm": 1.4214304685592651, "learning_rate": 8.741721768370382e-06, "loss": 0.712, "step": 7061 }, { "epoch": 1.2700710240043154, "grad_norm": 1.576202154159546, "learning_rate": 8.741335415366893e-06, "loss": 0.7538, "step": 7062 }, { "epoch": 1.270250831610177, "grad_norm": 1.4369412660598755, "learning_rate": 8.740949011598394e-06, "loss": 0.8112, "step": 7063 }, { "epoch": 1.270430639216039, "grad_norm": 1.4510163068771362, "learning_rate": 8.740562557070125e-06, "loss": 0.7125, "step": 7064 }, { "epoch": 1.2706104468219006, "grad_norm": 1.356557011604309, "learning_rate": 8.740176051787331e-06, "loss": 0.7479, "step": 7065 }, { "epoch": 1.2707902544277623, "grad_norm": 1.6238396167755127, "learning_rate": 8.739789495755254e-06, "loss": 0.7247, "step": 7066 }, { "epoch": 1.270970062033624, "grad_norm": 1.4422087669372559, "learning_rate": 8.73940288897914e-06, "loss": 0.7756, "step": 7067 }, { "epoch": 1.2711498696394856, "grad_norm": 1.7497751712799072, "learning_rate": 8.739016231464237e-06, "loss": 0.6954, "step": 7068 }, { "epoch": 1.2713296772453475, "grad_norm": 1.5048203468322754, "learning_rate": 8.738629523215791e-06, "loss": 0.7503, "step": 7069 }, { "epoch": 1.2715094848512092, "grad_norm": 2.178605794906616, "learning_rate": 8.738242764239046e-06, "loss": 0.7205, "step": 7070 }, { "epoch": 1.2716892924570709, "grad_norm": 1.5039716958999634, "learning_rate": 8.737855954539252e-06, "loss": 0.6702, "step": 7071 }, { "epoch": 1.2718691000629327, "grad_norm": 1.434196949005127, "learning_rate": 8.737469094121658e-06, "loss": 0.9407, "step": 7072 }, { "epoch": 1.2720489076687944, "grad_norm": 1.3764455318450928, "learning_rate": 8.737082182991513e-06, "loss": 0.7712, "step": 7073 }, { "epoch": 1.272228715274656, "grad_norm": 1.5229237079620361, "learning_rate": 8.736695221154063e-06, "loss": 0.8236, "step": 7074 }, { "epoch": 1.272408522880518, "grad_norm": 1.505242943763733, "learning_rate": 8.736308208614565e-06, "loss": 0.7447, "step": 7075 }, { "epoch": 1.2725883304863796, "grad_norm": 1.3969818353652954, "learning_rate": 8.735921145378265e-06, "loss": 0.7086, "step": 7076 }, { "epoch": 1.2727681380922413, "grad_norm": 1.1849098205566406, "learning_rate": 8.735534031450419e-06, "loss": 0.9526, "step": 7077 }, { "epoch": 1.272947945698103, "grad_norm": 1.7466994524002075, "learning_rate": 8.735146866836277e-06, "loss": 0.7746, "step": 7078 }, { "epoch": 1.2731277533039647, "grad_norm": 1.4214736223220825, "learning_rate": 8.734759651541093e-06, "loss": 0.7364, "step": 7079 }, { "epoch": 1.2733075609098266, "grad_norm": 1.493538737297058, "learning_rate": 8.734372385570122e-06, "loss": 0.693, "step": 7080 }, { "epoch": 1.2734873685156882, "grad_norm": 1.5692750215530396, "learning_rate": 8.733985068928616e-06, "loss": 0.7069, "step": 7081 }, { "epoch": 1.27366717612155, "grad_norm": 1.5500357151031494, "learning_rate": 8.733597701621835e-06, "loss": 0.7461, "step": 7082 }, { "epoch": 1.2738469837274118, "grad_norm": 1.571799874305725, "learning_rate": 8.733210283655029e-06, "loss": 0.7889, "step": 7083 }, { "epoch": 1.2740267913332735, "grad_norm": 1.171385407447815, "learning_rate": 8.73282281503346e-06, "loss": 0.9549, "step": 7084 }, { "epoch": 1.2742065989391351, "grad_norm": 1.4481186866760254, "learning_rate": 8.73243529576238e-06, "loss": 0.7481, "step": 7085 }, { "epoch": 1.2743864065449968, "grad_norm": 1.4784064292907715, "learning_rate": 8.732047725847055e-06, "loss": 0.7944, "step": 7086 }, { "epoch": 1.2745662141508585, "grad_norm": 1.1609898805618286, "learning_rate": 8.731660105292738e-06, "loss": 0.9236, "step": 7087 }, { "epoch": 1.2747460217567204, "grad_norm": 1.4730638265609741, "learning_rate": 8.731272434104688e-06, "loss": 0.763, "step": 7088 }, { "epoch": 1.274925829362582, "grad_norm": 1.2260040044784546, "learning_rate": 8.730884712288168e-06, "loss": 0.9354, "step": 7089 }, { "epoch": 1.2751056369684437, "grad_norm": 1.5613590478897095, "learning_rate": 8.730496939848439e-06, "loss": 0.7509, "step": 7090 }, { "epoch": 1.2752854445743056, "grad_norm": 1.4638558626174927, "learning_rate": 8.73010911679076e-06, "loss": 0.7966, "step": 7091 }, { "epoch": 1.2754652521801673, "grad_norm": 1.4225547313690186, "learning_rate": 8.729721243120395e-06, "loss": 0.7816, "step": 7092 }, { "epoch": 1.275645059786029, "grad_norm": 1.4656517505645752, "learning_rate": 8.729333318842608e-06, "loss": 0.7086, "step": 7093 }, { "epoch": 1.2758248673918906, "grad_norm": 1.5518187284469604, "learning_rate": 8.72894534396266e-06, "loss": 0.7664, "step": 7094 }, { "epoch": 1.2760046749977523, "grad_norm": 1.4510947465896606, "learning_rate": 8.728557318485815e-06, "loss": 0.7288, "step": 7095 }, { "epoch": 1.2761844826036142, "grad_norm": 1.473200798034668, "learning_rate": 8.728169242417342e-06, "loss": 0.7884, "step": 7096 }, { "epoch": 1.2763642902094758, "grad_norm": 1.4981857538223267, "learning_rate": 8.727781115762503e-06, "loss": 0.7482, "step": 7097 }, { "epoch": 1.2765440978153375, "grad_norm": 1.20177161693573, "learning_rate": 8.727392938526567e-06, "loss": 0.9428, "step": 7098 }, { "epoch": 1.2767239054211994, "grad_norm": 1.4642215967178345, "learning_rate": 8.7270047107148e-06, "loss": 0.7811, "step": 7099 }, { "epoch": 1.276903713027061, "grad_norm": 1.0574102401733398, "learning_rate": 8.726616432332466e-06, "loss": 0.9623, "step": 7100 }, { "epoch": 1.2770835206329227, "grad_norm": 1.6032060384750366, "learning_rate": 8.72622810338484e-06, "loss": 0.7306, "step": 7101 }, { "epoch": 1.2772633282387846, "grad_norm": 1.114292860031128, "learning_rate": 8.725839723877188e-06, "loss": 0.9384, "step": 7102 }, { "epoch": 1.2774431358446463, "grad_norm": 1.0868263244628906, "learning_rate": 8.725451293814778e-06, "loss": 0.9719, "step": 7103 }, { "epoch": 1.277622943450508, "grad_norm": 1.613405466079712, "learning_rate": 8.725062813202883e-06, "loss": 0.7492, "step": 7104 }, { "epoch": 1.2778027510563696, "grad_norm": 1.443616271018982, "learning_rate": 8.724674282046772e-06, "loss": 0.7093, "step": 7105 }, { "epoch": 1.2779825586622313, "grad_norm": 1.6268419027328491, "learning_rate": 8.72428570035172e-06, "loss": 0.7383, "step": 7106 }, { "epoch": 1.2781623662680932, "grad_norm": 1.4850226640701294, "learning_rate": 8.723897068122999e-06, "loss": 0.756, "step": 7107 }, { "epoch": 1.2783421738739549, "grad_norm": 1.437429666519165, "learning_rate": 8.72350838536588e-06, "loss": 0.6906, "step": 7108 }, { "epoch": 1.2785219814798165, "grad_norm": 1.4385231733322144, "learning_rate": 8.723119652085636e-06, "loss": 0.7092, "step": 7109 }, { "epoch": 1.2787017890856784, "grad_norm": 1.4265727996826172, "learning_rate": 8.722730868287546e-06, "loss": 0.7054, "step": 7110 }, { "epoch": 1.2788815966915401, "grad_norm": 1.1789093017578125, "learning_rate": 8.722342033976881e-06, "loss": 0.8999, "step": 7111 }, { "epoch": 1.2790614042974018, "grad_norm": 1.3003385066986084, "learning_rate": 8.721953149158921e-06, "loss": 0.9709, "step": 7112 }, { "epoch": 1.2792412119032635, "grad_norm": 1.5248874425888062, "learning_rate": 8.721564213838937e-06, "loss": 0.7554, "step": 7113 }, { "epoch": 1.2794210195091251, "grad_norm": 1.495783805847168, "learning_rate": 8.721175228022213e-06, "loss": 0.7717, "step": 7114 }, { "epoch": 1.279600827114987, "grad_norm": 1.470354676246643, "learning_rate": 8.720786191714023e-06, "loss": 0.7613, "step": 7115 }, { "epoch": 1.2797806347208487, "grad_norm": 1.4772720336914062, "learning_rate": 8.720397104919647e-06, "loss": 0.7509, "step": 7116 }, { "epoch": 1.2799604423267104, "grad_norm": 1.4171372652053833, "learning_rate": 8.720007967644364e-06, "loss": 0.7594, "step": 7117 }, { "epoch": 1.2801402499325722, "grad_norm": 1.489606261253357, "learning_rate": 8.719618779893453e-06, "loss": 0.6924, "step": 7118 }, { "epoch": 1.280320057538434, "grad_norm": 1.5006901025772095, "learning_rate": 8.719229541672197e-06, "loss": 0.69, "step": 7119 }, { "epoch": 1.2804998651442956, "grad_norm": 1.4499858617782593, "learning_rate": 8.718840252985875e-06, "loss": 0.7729, "step": 7120 }, { "epoch": 1.2806796727501573, "grad_norm": 1.5471748113632202, "learning_rate": 8.71845091383977e-06, "loss": 0.7524, "step": 7121 }, { "epoch": 1.280859480356019, "grad_norm": 1.609373688697815, "learning_rate": 8.718061524239166e-06, "loss": 0.7627, "step": 7122 }, { "epoch": 1.2810392879618808, "grad_norm": 1.2499058246612549, "learning_rate": 8.717672084189345e-06, "loss": 0.9599, "step": 7123 }, { "epoch": 1.2812190955677425, "grad_norm": 1.2106459140777588, "learning_rate": 8.717282593695594e-06, "loss": 0.9291, "step": 7124 }, { "epoch": 1.2813989031736042, "grad_norm": 1.5496371984481812, "learning_rate": 8.716893052763194e-06, "loss": 0.7801, "step": 7125 }, { "epoch": 1.281578710779466, "grad_norm": 1.578634262084961, "learning_rate": 8.716503461397434e-06, "loss": 0.7547, "step": 7126 }, { "epoch": 1.2817585183853277, "grad_norm": 1.7098122835159302, "learning_rate": 8.716113819603596e-06, "loss": 0.6851, "step": 7127 }, { "epoch": 1.2819383259911894, "grad_norm": 1.4351825714111328, "learning_rate": 8.715724127386971e-06, "loss": 0.6749, "step": 7128 }, { "epoch": 1.2821181335970513, "grad_norm": 1.611590027809143, "learning_rate": 8.715334384752847e-06, "loss": 0.7736, "step": 7129 }, { "epoch": 1.282297941202913, "grad_norm": 1.4215242862701416, "learning_rate": 8.714944591706507e-06, "loss": 0.7369, "step": 7130 }, { "epoch": 1.2824777488087746, "grad_norm": 1.519802212715149, "learning_rate": 8.714554748253246e-06, "loss": 0.7643, "step": 7131 }, { "epoch": 1.2826575564146363, "grad_norm": 1.4465594291687012, "learning_rate": 8.71416485439835e-06, "loss": 0.9222, "step": 7132 }, { "epoch": 1.282837364020498, "grad_norm": 1.4797468185424805, "learning_rate": 8.71377491014711e-06, "loss": 0.7639, "step": 7133 }, { "epoch": 1.2830171716263599, "grad_norm": 1.188781976699829, "learning_rate": 8.713384915504817e-06, "loss": 0.9677, "step": 7134 }, { "epoch": 1.2831969792322215, "grad_norm": 1.4884803295135498, "learning_rate": 8.712994870476766e-06, "loss": 0.7364, "step": 7135 }, { "epoch": 1.2833767868380832, "grad_norm": 1.5504155158996582, "learning_rate": 8.712604775068243e-06, "loss": 0.7158, "step": 7136 }, { "epoch": 1.283556594443945, "grad_norm": 1.5295404195785522, "learning_rate": 8.712214629284547e-06, "loss": 0.7059, "step": 7137 }, { "epoch": 1.2837364020498068, "grad_norm": 1.6087684631347656, "learning_rate": 8.71182443313097e-06, "loss": 0.7184, "step": 7138 }, { "epoch": 1.2839162096556684, "grad_norm": 1.5451836585998535, "learning_rate": 8.711434186612802e-06, "loss": 0.8099, "step": 7139 }, { "epoch": 1.28409601726153, "grad_norm": 1.5927925109863281, "learning_rate": 8.711043889735345e-06, "loss": 0.7765, "step": 7140 }, { "epoch": 1.2842758248673918, "grad_norm": 1.6349270343780518, "learning_rate": 8.710653542503892e-06, "loss": 0.8268, "step": 7141 }, { "epoch": 1.2844556324732537, "grad_norm": 1.525866985321045, "learning_rate": 8.710263144923738e-06, "loss": 0.7695, "step": 7142 }, { "epoch": 1.2846354400791153, "grad_norm": 1.5746805667877197, "learning_rate": 8.709872697000183e-06, "loss": 0.7363, "step": 7143 }, { "epoch": 1.284815247684977, "grad_norm": 1.643647313117981, "learning_rate": 8.709482198738521e-06, "loss": 0.7353, "step": 7144 }, { "epoch": 1.284995055290839, "grad_norm": 1.5739326477050781, "learning_rate": 8.709091650144055e-06, "loss": 0.9704, "step": 7145 }, { "epoch": 1.2851748628967006, "grad_norm": 1.5186808109283447, "learning_rate": 8.708701051222081e-06, "loss": 0.7765, "step": 7146 }, { "epoch": 1.2853546705025622, "grad_norm": 1.5755764245986938, "learning_rate": 8.708310401977901e-06, "loss": 0.7205, "step": 7147 }, { "epoch": 1.285534478108424, "grad_norm": 1.5686043500900269, "learning_rate": 8.707919702416815e-06, "loss": 0.763, "step": 7148 }, { "epoch": 1.2857142857142856, "grad_norm": 1.6033666133880615, "learning_rate": 8.707528952544124e-06, "loss": 0.7278, "step": 7149 }, { "epoch": 1.2858940933201475, "grad_norm": 1.5371298789978027, "learning_rate": 8.70713815236513e-06, "loss": 0.6959, "step": 7150 }, { "epoch": 1.2860739009260092, "grad_norm": 1.3871246576309204, "learning_rate": 8.706747301885132e-06, "loss": 0.7345, "step": 7151 }, { "epoch": 1.2862537085318708, "grad_norm": 1.3381595611572266, "learning_rate": 8.70635640110944e-06, "loss": 0.6771, "step": 7152 }, { "epoch": 1.2864335161377327, "grad_norm": 1.5207957029342651, "learning_rate": 8.705965450043354e-06, "loss": 0.668, "step": 7153 }, { "epoch": 1.2866133237435944, "grad_norm": 1.4606128931045532, "learning_rate": 8.70557444869218e-06, "loss": 0.6858, "step": 7154 }, { "epoch": 1.286793131349456, "grad_norm": 1.399424433708191, "learning_rate": 8.705183397061223e-06, "loss": 0.6929, "step": 7155 }, { "epoch": 1.286972938955318, "grad_norm": 1.5362447500228882, "learning_rate": 8.70479229515579e-06, "loss": 0.8957, "step": 7156 }, { "epoch": 1.2871527465611796, "grad_norm": 1.4905461072921753, "learning_rate": 8.704401142981184e-06, "loss": 0.7354, "step": 7157 }, { "epoch": 1.2873325541670413, "grad_norm": 1.4514366388320923, "learning_rate": 8.70400994054272e-06, "loss": 0.6991, "step": 7158 }, { "epoch": 1.287512361772903, "grad_norm": 1.3979521989822388, "learning_rate": 8.703618687845697e-06, "loss": 0.7087, "step": 7159 }, { "epoch": 1.2876921693787646, "grad_norm": 1.5664565563201904, "learning_rate": 8.70322738489543e-06, "loss": 0.7409, "step": 7160 }, { "epoch": 1.2878719769846265, "grad_norm": 1.4963035583496094, "learning_rate": 8.702836031697224e-06, "loss": 0.7723, "step": 7161 }, { "epoch": 1.2880517845904882, "grad_norm": 1.5229475498199463, "learning_rate": 8.702444628256394e-06, "loss": 0.6857, "step": 7162 }, { "epoch": 1.2882315921963499, "grad_norm": 1.134736180305481, "learning_rate": 8.702053174578248e-06, "loss": 0.9705, "step": 7163 }, { "epoch": 1.2884113998022118, "grad_norm": 1.449476718902588, "learning_rate": 8.701661670668097e-06, "loss": 0.76, "step": 7164 }, { "epoch": 1.2885912074080734, "grad_norm": 1.4825685024261475, "learning_rate": 8.701270116531254e-06, "loss": 0.7114, "step": 7165 }, { "epoch": 1.288771015013935, "grad_norm": 1.0558388233184814, "learning_rate": 8.700878512173034e-06, "loss": 0.949, "step": 7166 }, { "epoch": 1.2889508226197968, "grad_norm": 1.5504481792449951, "learning_rate": 8.700486857598749e-06, "loss": 0.7138, "step": 7167 }, { "epoch": 1.2891306302256584, "grad_norm": 1.612014651298523, "learning_rate": 8.700095152813712e-06, "loss": 0.7082, "step": 7168 }, { "epoch": 1.2893104378315203, "grad_norm": 1.0473816394805908, "learning_rate": 8.699703397823238e-06, "loss": 0.9605, "step": 7169 }, { "epoch": 1.289490245437382, "grad_norm": 1.6145607233047485, "learning_rate": 8.699311592632644e-06, "loss": 0.6955, "step": 7170 }, { "epoch": 1.2896700530432437, "grad_norm": 1.1482739448547363, "learning_rate": 8.698919737247246e-06, "loss": 0.9549, "step": 7171 }, { "epoch": 1.2898498606491056, "grad_norm": 1.049533486366272, "learning_rate": 8.69852783167236e-06, "loss": 0.9225, "step": 7172 }, { "epoch": 1.2900296682549672, "grad_norm": 1.4806119203567505, "learning_rate": 8.698135875913304e-06, "loss": 0.7505, "step": 7173 }, { "epoch": 1.290209475860829, "grad_norm": 1.4951425790786743, "learning_rate": 8.697743869975398e-06, "loss": 0.7233, "step": 7174 }, { "epoch": 1.2903892834666906, "grad_norm": 1.3791613578796387, "learning_rate": 8.697351813863959e-06, "loss": 0.7488, "step": 7175 }, { "epoch": 1.2905690910725522, "grad_norm": 1.489937424659729, "learning_rate": 8.696959707584307e-06, "loss": 0.7706, "step": 7176 }, { "epoch": 1.2907488986784141, "grad_norm": 1.5765377283096313, "learning_rate": 8.696567551141764e-06, "loss": 0.7897, "step": 7177 }, { "epoch": 1.2909287062842758, "grad_norm": 1.411553978919983, "learning_rate": 8.696175344541647e-06, "loss": 0.7956, "step": 7178 }, { "epoch": 1.2911085138901375, "grad_norm": 1.4635963439941406, "learning_rate": 8.695783087789282e-06, "loss": 0.6898, "step": 7179 }, { "epoch": 1.2912883214959994, "grad_norm": 1.5768516063690186, "learning_rate": 8.69539078088999e-06, "loss": 0.7173, "step": 7180 }, { "epoch": 1.291468129101861, "grad_norm": 1.4846028089523315, "learning_rate": 8.694998423849095e-06, "loss": 0.7335, "step": 7181 }, { "epoch": 1.2916479367077227, "grad_norm": 1.4393879175186157, "learning_rate": 8.694606016671919e-06, "loss": 0.6971, "step": 7182 }, { "epoch": 1.2918277443135846, "grad_norm": 1.5261372327804565, "learning_rate": 8.694213559363785e-06, "loss": 0.7583, "step": 7183 }, { "epoch": 1.2920075519194463, "grad_norm": 1.5431373119354248, "learning_rate": 8.693821051930022e-06, "loss": 0.6913, "step": 7184 }, { "epoch": 1.292187359525308, "grad_norm": 1.5813466310501099, "learning_rate": 8.693428494375955e-06, "loss": 0.7548, "step": 7185 }, { "epoch": 1.2923671671311696, "grad_norm": 1.6232048273086548, "learning_rate": 8.693035886706909e-06, "loss": 0.7521, "step": 7186 }, { "epoch": 1.2925469747370313, "grad_norm": 1.515769600868225, "learning_rate": 8.692643228928211e-06, "loss": 0.7418, "step": 7187 }, { "epoch": 1.2927267823428932, "grad_norm": 1.5964550971984863, "learning_rate": 8.692250521045192e-06, "loss": 0.8018, "step": 7188 }, { "epoch": 1.2929065899487548, "grad_norm": 1.5227307081222534, "learning_rate": 8.691857763063176e-06, "loss": 0.6962, "step": 7189 }, { "epoch": 1.2930863975546165, "grad_norm": 1.448410153388977, "learning_rate": 8.691464954987494e-06, "loss": 0.7367, "step": 7190 }, { "epoch": 1.2932662051604784, "grad_norm": 1.5748475790023804, "learning_rate": 8.691072096823478e-06, "loss": 0.765, "step": 7191 }, { "epoch": 1.29344601276634, "grad_norm": 1.504902720451355, "learning_rate": 8.690679188576455e-06, "loss": 0.6874, "step": 7192 }, { "epoch": 1.2936258203722018, "grad_norm": 1.58304762840271, "learning_rate": 8.690286230251758e-06, "loss": 0.7788, "step": 7193 }, { "epoch": 1.2938056279780634, "grad_norm": 1.459402322769165, "learning_rate": 8.689893221854721e-06, "loss": 0.7826, "step": 7194 }, { "epoch": 1.293985435583925, "grad_norm": 1.483452558517456, "learning_rate": 8.689500163390674e-06, "loss": 0.7758, "step": 7195 }, { "epoch": 1.294165243189787, "grad_norm": 1.4754691123962402, "learning_rate": 8.68910705486495e-06, "loss": 0.7436, "step": 7196 }, { "epoch": 1.2943450507956487, "grad_norm": 1.6378988027572632, "learning_rate": 8.688713896282886e-06, "loss": 0.7277, "step": 7197 }, { "epoch": 1.2945248584015103, "grad_norm": 1.5370961427688599, "learning_rate": 8.688320687649811e-06, "loss": 0.7552, "step": 7198 }, { "epoch": 1.2947046660073722, "grad_norm": 1.4666588306427002, "learning_rate": 8.687927428971065e-06, "loss": 0.735, "step": 7199 }, { "epoch": 1.2948844736132339, "grad_norm": 1.5307425260543823, "learning_rate": 8.687534120251986e-06, "loss": 0.9694, "step": 7200 }, { "epoch": 1.2950642812190956, "grad_norm": 1.391980528831482, "learning_rate": 8.687140761497905e-06, "loss": 0.7059, "step": 7201 }, { "epoch": 1.2952440888249572, "grad_norm": 1.4739707708358765, "learning_rate": 8.686747352714161e-06, "loss": 0.71, "step": 7202 }, { "epoch": 1.295423896430819, "grad_norm": 1.5196857452392578, "learning_rate": 8.686353893906094e-06, "loss": 0.7376, "step": 7203 }, { "epoch": 1.2956037040366808, "grad_norm": 1.5581600666046143, "learning_rate": 8.685960385079042e-06, "loss": 0.7697, "step": 7204 }, { "epoch": 1.2957835116425425, "grad_norm": 1.5051045417785645, "learning_rate": 8.685566826238345e-06, "loss": 0.675, "step": 7205 }, { "epoch": 1.2959633192484041, "grad_norm": 1.0463974475860596, "learning_rate": 8.68517321738934e-06, "loss": 0.9394, "step": 7206 }, { "epoch": 1.296143126854266, "grad_norm": 1.5362521409988403, "learning_rate": 8.68477955853737e-06, "loss": 0.753, "step": 7207 }, { "epoch": 1.2963229344601277, "grad_norm": 1.4862940311431885, "learning_rate": 8.684385849687777e-06, "loss": 0.6874, "step": 7208 }, { "epoch": 1.2965027420659894, "grad_norm": 1.589437484741211, "learning_rate": 8.683992090845903e-06, "loss": 0.6834, "step": 7209 }, { "epoch": 1.296682549671851, "grad_norm": 1.4439928531646729, "learning_rate": 8.68359828201709e-06, "loss": 0.7181, "step": 7210 }, { "epoch": 1.296862357277713, "grad_norm": 1.4386311769485474, "learning_rate": 8.683204423206681e-06, "loss": 0.7893, "step": 7211 }, { "epoch": 1.2970421648835746, "grad_norm": 1.4276155233383179, "learning_rate": 8.68281051442002e-06, "loss": 0.7557, "step": 7212 }, { "epoch": 1.2972219724894363, "grad_norm": 1.4621244668960571, "learning_rate": 8.682416555662457e-06, "loss": 0.7378, "step": 7213 }, { "epoch": 1.297401780095298, "grad_norm": 1.6170684099197388, "learning_rate": 8.682022546939328e-06, "loss": 0.6609, "step": 7214 }, { "epoch": 1.2975815877011598, "grad_norm": 1.4483548402786255, "learning_rate": 8.681628488255986e-06, "loss": 0.7198, "step": 7215 }, { "epoch": 1.2977613953070215, "grad_norm": 1.6017799377441406, "learning_rate": 8.681234379617777e-06, "loss": 0.736, "step": 7216 }, { "epoch": 1.2979412029128832, "grad_norm": 1.5357741117477417, "learning_rate": 8.680840221030049e-06, "loss": 0.7321, "step": 7217 }, { "epoch": 1.298121010518745, "grad_norm": 1.5448766946792603, "learning_rate": 8.680446012498147e-06, "loss": 0.7017, "step": 7218 }, { "epoch": 1.2983008181246067, "grad_norm": 1.4918677806854248, "learning_rate": 8.680051754027421e-06, "loss": 0.773, "step": 7219 }, { "epoch": 1.2984806257304684, "grad_norm": 1.5652731657028198, "learning_rate": 8.679657445623224e-06, "loss": 0.7679, "step": 7220 }, { "epoch": 1.29866043333633, "grad_norm": 1.4393386840820312, "learning_rate": 8.679263087290903e-06, "loss": 0.7207, "step": 7221 }, { "epoch": 1.2988402409421917, "grad_norm": 1.356435775756836, "learning_rate": 8.678868679035807e-06, "loss": 0.9852, "step": 7222 }, { "epoch": 1.2990200485480536, "grad_norm": 1.7063039541244507, "learning_rate": 8.678474220863293e-06, "loss": 0.7319, "step": 7223 }, { "epoch": 1.2991998561539153, "grad_norm": 1.424108862876892, "learning_rate": 8.678079712778711e-06, "loss": 0.7417, "step": 7224 }, { "epoch": 1.299379663759777, "grad_norm": 1.4555460214614868, "learning_rate": 8.677685154787411e-06, "loss": 0.7563, "step": 7225 }, { "epoch": 1.2995594713656389, "grad_norm": 1.5540878772735596, "learning_rate": 8.67729054689475e-06, "loss": 0.7514, "step": 7226 }, { "epoch": 1.2997392789715005, "grad_norm": 1.5327054262161255, "learning_rate": 8.676895889106083e-06, "loss": 0.743, "step": 7227 }, { "epoch": 1.2999190865773622, "grad_norm": 1.5437120199203491, "learning_rate": 8.676501181426761e-06, "loss": 0.6682, "step": 7228 }, { "epoch": 1.3000988941832239, "grad_norm": 1.470978856086731, "learning_rate": 8.676106423862142e-06, "loss": 0.6953, "step": 7229 }, { "epoch": 1.3002787017890856, "grad_norm": 1.4401159286499023, "learning_rate": 8.675711616417584e-06, "loss": 0.7499, "step": 7230 }, { "epoch": 1.3004585093949474, "grad_norm": 1.1832389831542969, "learning_rate": 8.675316759098442e-06, "loss": 0.9822, "step": 7231 }, { "epoch": 1.3006383170008091, "grad_norm": 1.439914345741272, "learning_rate": 8.674921851910075e-06, "loss": 0.7608, "step": 7232 }, { "epoch": 1.3008181246066708, "grad_norm": 1.1146490573883057, "learning_rate": 8.674526894857838e-06, "loss": 0.9488, "step": 7233 }, { "epoch": 1.3009979322125327, "grad_norm": 1.5599058866500854, "learning_rate": 8.674131887947095e-06, "loss": 0.7575, "step": 7234 }, { "epoch": 1.3011777398183944, "grad_norm": 1.750105857849121, "learning_rate": 8.673736831183202e-06, "loss": 0.7194, "step": 7235 }, { "epoch": 1.301357547424256, "grad_norm": 1.4335353374481201, "learning_rate": 8.67334172457152e-06, "loss": 0.7063, "step": 7236 }, { "epoch": 1.3015373550301177, "grad_norm": 1.440724492073059, "learning_rate": 8.67294656811741e-06, "loss": 0.7087, "step": 7237 }, { "epoch": 1.3017171626359794, "grad_norm": 1.4378762245178223, "learning_rate": 8.672551361826237e-06, "loss": 0.7307, "step": 7238 }, { "epoch": 1.3018969702418413, "grad_norm": 1.475488305091858, "learning_rate": 8.67215610570336e-06, "loss": 0.7083, "step": 7239 }, { "epoch": 1.302076777847703, "grad_norm": 1.4112457036972046, "learning_rate": 8.671760799754143e-06, "loss": 0.6766, "step": 7240 }, { "epoch": 1.3022565854535646, "grad_norm": 1.481500506401062, "learning_rate": 8.67136544398395e-06, "loss": 0.7588, "step": 7241 }, { "epoch": 1.3024363930594265, "grad_norm": 1.1628823280334473, "learning_rate": 8.670970038398145e-06, "loss": 0.9566, "step": 7242 }, { "epoch": 1.3026162006652882, "grad_norm": 1.599638819694519, "learning_rate": 8.670574583002093e-06, "loss": 0.8075, "step": 7243 }, { "epoch": 1.3027960082711498, "grad_norm": 1.4628077745437622, "learning_rate": 8.67017907780116e-06, "loss": 0.7513, "step": 7244 }, { "epoch": 1.3029758158770117, "grad_norm": 1.4868088960647583, "learning_rate": 8.669783522800714e-06, "loss": 0.7517, "step": 7245 }, { "epoch": 1.3031556234828734, "grad_norm": 1.4853419065475464, "learning_rate": 8.66938791800612e-06, "loss": 0.7616, "step": 7246 }, { "epoch": 1.303335431088735, "grad_norm": 1.4563018083572388, "learning_rate": 8.668992263422746e-06, "loss": 0.7831, "step": 7247 }, { "epoch": 1.3035152386945967, "grad_norm": 1.7056106328964233, "learning_rate": 8.668596559055963e-06, "loss": 0.7414, "step": 7248 }, { "epoch": 1.3036950463004584, "grad_norm": 1.5484758615493774, "learning_rate": 8.668200804911138e-06, "loss": 0.7722, "step": 7249 }, { "epoch": 1.3038748539063203, "grad_norm": 1.541368007659912, "learning_rate": 8.66780500099364e-06, "loss": 0.7928, "step": 7250 }, { "epoch": 1.304054661512182, "grad_norm": 1.509916067123413, "learning_rate": 8.66740914730884e-06, "loss": 0.7551, "step": 7251 }, { "epoch": 1.3042344691180436, "grad_norm": 1.5168951749801636, "learning_rate": 8.667013243862113e-06, "loss": 0.7128, "step": 7252 }, { "epoch": 1.3044142767239055, "grad_norm": 1.5474741458892822, "learning_rate": 8.666617290658825e-06, "loss": 0.6937, "step": 7253 }, { "epoch": 1.3045940843297672, "grad_norm": 1.5139708518981934, "learning_rate": 8.666221287704354e-06, "loss": 0.9356, "step": 7254 }, { "epoch": 1.3047738919356289, "grad_norm": 1.4493056535720825, "learning_rate": 8.66582523500407e-06, "loss": 0.7393, "step": 7255 }, { "epoch": 1.3049536995414905, "grad_norm": 1.357528805732727, "learning_rate": 8.665429132563346e-06, "loss": 0.9566, "step": 7256 }, { "epoch": 1.3051335071473522, "grad_norm": 1.7029588222503662, "learning_rate": 8.66503298038756e-06, "loss": 0.7297, "step": 7257 }, { "epoch": 1.305313314753214, "grad_norm": 1.1012738943099976, "learning_rate": 8.664636778482085e-06, "loss": 0.946, "step": 7258 }, { "epoch": 1.3054931223590758, "grad_norm": 1.6249912977218628, "learning_rate": 8.664240526852296e-06, "loss": 0.8172, "step": 7259 }, { "epoch": 1.3056729299649374, "grad_norm": 1.509821891784668, "learning_rate": 8.663844225503573e-06, "loss": 0.7461, "step": 7260 }, { "epoch": 1.3058527375707993, "grad_norm": 1.5831937789916992, "learning_rate": 8.663447874441291e-06, "loss": 0.749, "step": 7261 }, { "epoch": 1.306032545176661, "grad_norm": 1.6657752990722656, "learning_rate": 8.663051473670829e-06, "loss": 0.7753, "step": 7262 }, { "epoch": 1.3062123527825227, "grad_norm": 1.4809404611587524, "learning_rate": 8.662655023197562e-06, "loss": 0.7244, "step": 7263 }, { "epoch": 1.3063921603883843, "grad_norm": 1.4832663536071777, "learning_rate": 8.662258523026873e-06, "loss": 0.7011, "step": 7264 }, { "epoch": 1.306571967994246, "grad_norm": 1.5363134145736694, "learning_rate": 8.661861973164143e-06, "loss": 0.714, "step": 7265 }, { "epoch": 1.306751775600108, "grad_norm": 1.6000968217849731, "learning_rate": 8.661465373614752e-06, "loss": 0.7208, "step": 7266 }, { "epoch": 1.3069315832059696, "grad_norm": 1.4808539152145386, "learning_rate": 8.661068724384077e-06, "loss": 0.7299, "step": 7267 }, { "epoch": 1.3071113908118313, "grad_norm": 1.622326135635376, "learning_rate": 8.660672025477506e-06, "loss": 0.7295, "step": 7268 }, { "epoch": 1.3072911984176931, "grad_norm": 1.4967628717422485, "learning_rate": 8.660275276900416e-06, "loss": 0.708, "step": 7269 }, { "epoch": 1.3074710060235548, "grad_norm": 1.5160185098648071, "learning_rate": 8.659878478658196e-06, "loss": 0.7293, "step": 7270 }, { "epoch": 1.3076508136294165, "grad_norm": 1.0870189666748047, "learning_rate": 8.659481630756225e-06, "loss": 0.9426, "step": 7271 }, { "epoch": 1.3078306212352784, "grad_norm": 1.721222996711731, "learning_rate": 8.659084733199892e-06, "loss": 0.7747, "step": 7272 }, { "epoch": 1.30801042884114, "grad_norm": 1.5510585308074951, "learning_rate": 8.658687785994579e-06, "loss": 0.8105, "step": 7273 }, { "epoch": 1.3081902364470017, "grad_norm": 1.4791417121887207, "learning_rate": 8.658290789145673e-06, "loss": 0.6608, "step": 7274 }, { "epoch": 1.3083700440528634, "grad_norm": 1.526164174079895, "learning_rate": 8.657893742658562e-06, "loss": 0.7231, "step": 7275 }, { "epoch": 1.308549851658725, "grad_norm": 1.442312479019165, "learning_rate": 8.657496646538635e-06, "loss": 0.7808, "step": 7276 }, { "epoch": 1.308729659264587, "grad_norm": 1.0763078927993774, "learning_rate": 8.657099500791275e-06, "loss": 0.969, "step": 7277 }, { "epoch": 1.3089094668704486, "grad_norm": 1.5562578439712524, "learning_rate": 8.656702305421873e-06, "loss": 0.6715, "step": 7278 }, { "epoch": 1.3090892744763103, "grad_norm": 1.4293133020401, "learning_rate": 8.65630506043582e-06, "loss": 0.7765, "step": 7279 }, { "epoch": 1.3092690820821722, "grad_norm": 1.4338985681533813, "learning_rate": 8.655907765838506e-06, "loss": 0.6991, "step": 7280 }, { "epoch": 1.3094488896880339, "grad_norm": 1.622463345527649, "learning_rate": 8.655510421635318e-06, "loss": 0.7453, "step": 7281 }, { "epoch": 1.3096286972938955, "grad_norm": 1.5037550926208496, "learning_rate": 8.655113027831651e-06, "loss": 0.7328, "step": 7282 }, { "epoch": 1.3098085048997572, "grad_norm": 1.08219313621521, "learning_rate": 8.654715584432896e-06, "loss": 0.9703, "step": 7283 }, { "epoch": 1.3099883125056189, "grad_norm": 1.4782812595367432, "learning_rate": 8.654318091444447e-06, "loss": 0.7667, "step": 7284 }, { "epoch": 1.3101681201114808, "grad_norm": 1.4857157468795776, "learning_rate": 8.653920548871695e-06, "loss": 0.7297, "step": 7285 }, { "epoch": 1.3103479277173424, "grad_norm": 1.4807500839233398, "learning_rate": 8.653522956720037e-06, "loss": 0.7024, "step": 7286 }, { "epoch": 1.310527735323204, "grad_norm": 1.4509011507034302, "learning_rate": 8.653125314994865e-06, "loss": 0.7662, "step": 7287 }, { "epoch": 1.310707542929066, "grad_norm": 1.44890296459198, "learning_rate": 8.652727623701577e-06, "loss": 0.7833, "step": 7288 }, { "epoch": 1.3108873505349277, "grad_norm": 1.4097880125045776, "learning_rate": 8.65232988284557e-06, "loss": 0.675, "step": 7289 }, { "epoch": 1.3110671581407893, "grad_norm": 1.5033323764801025, "learning_rate": 8.651932092432235e-06, "loss": 0.7311, "step": 7290 }, { "epoch": 1.311246965746651, "grad_norm": 1.3948843479156494, "learning_rate": 8.651534252466975e-06, "loss": 0.7038, "step": 7291 }, { "epoch": 1.3114267733525127, "grad_norm": 1.5345808267593384, "learning_rate": 8.651136362955186e-06, "loss": 0.7421, "step": 7292 }, { "epoch": 1.3116065809583746, "grad_norm": 1.4354249238967896, "learning_rate": 8.650738423902269e-06, "loss": 0.7441, "step": 7293 }, { "epoch": 1.3117863885642362, "grad_norm": 2.0393028259277344, "learning_rate": 8.65034043531362e-06, "loss": 0.7263, "step": 7294 }, { "epoch": 1.311966196170098, "grad_norm": 1.4482133388519287, "learning_rate": 8.649942397194642e-06, "loss": 0.7433, "step": 7295 }, { "epoch": 1.3121460037759598, "grad_norm": 1.5256640911102295, "learning_rate": 8.649544309550735e-06, "loss": 0.7154, "step": 7296 }, { "epoch": 1.3123258113818215, "grad_norm": 1.4890949726104736, "learning_rate": 8.649146172387299e-06, "loss": 0.8192, "step": 7297 }, { "epoch": 1.3125056189876831, "grad_norm": 1.5827901363372803, "learning_rate": 8.64874798570974e-06, "loss": 0.7345, "step": 7298 }, { "epoch": 1.312685426593545, "grad_norm": 1.5976780652999878, "learning_rate": 8.648349749523457e-06, "loss": 0.708, "step": 7299 }, { "epoch": 1.3128652341994067, "grad_norm": 1.1124399900436401, "learning_rate": 8.647951463833855e-06, "loss": 0.9523, "step": 7300 }, { "epoch": 1.3130450418052684, "grad_norm": 1.9273737668991089, "learning_rate": 8.647553128646337e-06, "loss": 0.6842, "step": 7301 }, { "epoch": 1.31322484941113, "grad_norm": 1.4958926439285278, "learning_rate": 8.64715474396631e-06, "loss": 0.7024, "step": 7302 }, { "epoch": 1.3134046570169917, "grad_norm": 1.516655683517456, "learning_rate": 8.64675630979918e-06, "loss": 0.7369, "step": 7303 }, { "epoch": 1.3135844646228536, "grad_norm": 1.4952025413513184, "learning_rate": 8.646357826150351e-06, "loss": 0.7606, "step": 7304 }, { "epoch": 1.3137642722287153, "grad_norm": 1.581733226776123, "learning_rate": 8.645959293025232e-06, "loss": 0.7058, "step": 7305 }, { "epoch": 1.313944079834577, "grad_norm": 1.4632395505905151, "learning_rate": 8.645560710429228e-06, "loss": 0.688, "step": 7306 }, { "epoch": 1.3141238874404388, "grad_norm": 1.4518400430679321, "learning_rate": 8.64516207836775e-06, "loss": 0.7825, "step": 7307 }, { "epoch": 1.3143036950463005, "grad_norm": 1.5744054317474365, "learning_rate": 8.644763396846202e-06, "loss": 0.7601, "step": 7308 }, { "epoch": 1.3144835026521622, "grad_norm": 1.4343633651733398, "learning_rate": 8.644364665870003e-06, "loss": 0.758, "step": 7309 }, { "epoch": 1.3146633102580239, "grad_norm": 1.5358285903930664, "learning_rate": 8.643965885444551e-06, "loss": 0.7741, "step": 7310 }, { "epoch": 1.3148431178638855, "grad_norm": 1.5946109294891357, "learning_rate": 8.643567055575268e-06, "loss": 0.787, "step": 7311 }, { "epoch": 1.3150229254697474, "grad_norm": 1.5395835638046265, "learning_rate": 8.64316817626756e-06, "loss": 0.8005, "step": 7312 }, { "epoch": 1.315202733075609, "grad_norm": 1.497636079788208, "learning_rate": 8.642769247526839e-06, "loss": 0.7926, "step": 7313 }, { "epoch": 1.3153825406814708, "grad_norm": 1.6150778532028198, "learning_rate": 8.64237026935852e-06, "loss": 0.7505, "step": 7314 }, { "epoch": 1.3155623482873327, "grad_norm": 1.527493953704834, "learning_rate": 8.641971241768015e-06, "loss": 0.734, "step": 7315 }, { "epoch": 1.3157421558931943, "grad_norm": 1.534268856048584, "learning_rate": 8.641572164760738e-06, "loss": 0.7894, "step": 7316 }, { "epoch": 1.315921963499056, "grad_norm": 1.3215528726577759, "learning_rate": 8.641173038342107e-06, "loss": 0.6455, "step": 7317 }, { "epoch": 1.3161017711049177, "grad_norm": 1.5522078275680542, "learning_rate": 8.640773862517536e-06, "loss": 0.717, "step": 7318 }, { "epoch": 1.3162815787107793, "grad_norm": 1.1149407625198364, "learning_rate": 8.64037463729244e-06, "loss": 0.9845, "step": 7319 }, { "epoch": 1.3164613863166412, "grad_norm": 1.3651493787765503, "learning_rate": 8.639975362672235e-06, "loss": 0.6861, "step": 7320 }, { "epoch": 1.316641193922503, "grad_norm": 1.2172142267227173, "learning_rate": 8.639576038662343e-06, "loss": 0.9478, "step": 7321 }, { "epoch": 1.3168210015283646, "grad_norm": 1.0712863206863403, "learning_rate": 8.639176665268179e-06, "loss": 0.9308, "step": 7322 }, { "epoch": 1.3170008091342265, "grad_norm": 1.5071055889129639, "learning_rate": 8.638777242495162e-06, "loss": 0.6851, "step": 7323 }, { "epoch": 1.3171806167400881, "grad_norm": 1.4739149808883667, "learning_rate": 8.638377770348714e-06, "loss": 0.7146, "step": 7324 }, { "epoch": 1.3173604243459498, "grad_norm": 1.4928488731384277, "learning_rate": 8.63797824883425e-06, "loss": 0.7685, "step": 7325 }, { "epoch": 1.3175402319518117, "grad_norm": 1.4578489065170288, "learning_rate": 8.637578677957199e-06, "loss": 0.7737, "step": 7326 }, { "epoch": 1.3177200395576734, "grad_norm": 1.505803108215332, "learning_rate": 8.637179057722978e-06, "loss": 0.777, "step": 7327 }, { "epoch": 1.317899847163535, "grad_norm": 1.5570462942123413, "learning_rate": 8.636779388137008e-06, "loss": 0.8222, "step": 7328 }, { "epoch": 1.3180796547693967, "grad_norm": 1.6243253946304321, "learning_rate": 8.636379669204712e-06, "loss": 0.7926, "step": 7329 }, { "epoch": 1.3182594623752584, "grad_norm": 1.5114059448242188, "learning_rate": 8.63597990093152e-06, "loss": 0.7773, "step": 7330 }, { "epoch": 1.3184392699811203, "grad_norm": 1.401932716369629, "learning_rate": 8.635580083322847e-06, "loss": 0.7451, "step": 7331 }, { "epoch": 1.318619077586982, "grad_norm": 1.54363214969635, "learning_rate": 8.635180216384125e-06, "loss": 0.7234, "step": 7332 }, { "epoch": 1.3187988851928436, "grad_norm": 1.491527795791626, "learning_rate": 8.634780300120778e-06, "loss": 0.7393, "step": 7333 }, { "epoch": 1.3189786927987055, "grad_norm": 1.4489262104034424, "learning_rate": 8.634380334538231e-06, "loss": 0.6783, "step": 7334 }, { "epoch": 1.3191585004045672, "grad_norm": 1.6269640922546387, "learning_rate": 8.63398031964191e-06, "loss": 0.7581, "step": 7335 }, { "epoch": 1.3193383080104288, "grad_norm": 1.5529251098632812, "learning_rate": 8.633580255437246e-06, "loss": 0.7131, "step": 7336 }, { "epoch": 1.3195181156162905, "grad_norm": 1.3958475589752197, "learning_rate": 8.633180141929665e-06, "loss": 0.8871, "step": 7337 }, { "epoch": 1.3196979232221522, "grad_norm": 1.1616268157958984, "learning_rate": 8.632779979124597e-06, "loss": 0.9336, "step": 7338 }, { "epoch": 1.319877730828014, "grad_norm": 1.471308946609497, "learning_rate": 8.632379767027472e-06, "loss": 0.8232, "step": 7339 }, { "epoch": 1.3200575384338757, "grad_norm": 1.5185225009918213, "learning_rate": 8.63197950564372e-06, "loss": 0.6834, "step": 7340 }, { "epoch": 1.3202373460397374, "grad_norm": 1.5926337242126465, "learning_rate": 8.63157919497877e-06, "loss": 0.7696, "step": 7341 }, { "epoch": 1.3204171536455993, "grad_norm": 1.4748684167861938, "learning_rate": 8.631178835038057e-06, "loss": 0.7043, "step": 7342 }, { "epoch": 1.320596961251461, "grad_norm": 1.4391018152236938, "learning_rate": 8.63077842582701e-06, "loss": 0.7506, "step": 7343 }, { "epoch": 1.3207767688573226, "grad_norm": 1.4991694688796997, "learning_rate": 8.630377967351065e-06, "loss": 0.7793, "step": 7344 }, { "epoch": 1.3209565764631843, "grad_norm": 1.4931175708770752, "learning_rate": 8.629977459615655e-06, "loss": 0.7627, "step": 7345 }, { "epoch": 1.321136384069046, "grad_norm": 1.4553786516189575, "learning_rate": 8.629576902626214e-06, "loss": 0.7027, "step": 7346 }, { "epoch": 1.3213161916749079, "grad_norm": 1.490835428237915, "learning_rate": 8.629176296388175e-06, "loss": 0.7162, "step": 7347 }, { "epoch": 1.3214959992807696, "grad_norm": 1.7675637006759644, "learning_rate": 8.628775640906977e-06, "loss": 0.7088, "step": 7348 }, { "epoch": 1.3216758068866312, "grad_norm": 1.5564017295837402, "learning_rate": 8.628374936188055e-06, "loss": 0.7714, "step": 7349 }, { "epoch": 1.3218556144924931, "grad_norm": 1.339804768562317, "learning_rate": 8.627974182236846e-06, "loss": 0.7058, "step": 7350 }, { "epoch": 1.3220354220983548, "grad_norm": 1.5217502117156982, "learning_rate": 8.627573379058789e-06, "loss": 0.7205, "step": 7351 }, { "epoch": 1.3222152297042165, "grad_norm": 1.4224029779434204, "learning_rate": 8.62717252665932e-06, "loss": 0.7039, "step": 7352 }, { "epoch": 1.3223950373100783, "grad_norm": 1.4111970663070679, "learning_rate": 8.62677162504388e-06, "loss": 0.7255, "step": 7353 }, { "epoch": 1.32257484491594, "grad_norm": 1.4953175783157349, "learning_rate": 8.626370674217906e-06, "loss": 0.7552, "step": 7354 }, { "epoch": 1.3227546525218017, "grad_norm": 1.6243348121643066, "learning_rate": 8.62596967418684e-06, "loss": 0.7931, "step": 7355 }, { "epoch": 1.3229344601276634, "grad_norm": 1.4374008178710938, "learning_rate": 8.625568624956126e-06, "loss": 0.6825, "step": 7356 }, { "epoch": 1.323114267733525, "grad_norm": 1.4872626066207886, "learning_rate": 8.6251675265312e-06, "loss": 0.7526, "step": 7357 }, { "epoch": 1.323294075339387, "grad_norm": 1.4570385217666626, "learning_rate": 8.62476637891751e-06, "loss": 0.713, "step": 7358 }, { "epoch": 1.3234738829452486, "grad_norm": 1.4545071125030518, "learning_rate": 8.624365182120496e-06, "loss": 0.7147, "step": 7359 }, { "epoch": 1.3236536905511103, "grad_norm": 1.488663911819458, "learning_rate": 8.6239639361456e-06, "loss": 0.7217, "step": 7360 }, { "epoch": 1.3238334981569722, "grad_norm": 1.5607738494873047, "learning_rate": 8.62356264099827e-06, "loss": 0.6871, "step": 7361 }, { "epoch": 1.3240133057628338, "grad_norm": 1.4877349138259888, "learning_rate": 8.623161296683951e-06, "loss": 0.9771, "step": 7362 }, { "epoch": 1.3241931133686955, "grad_norm": 1.5150493383407593, "learning_rate": 8.622759903208085e-06, "loss": 0.7403, "step": 7363 }, { "epoch": 1.3243729209745572, "grad_norm": 1.5217865705490112, "learning_rate": 8.62235846057612e-06, "loss": 0.7386, "step": 7364 }, { "epoch": 1.3245527285804188, "grad_norm": 1.5014233589172363, "learning_rate": 8.621956968793506e-06, "loss": 0.6965, "step": 7365 }, { "epoch": 1.3247325361862807, "grad_norm": 1.4536486864089966, "learning_rate": 8.621555427865689e-06, "loss": 0.7345, "step": 7366 }, { "epoch": 1.3249123437921424, "grad_norm": 1.526364803314209, "learning_rate": 8.621153837798116e-06, "loss": 0.7746, "step": 7367 }, { "epoch": 1.325092151398004, "grad_norm": 1.538621425628662, "learning_rate": 8.620752198596235e-06, "loss": 0.7545, "step": 7368 }, { "epoch": 1.325271959003866, "grad_norm": 1.5123621225357056, "learning_rate": 8.620350510265498e-06, "loss": 0.7463, "step": 7369 }, { "epoch": 1.3254517666097276, "grad_norm": 1.2093154191970825, "learning_rate": 8.619948772811356e-06, "loss": 0.9479, "step": 7370 }, { "epoch": 1.3256315742155893, "grad_norm": 2.83670711517334, "learning_rate": 8.61954698623926e-06, "loss": 0.7709, "step": 7371 }, { "epoch": 1.325811381821451, "grad_norm": 1.5157943964004517, "learning_rate": 8.61914515055466e-06, "loss": 0.7594, "step": 7372 }, { "epoch": 1.3259911894273126, "grad_norm": 1.5215041637420654, "learning_rate": 8.618743265763008e-06, "loss": 0.7301, "step": 7373 }, { "epoch": 1.3261709970331745, "grad_norm": 1.46499764919281, "learning_rate": 8.618341331869759e-06, "loss": 0.7455, "step": 7374 }, { "epoch": 1.3263508046390362, "grad_norm": 1.638206958770752, "learning_rate": 8.617939348880366e-06, "loss": 0.7092, "step": 7375 }, { "epoch": 1.3265306122448979, "grad_norm": 1.4564483165740967, "learning_rate": 8.617537316800283e-06, "loss": 0.7571, "step": 7376 }, { "epoch": 1.3267104198507598, "grad_norm": 1.4762725830078125, "learning_rate": 8.617135235634966e-06, "loss": 0.7588, "step": 7377 }, { "epoch": 1.3268902274566214, "grad_norm": 1.3788478374481201, "learning_rate": 8.616733105389869e-06, "loss": 0.6849, "step": 7378 }, { "epoch": 1.327070035062483, "grad_norm": 1.0880701541900635, "learning_rate": 8.616330926070448e-06, "loss": 0.9631, "step": 7379 }, { "epoch": 1.327249842668345, "grad_norm": 1.5437694787979126, "learning_rate": 8.615928697682165e-06, "loss": 0.7389, "step": 7380 }, { "epoch": 1.3274296502742067, "grad_norm": 1.5539649724960327, "learning_rate": 8.615526420230472e-06, "loss": 0.813, "step": 7381 }, { "epoch": 1.3276094578800683, "grad_norm": 1.6205849647521973, "learning_rate": 8.61512409372083e-06, "loss": 0.7288, "step": 7382 }, { "epoch": 1.32778926548593, "grad_norm": 1.496595859527588, "learning_rate": 8.614721718158698e-06, "loss": 0.7543, "step": 7383 }, { "epoch": 1.3279690730917917, "grad_norm": 1.5018064975738525, "learning_rate": 8.614319293549534e-06, "loss": 0.7107, "step": 7384 }, { "epoch": 1.3281488806976536, "grad_norm": 1.1277740001678467, "learning_rate": 8.613916819898802e-06, "loss": 0.9851, "step": 7385 }, { "epoch": 1.3283286883035152, "grad_norm": 1.4307541847229004, "learning_rate": 8.613514297211958e-06, "loss": 0.7844, "step": 7386 }, { "epoch": 1.328508495909377, "grad_norm": 1.4150030612945557, "learning_rate": 8.613111725494467e-06, "loss": 0.7054, "step": 7387 }, { "epoch": 1.3286883035152388, "grad_norm": 1.5374047756195068, "learning_rate": 8.612709104751793e-06, "loss": 0.7059, "step": 7388 }, { "epoch": 1.3288681111211005, "grad_norm": 1.6239583492279053, "learning_rate": 8.612306434989395e-06, "loss": 0.844, "step": 7389 }, { "epoch": 1.3290479187269622, "grad_norm": 1.7752970457077026, "learning_rate": 8.611903716212738e-06, "loss": 0.7187, "step": 7390 }, { "epoch": 1.3292277263328238, "grad_norm": 1.5486277341842651, "learning_rate": 8.611500948427288e-06, "loss": 0.6883, "step": 7391 }, { "epoch": 1.3294075339386855, "grad_norm": 1.5804216861724854, "learning_rate": 8.61109813163851e-06, "loss": 0.7427, "step": 7392 }, { "epoch": 1.3295873415445474, "grad_norm": 1.6761044263839722, "learning_rate": 8.610695265851867e-06, "loss": 0.7241, "step": 7393 }, { "epoch": 1.329767149150409, "grad_norm": 1.3962841033935547, "learning_rate": 8.610292351072826e-06, "loss": 0.7369, "step": 7394 }, { "epoch": 1.3299469567562707, "grad_norm": 1.6513410806655884, "learning_rate": 8.609889387306856e-06, "loss": 0.7204, "step": 7395 }, { "epoch": 1.3301267643621326, "grad_norm": 1.3720437288284302, "learning_rate": 8.609486374559424e-06, "loss": 0.6392, "step": 7396 }, { "epoch": 1.3303065719679943, "grad_norm": 1.759413480758667, "learning_rate": 8.609083312835997e-06, "loss": 0.7451, "step": 7397 }, { "epoch": 1.330486379573856, "grad_norm": 1.580691933631897, "learning_rate": 8.608680202142046e-06, "loss": 0.7411, "step": 7398 }, { "epoch": 1.3306661871797176, "grad_norm": 1.123335361480713, "learning_rate": 8.60827704248304e-06, "loss": 0.975, "step": 7399 }, { "epoch": 1.3308459947855793, "grad_norm": 1.469504952430725, "learning_rate": 8.607873833864448e-06, "loss": 0.6982, "step": 7400 }, { "epoch": 1.3310258023914412, "grad_norm": 1.4364250898361206, "learning_rate": 8.607470576291744e-06, "loss": 0.7376, "step": 7401 }, { "epoch": 1.3312056099973029, "grad_norm": 0.9803930521011353, "learning_rate": 8.607067269770398e-06, "loss": 0.9401, "step": 7402 }, { "epoch": 1.3313854176031645, "grad_norm": 1.4646961688995361, "learning_rate": 8.60666391430588e-06, "loss": 0.6633, "step": 7403 }, { "epoch": 1.3315652252090264, "grad_norm": 1.4719089269638062, "learning_rate": 8.606260509903666e-06, "loss": 0.7622, "step": 7404 }, { "epoch": 1.331745032814888, "grad_norm": 1.4224870204925537, "learning_rate": 8.605857056569228e-06, "loss": 0.7455, "step": 7405 }, { "epoch": 1.3319248404207498, "grad_norm": 1.5691418647766113, "learning_rate": 8.605453554308041e-06, "loss": 0.7074, "step": 7406 }, { "epoch": 1.3321046480266117, "grad_norm": 1.5169702768325806, "learning_rate": 8.605050003125582e-06, "loss": 0.6699, "step": 7407 }, { "epoch": 1.3322844556324733, "grad_norm": 1.0670193433761597, "learning_rate": 8.604646403027324e-06, "loss": 0.9341, "step": 7408 }, { "epoch": 1.332464263238335, "grad_norm": 1.5388239622116089, "learning_rate": 8.604242754018743e-06, "loss": 0.7693, "step": 7409 }, { "epoch": 1.3326440708441967, "grad_norm": 1.4540539979934692, "learning_rate": 8.603839056105318e-06, "loss": 0.7271, "step": 7410 }, { "epoch": 1.3328238784500583, "grad_norm": 1.0713995695114136, "learning_rate": 8.603435309292524e-06, "loss": 0.9308, "step": 7411 }, { "epoch": 1.3330036860559202, "grad_norm": 1.4452180862426758, "learning_rate": 8.603031513585843e-06, "loss": 0.7139, "step": 7412 }, { "epoch": 1.333183493661782, "grad_norm": 1.048330307006836, "learning_rate": 8.602627668990754e-06, "loss": 0.9289, "step": 7413 }, { "epoch": 1.3333633012676436, "grad_norm": 1.5874667167663574, "learning_rate": 8.602223775512731e-06, "loss": 0.752, "step": 7414 }, { "epoch": 1.3335431088735055, "grad_norm": 1.4903301000595093, "learning_rate": 8.601819833157258e-06, "loss": 0.8332, "step": 7415 }, { "epoch": 1.3337229164793671, "grad_norm": 1.5485173463821411, "learning_rate": 8.601415841929817e-06, "loss": 0.7916, "step": 7416 }, { "epoch": 1.3339027240852288, "grad_norm": 1.460085153579712, "learning_rate": 8.60101180183589e-06, "loss": 0.7247, "step": 7417 }, { "epoch": 1.3340825316910905, "grad_norm": 1.514158010482788, "learning_rate": 8.600607712880956e-06, "loss": 0.7387, "step": 7418 }, { "epoch": 1.3342623392969521, "grad_norm": 1.3966354131698608, "learning_rate": 8.6002035750705e-06, "loss": 0.7093, "step": 7419 }, { "epoch": 1.334442146902814, "grad_norm": 1.4403091669082642, "learning_rate": 8.599799388410006e-06, "loss": 0.7434, "step": 7420 }, { "epoch": 1.3346219545086757, "grad_norm": 1.4945570230484009, "learning_rate": 8.599395152904959e-06, "loss": 0.7684, "step": 7421 }, { "epoch": 1.3348017621145374, "grad_norm": 1.5235016345977783, "learning_rate": 8.598990868560841e-06, "loss": 0.78, "step": 7422 }, { "epoch": 1.3349815697203993, "grad_norm": 1.2431339025497437, "learning_rate": 8.59858653538314e-06, "loss": 0.9258, "step": 7423 }, { "epoch": 1.335161377326261, "grad_norm": 1.54070246219635, "learning_rate": 8.59818215337734e-06, "loss": 0.8172, "step": 7424 }, { "epoch": 1.3353411849321226, "grad_norm": 1.447300672531128, "learning_rate": 8.597777722548931e-06, "loss": 0.7697, "step": 7425 }, { "epoch": 1.3355209925379843, "grad_norm": 1.0675365924835205, "learning_rate": 8.597373242903399e-06, "loss": 0.9911, "step": 7426 }, { "epoch": 1.335700800143846, "grad_norm": 5.254977703094482, "learning_rate": 8.596968714446233e-06, "loss": 0.7004, "step": 7427 }, { "epoch": 1.3358806077497078, "grad_norm": 1.4537038803100586, "learning_rate": 8.596564137182918e-06, "loss": 0.6877, "step": 7428 }, { "epoch": 1.3360604153555695, "grad_norm": 1.5094101428985596, "learning_rate": 8.59615951111895e-06, "loss": 0.7121, "step": 7429 }, { "epoch": 1.3362402229614312, "grad_norm": 1.4954997301101685, "learning_rate": 8.595754836259815e-06, "loss": 0.7315, "step": 7430 }, { "epoch": 1.336420030567293, "grad_norm": 1.4976823329925537, "learning_rate": 8.595350112611007e-06, "loss": 0.7357, "step": 7431 }, { "epoch": 1.3365998381731548, "grad_norm": 1.5431183576583862, "learning_rate": 8.594945340178014e-06, "loss": 0.69, "step": 7432 }, { "epoch": 1.3367796457790164, "grad_norm": 1.3883296251296997, "learning_rate": 8.594540518966328e-06, "loss": 0.7305, "step": 7433 }, { "epoch": 1.3369594533848783, "grad_norm": 1.529976487159729, "learning_rate": 8.594135648981445e-06, "loss": 0.7653, "step": 7434 }, { "epoch": 1.33713926099074, "grad_norm": 1.586043357849121, "learning_rate": 8.593730730228858e-06, "loss": 0.774, "step": 7435 }, { "epoch": 1.3373190685966017, "grad_norm": 1.5636167526245117, "learning_rate": 8.59332576271406e-06, "loss": 0.751, "step": 7436 }, { "epoch": 1.3374988762024633, "grad_norm": 1.553614854812622, "learning_rate": 8.592920746442547e-06, "loss": 0.7279, "step": 7437 }, { "epoch": 1.337678683808325, "grad_norm": 1.5652118921279907, "learning_rate": 8.592515681419812e-06, "loss": 0.8166, "step": 7438 }, { "epoch": 1.337858491414187, "grad_norm": 1.5166938304901123, "learning_rate": 8.592110567651355e-06, "loss": 0.7315, "step": 7439 }, { "epoch": 1.3380382990200486, "grad_norm": 1.9627060890197754, "learning_rate": 8.59170540514267e-06, "loss": 0.7422, "step": 7440 }, { "epoch": 1.3382181066259102, "grad_norm": 1.4728291034698486, "learning_rate": 8.591300193899257e-06, "loss": 0.8186, "step": 7441 }, { "epoch": 1.3383979142317721, "grad_norm": 1.4284915924072266, "learning_rate": 8.59089493392661e-06, "loss": 0.7409, "step": 7442 }, { "epoch": 1.3385777218376338, "grad_norm": 1.5010110139846802, "learning_rate": 8.590489625230231e-06, "loss": 0.7034, "step": 7443 }, { "epoch": 1.3387575294434955, "grad_norm": 1.214794397354126, "learning_rate": 8.590084267815622e-06, "loss": 0.9408, "step": 7444 }, { "epoch": 1.3389373370493571, "grad_norm": 1.4902925491333008, "learning_rate": 8.589678861688277e-06, "loss": 0.7747, "step": 7445 }, { "epoch": 1.3391171446552188, "grad_norm": 1.4950460195541382, "learning_rate": 8.589273406853701e-06, "loss": 0.7202, "step": 7446 }, { "epoch": 1.3392969522610807, "grad_norm": 1.7719318866729736, "learning_rate": 8.588867903317395e-06, "loss": 0.7834, "step": 7447 }, { "epoch": 1.3394767598669424, "grad_norm": 1.5613924264907837, "learning_rate": 8.58846235108486e-06, "loss": 0.7875, "step": 7448 }, { "epoch": 1.339656567472804, "grad_norm": 1.2357269525527954, "learning_rate": 8.5880567501616e-06, "loss": 0.9122, "step": 7449 }, { "epoch": 1.339836375078666, "grad_norm": 1.4781192541122437, "learning_rate": 8.587651100553116e-06, "loss": 0.776, "step": 7450 }, { "epoch": 1.3400161826845276, "grad_norm": 1.4495689868927002, "learning_rate": 8.587245402264916e-06, "loss": 0.7155, "step": 7451 }, { "epoch": 1.3401959902903893, "grad_norm": 1.4509400129318237, "learning_rate": 8.586839655302502e-06, "loss": 0.6786, "step": 7452 }, { "epoch": 1.340375797896251, "grad_norm": 1.4576166868209839, "learning_rate": 8.586433859671382e-06, "loss": 0.7911, "step": 7453 }, { "epoch": 1.3405556055021126, "grad_norm": 1.5861101150512695, "learning_rate": 8.586028015377059e-06, "loss": 0.7653, "step": 7454 }, { "epoch": 1.3407354131079745, "grad_norm": 1.0927878618240356, "learning_rate": 8.58562212242504e-06, "loss": 0.929, "step": 7455 }, { "epoch": 1.3409152207138362, "grad_norm": 1.4588686227798462, "learning_rate": 8.585216180820835e-06, "loss": 0.7565, "step": 7456 }, { "epoch": 1.3410950283196978, "grad_norm": 1.524096965789795, "learning_rate": 8.58481019056995e-06, "loss": 0.7508, "step": 7457 }, { "epoch": 1.3412748359255597, "grad_norm": 1.0195025205612183, "learning_rate": 8.584404151677896e-06, "loss": 0.9821, "step": 7458 }, { "epoch": 1.3414546435314214, "grad_norm": 1.2495009899139404, "learning_rate": 8.58399806415018e-06, "loss": 0.9429, "step": 7459 }, { "epoch": 1.341634451137283, "grad_norm": 1.4540499448776245, "learning_rate": 8.583591927992311e-06, "loss": 0.7643, "step": 7460 }, { "epoch": 1.341814258743145, "grad_norm": 1.0450246334075928, "learning_rate": 8.583185743209805e-06, "loss": 0.9166, "step": 7461 }, { "epoch": 1.3419940663490066, "grad_norm": 1.467556357383728, "learning_rate": 8.58277950980817e-06, "loss": 0.7318, "step": 7462 }, { "epoch": 1.3421738739548683, "grad_norm": 1.4785691499710083, "learning_rate": 8.582373227792915e-06, "loss": 0.7128, "step": 7463 }, { "epoch": 1.34235368156073, "grad_norm": 1.5279693603515625, "learning_rate": 8.581966897169558e-06, "loss": 0.739, "step": 7464 }, { "epoch": 1.3425334891665917, "grad_norm": 1.5883212089538574, "learning_rate": 8.58156051794361e-06, "loss": 0.7579, "step": 7465 }, { "epoch": 1.3427132967724535, "grad_norm": 1.4736937284469604, "learning_rate": 8.581154090120585e-06, "loss": 0.7145, "step": 7466 }, { "epoch": 1.3428931043783152, "grad_norm": 1.477570652961731, "learning_rate": 8.580747613705998e-06, "loss": 0.7523, "step": 7467 }, { "epoch": 1.3430729119841769, "grad_norm": 1.433134913444519, "learning_rate": 8.580341088705366e-06, "loss": 0.7662, "step": 7468 }, { "epoch": 1.3432527195900388, "grad_norm": 1.4743211269378662, "learning_rate": 8.579934515124202e-06, "loss": 0.7377, "step": 7469 }, { "epoch": 1.3434325271959004, "grad_norm": 1.4762879610061646, "learning_rate": 8.579527892968022e-06, "loss": 0.7237, "step": 7470 }, { "epoch": 1.3436123348017621, "grad_norm": 1.4086464643478394, "learning_rate": 8.579121222242348e-06, "loss": 0.7078, "step": 7471 }, { "epoch": 1.3437921424076238, "grad_norm": 1.5405033826828003, "learning_rate": 8.578714502952694e-06, "loss": 0.7906, "step": 7472 }, { "epoch": 1.3439719500134855, "grad_norm": 1.5753802061080933, "learning_rate": 8.57830773510458e-06, "loss": 0.7029, "step": 7473 }, { "epoch": 1.3441517576193474, "grad_norm": 1.3583029508590698, "learning_rate": 8.577900918703527e-06, "loss": 0.7044, "step": 7474 }, { "epoch": 1.344331565225209, "grad_norm": 1.4867467880249023, "learning_rate": 8.577494053755051e-06, "loss": 0.7084, "step": 7475 }, { "epoch": 1.3445113728310707, "grad_norm": 1.5535659790039062, "learning_rate": 8.577087140264677e-06, "loss": 0.8179, "step": 7476 }, { "epoch": 1.3446911804369326, "grad_norm": 1.4520676136016846, "learning_rate": 8.576680178237922e-06, "loss": 0.7263, "step": 7477 }, { "epoch": 1.3448709880427943, "grad_norm": 1.2445403337478638, "learning_rate": 8.576273167680312e-06, "loss": 0.9342, "step": 7478 }, { "epoch": 1.345050795648656, "grad_norm": 1.2076447010040283, "learning_rate": 8.575866108597366e-06, "loss": 0.9563, "step": 7479 }, { "epoch": 1.3452306032545176, "grad_norm": 1.5261229276657104, "learning_rate": 8.57545900099461e-06, "loss": 0.7342, "step": 7480 }, { "epoch": 1.3454104108603793, "grad_norm": 1.6392158269882202, "learning_rate": 8.575051844877566e-06, "loss": 0.7141, "step": 7481 }, { "epoch": 1.3455902184662412, "grad_norm": 1.4955904483795166, "learning_rate": 8.57464464025176e-06, "loss": 0.7007, "step": 7482 }, { "epoch": 1.3457700260721028, "grad_norm": 1.4604884386062622, "learning_rate": 8.574237387122717e-06, "loss": 0.7645, "step": 7483 }, { "epoch": 1.3459498336779645, "grad_norm": 1.1896744966506958, "learning_rate": 8.573830085495961e-06, "loss": 0.9275, "step": 7484 }, { "epoch": 1.3461296412838264, "grad_norm": 1.41640305519104, "learning_rate": 8.573422735377022e-06, "loss": 0.728, "step": 7485 }, { "epoch": 1.346309448889688, "grad_norm": 1.4886406660079956, "learning_rate": 8.573015336771425e-06, "loss": 0.7071, "step": 7486 }, { "epoch": 1.3464892564955497, "grad_norm": 2.042208671569824, "learning_rate": 8.572607889684696e-06, "loss": 0.7282, "step": 7487 }, { "epoch": 1.3466690641014116, "grad_norm": 1.5881907939910889, "learning_rate": 8.572200394122368e-06, "loss": 0.8234, "step": 7488 }, { "epoch": 1.3468488717072733, "grad_norm": 1.699940800666809, "learning_rate": 8.571792850089967e-06, "loss": 0.7304, "step": 7489 }, { "epoch": 1.347028679313135, "grad_norm": 1.4905891418457031, "learning_rate": 8.571385257593024e-06, "loss": 0.7458, "step": 7490 }, { "epoch": 1.3472084869189966, "grad_norm": 1.505311131477356, "learning_rate": 8.570977616637069e-06, "loss": 0.8018, "step": 7491 }, { "epoch": 1.3473882945248583, "grad_norm": 1.4336153268814087, "learning_rate": 8.570569927227634e-06, "loss": 0.7658, "step": 7492 }, { "epoch": 1.3475681021307202, "grad_norm": 1.5097101926803589, "learning_rate": 8.570162189370249e-06, "loss": 0.7715, "step": 7493 }, { "epoch": 1.3477479097365819, "grad_norm": 1.0895297527313232, "learning_rate": 8.569754403070446e-06, "loss": 0.9494, "step": 7494 }, { "epoch": 1.3479277173424435, "grad_norm": 1.5386730432510376, "learning_rate": 8.569346568333765e-06, "loss": 0.737, "step": 7495 }, { "epoch": 1.3481075249483054, "grad_norm": 1.2623463869094849, "learning_rate": 8.568938685165731e-06, "loss": 0.9394, "step": 7496 }, { "epoch": 1.348287332554167, "grad_norm": 1.367080807685852, "learning_rate": 8.568530753571882e-06, "loss": 0.9267, "step": 7497 }, { "epoch": 1.3484671401600288, "grad_norm": 1.5248850584030151, "learning_rate": 8.568122773557754e-06, "loss": 0.6995, "step": 7498 }, { "epoch": 1.3486469477658904, "grad_norm": 1.565481424331665, "learning_rate": 8.567714745128881e-06, "loss": 0.6626, "step": 7499 }, { "epoch": 1.3488267553717521, "grad_norm": 1.1514590978622437, "learning_rate": 8.567306668290801e-06, "loss": 0.9548, "step": 7500 }, { "epoch": 1.3488267553717521, "eval_loss": 0.8041090369224548, "eval_runtime": 148.6968, "eval_samples_per_second": 96.72, "eval_steps_per_second": 1.513, "step": 7500 }, { "epoch": 1.349006562977614, "grad_norm": 1.5593796968460083, "learning_rate": 8.566898543049049e-06, "loss": 0.7786, "step": 7501 }, { "epoch": 1.3491863705834757, "grad_norm": 1.4391560554504395, "learning_rate": 8.566490369409165e-06, "loss": 0.6909, "step": 7502 }, { "epoch": 1.3493661781893374, "grad_norm": 1.3756396770477295, "learning_rate": 8.566082147376687e-06, "loss": 0.6823, "step": 7503 }, { "epoch": 1.3495459857951992, "grad_norm": 1.393905758857727, "learning_rate": 8.565673876957152e-06, "loss": 0.6942, "step": 7504 }, { "epoch": 1.349725793401061, "grad_norm": 1.471544861793518, "learning_rate": 8.565265558156101e-06, "loss": 0.7569, "step": 7505 }, { "epoch": 1.3499056010069226, "grad_norm": 1.5625107288360596, "learning_rate": 8.564857190979076e-06, "loss": 0.7719, "step": 7506 }, { "epoch": 1.3500854086127843, "grad_norm": 1.47052001953125, "learning_rate": 8.564448775431618e-06, "loss": 0.7283, "step": 7507 }, { "epoch": 1.350265216218646, "grad_norm": 1.612581729888916, "learning_rate": 8.564040311519264e-06, "loss": 0.7773, "step": 7508 }, { "epoch": 1.3504450238245078, "grad_norm": 1.7312514781951904, "learning_rate": 8.56363179924756e-06, "loss": 0.7547, "step": 7509 }, { "epoch": 1.3506248314303695, "grad_norm": 1.6331253051757812, "learning_rate": 8.563223238622049e-06, "loss": 0.7677, "step": 7510 }, { "epoch": 1.3508046390362312, "grad_norm": 1.4377251863479614, "learning_rate": 8.562814629648276e-06, "loss": 0.7784, "step": 7511 }, { "epoch": 1.350984446642093, "grad_norm": 1.5831892490386963, "learning_rate": 8.56240597233178e-06, "loss": 0.7821, "step": 7512 }, { "epoch": 1.3511642542479547, "grad_norm": 1.5519665479660034, "learning_rate": 8.56199726667811e-06, "loss": 0.7135, "step": 7513 }, { "epoch": 1.3513440618538164, "grad_norm": 1.4078054428100586, "learning_rate": 8.561588512692814e-06, "loss": 0.7315, "step": 7514 }, { "epoch": 1.351523869459678, "grad_norm": 1.4336166381835938, "learning_rate": 8.561179710381431e-06, "loss": 0.8093, "step": 7515 }, { "epoch": 1.35170367706554, "grad_norm": 1.5092881917953491, "learning_rate": 8.560770859749515e-06, "loss": 0.7416, "step": 7516 }, { "epoch": 1.3518834846714016, "grad_norm": 1.676653504371643, "learning_rate": 8.56036196080261e-06, "loss": 0.7628, "step": 7517 }, { "epoch": 1.3520632922772633, "grad_norm": 1.512198567390442, "learning_rate": 8.559953013546263e-06, "loss": 0.8077, "step": 7518 }, { "epoch": 1.352243099883125, "grad_norm": 1.8546292781829834, "learning_rate": 8.559544017986027e-06, "loss": 0.7556, "step": 7519 }, { "epoch": 1.3524229074889869, "grad_norm": 1.5314795970916748, "learning_rate": 8.559134974127448e-06, "loss": 0.7863, "step": 7520 }, { "epoch": 1.3526027150948485, "grad_norm": 1.7447230815887451, "learning_rate": 8.558725881976078e-06, "loss": 0.7132, "step": 7521 }, { "epoch": 1.3527825227007102, "grad_norm": 1.5225269794464111, "learning_rate": 8.558316741537466e-06, "loss": 0.7325, "step": 7522 }, { "epoch": 1.352962330306572, "grad_norm": 1.2071305513381958, "learning_rate": 8.557907552817168e-06, "loss": 0.9262, "step": 7523 }, { "epoch": 1.3531421379124338, "grad_norm": 1.4607651233673096, "learning_rate": 8.55749831582073e-06, "loss": 0.6382, "step": 7524 }, { "epoch": 1.3533219455182954, "grad_norm": 1.4184311628341675, "learning_rate": 8.55708903055371e-06, "loss": 0.6886, "step": 7525 }, { "epoch": 1.353501753124157, "grad_norm": 1.671531319618225, "learning_rate": 8.556679697021657e-06, "loss": 0.7758, "step": 7526 }, { "epoch": 1.3536815607300188, "grad_norm": 1.0687174797058105, "learning_rate": 8.55627031523013e-06, "loss": 0.9378, "step": 7527 }, { "epoch": 1.3538613683358807, "grad_norm": 1.558206558227539, "learning_rate": 8.55586088518468e-06, "loss": 0.7736, "step": 7528 }, { "epoch": 1.3540411759417423, "grad_norm": 1.4104357957839966, "learning_rate": 8.555451406890862e-06, "loss": 0.6819, "step": 7529 }, { "epoch": 1.354220983547604, "grad_norm": 1.7555259466171265, "learning_rate": 8.555041880354237e-06, "loss": 0.8043, "step": 7530 }, { "epoch": 1.354400791153466, "grad_norm": 1.4340616464614868, "learning_rate": 8.554632305580355e-06, "loss": 0.6681, "step": 7531 }, { "epoch": 1.3545805987593276, "grad_norm": 1.1901936531066895, "learning_rate": 8.554222682574777e-06, "loss": 0.9525, "step": 7532 }, { "epoch": 1.3547604063651892, "grad_norm": 1.5634368658065796, "learning_rate": 8.553813011343062e-06, "loss": 0.7861, "step": 7533 }, { "epoch": 1.354940213971051, "grad_norm": 1.44676673412323, "learning_rate": 8.553403291890767e-06, "loss": 0.6892, "step": 7534 }, { "epoch": 1.3551200215769126, "grad_norm": 1.5883296728134155, "learning_rate": 8.552993524223453e-06, "loss": 0.7481, "step": 7535 }, { "epoch": 1.3552998291827745, "grad_norm": 1.6724460124969482, "learning_rate": 8.552583708346678e-06, "loss": 0.8376, "step": 7536 }, { "epoch": 1.3554796367886361, "grad_norm": 1.4354760646820068, "learning_rate": 8.552173844266003e-06, "loss": 0.7514, "step": 7537 }, { "epoch": 1.3556594443944978, "grad_norm": 1.5667251348495483, "learning_rate": 8.551763931986991e-06, "loss": 0.8173, "step": 7538 }, { "epoch": 1.3558392520003597, "grad_norm": 1.2241753339767456, "learning_rate": 8.551353971515202e-06, "loss": 0.9618, "step": 7539 }, { "epoch": 1.3560190596062214, "grad_norm": 1.463673710823059, "learning_rate": 8.5509439628562e-06, "loss": 0.7174, "step": 7540 }, { "epoch": 1.356198867212083, "grad_norm": 1.4867668151855469, "learning_rate": 8.550533906015549e-06, "loss": 0.6784, "step": 7541 }, { "epoch": 1.3563786748179447, "grad_norm": 1.4027738571166992, "learning_rate": 8.550123800998808e-06, "loss": 0.6998, "step": 7542 }, { "epoch": 1.3565584824238064, "grad_norm": 1.591050148010254, "learning_rate": 8.549713647811548e-06, "loss": 0.8315, "step": 7543 }, { "epoch": 1.3567382900296683, "grad_norm": 1.5138769149780273, "learning_rate": 8.549303446459331e-06, "loss": 0.6653, "step": 7544 }, { "epoch": 1.35691809763553, "grad_norm": 1.436698079109192, "learning_rate": 8.548893196947725e-06, "loss": 0.7214, "step": 7545 }, { "epoch": 1.3570979052413916, "grad_norm": 1.554084062576294, "learning_rate": 8.548482899282294e-06, "loss": 0.7592, "step": 7546 }, { "epoch": 1.3572777128472535, "grad_norm": 1.6282895803451538, "learning_rate": 8.548072553468604e-06, "loss": 0.828, "step": 7547 }, { "epoch": 1.3574575204531152, "grad_norm": 1.5497952699661255, "learning_rate": 8.547662159512227e-06, "loss": 0.794, "step": 7548 }, { "epoch": 1.3576373280589769, "grad_norm": 1.2136353254318237, "learning_rate": 8.547251717418729e-06, "loss": 0.9116, "step": 7549 }, { "epoch": 1.3578171356648387, "grad_norm": 1.5095667839050293, "learning_rate": 8.546841227193679e-06, "loss": 0.7357, "step": 7550 }, { "epoch": 1.3579969432707004, "grad_norm": 1.0656230449676514, "learning_rate": 8.546430688842648e-06, "loss": 0.9682, "step": 7551 }, { "epoch": 1.358176750876562, "grad_norm": 1.4328553676605225, "learning_rate": 8.546020102371207e-06, "loss": 0.7587, "step": 7552 }, { "epoch": 1.3583565584824238, "grad_norm": 1.0764436721801758, "learning_rate": 8.545609467784926e-06, "loss": 0.939, "step": 7553 }, { "epoch": 1.3585363660882854, "grad_norm": 1.4291876554489136, "learning_rate": 8.545198785089374e-06, "loss": 0.7358, "step": 7554 }, { "epoch": 1.3587161736941473, "grad_norm": 1.5641320943832397, "learning_rate": 8.54478805429013e-06, "loss": 0.7381, "step": 7555 }, { "epoch": 1.358895981300009, "grad_norm": 1.4895888566970825, "learning_rate": 8.54437727539276e-06, "loss": 0.7651, "step": 7556 }, { "epoch": 1.3590757889058707, "grad_norm": 1.496013879776001, "learning_rate": 8.543966448402846e-06, "loss": 0.7534, "step": 7557 }, { "epoch": 1.3592555965117326, "grad_norm": 1.4685543775558472, "learning_rate": 8.543555573325952e-06, "loss": 0.7848, "step": 7558 }, { "epoch": 1.3594354041175942, "grad_norm": 1.7548127174377441, "learning_rate": 8.54314465016766e-06, "loss": 0.7699, "step": 7559 }, { "epoch": 1.359615211723456, "grad_norm": 1.6377216577529907, "learning_rate": 8.542733678933545e-06, "loss": 0.7451, "step": 7560 }, { "epoch": 1.3597950193293176, "grad_norm": 1.4700841903686523, "learning_rate": 8.542322659629182e-06, "loss": 0.6635, "step": 7561 }, { "epoch": 1.3599748269351792, "grad_norm": 1.9887124300003052, "learning_rate": 8.54191159226015e-06, "loss": 0.7244, "step": 7562 }, { "epoch": 1.3601546345410411, "grad_norm": 1.9597399234771729, "learning_rate": 8.541500476832025e-06, "loss": 0.8142, "step": 7563 }, { "epoch": 1.3603344421469028, "grad_norm": 1.5094531774520874, "learning_rate": 8.541089313350384e-06, "loss": 0.7631, "step": 7564 }, { "epoch": 1.3605142497527645, "grad_norm": 1.474880576133728, "learning_rate": 8.540678101820808e-06, "loss": 0.8143, "step": 7565 }, { "epoch": 1.3606940573586264, "grad_norm": 1.4125173091888428, "learning_rate": 8.540266842248877e-06, "loss": 0.7166, "step": 7566 }, { "epoch": 1.360873864964488, "grad_norm": 1.359144926071167, "learning_rate": 8.539855534640169e-06, "loss": 0.9374, "step": 7567 }, { "epoch": 1.3610536725703497, "grad_norm": 1.4031215906143188, "learning_rate": 8.539444179000266e-06, "loss": 0.6892, "step": 7568 }, { "epoch": 1.3612334801762114, "grad_norm": 1.4232776165008545, "learning_rate": 8.53903277533475e-06, "loss": 0.6793, "step": 7569 }, { "epoch": 1.361413287782073, "grad_norm": 1.5962687730789185, "learning_rate": 8.538621323649203e-06, "loss": 0.7523, "step": 7570 }, { "epoch": 1.361593095387935, "grad_norm": 1.0116928815841675, "learning_rate": 8.538209823949208e-06, "loss": 0.94, "step": 7571 }, { "epoch": 1.3617729029937966, "grad_norm": 1.4750112295150757, "learning_rate": 8.537798276240349e-06, "loss": 0.7174, "step": 7572 }, { "epoch": 1.3619527105996583, "grad_norm": 1.485614538192749, "learning_rate": 8.537386680528209e-06, "loss": 0.7328, "step": 7573 }, { "epoch": 1.3621325182055202, "grad_norm": 1.5055203437805176, "learning_rate": 8.536975036818372e-06, "loss": 0.7287, "step": 7574 }, { "epoch": 1.3623123258113818, "grad_norm": 1.400631070137024, "learning_rate": 8.536563345116426e-06, "loss": 0.7126, "step": 7575 }, { "epoch": 1.3624921334172435, "grad_norm": 1.4288283586502075, "learning_rate": 8.536151605427955e-06, "loss": 0.6909, "step": 7576 }, { "epoch": 1.3626719410231054, "grad_norm": 1.257493495941162, "learning_rate": 8.535739817758549e-06, "loss": 0.8801, "step": 7577 }, { "epoch": 1.362851748628967, "grad_norm": 1.5101927518844604, "learning_rate": 8.53532798211379e-06, "loss": 0.7106, "step": 7578 }, { "epoch": 1.3630315562348287, "grad_norm": 1.610153079032898, "learning_rate": 8.53491609849927e-06, "loss": 0.7823, "step": 7579 }, { "epoch": 1.3632113638406904, "grad_norm": 1.456786870956421, "learning_rate": 8.534504166920577e-06, "loss": 0.6798, "step": 7580 }, { "epoch": 1.363391171446552, "grad_norm": 1.47708261013031, "learning_rate": 8.5340921873833e-06, "loss": 0.6947, "step": 7581 }, { "epoch": 1.363570979052414, "grad_norm": 1.4749932289123535, "learning_rate": 8.53368015989303e-06, "loss": 0.7526, "step": 7582 }, { "epoch": 1.3637507866582756, "grad_norm": 1.3729054927825928, "learning_rate": 8.533268084455357e-06, "loss": 0.7749, "step": 7583 }, { "epoch": 1.3639305942641373, "grad_norm": 1.5888475179672241, "learning_rate": 8.532855961075872e-06, "loss": 0.754, "step": 7584 }, { "epoch": 1.3641104018699992, "grad_norm": 1.7745306491851807, "learning_rate": 8.532443789760168e-06, "loss": 0.886, "step": 7585 }, { "epoch": 1.3642902094758609, "grad_norm": 1.1135333776474, "learning_rate": 8.532031570513835e-06, "loss": 0.9429, "step": 7586 }, { "epoch": 1.3644700170817226, "grad_norm": 1.4582648277282715, "learning_rate": 8.531619303342468e-06, "loss": 0.6939, "step": 7587 }, { "epoch": 1.3646498246875842, "grad_norm": 1.4238094091415405, "learning_rate": 8.531206988251663e-06, "loss": 0.7261, "step": 7588 }, { "epoch": 1.364829632293446, "grad_norm": 1.444965124130249, "learning_rate": 8.530794625247013e-06, "loss": 0.7826, "step": 7589 }, { "epoch": 1.3650094398993078, "grad_norm": 1.4722752571105957, "learning_rate": 8.53038221433411e-06, "loss": 0.7723, "step": 7590 }, { "epoch": 1.3651892475051695, "grad_norm": 1.4923855066299438, "learning_rate": 8.529969755518554e-06, "loss": 0.7519, "step": 7591 }, { "epoch": 1.3653690551110311, "grad_norm": 1.5464714765548706, "learning_rate": 8.52955724880594e-06, "loss": 0.7984, "step": 7592 }, { "epoch": 1.365548862716893, "grad_norm": 1.484241247177124, "learning_rate": 8.529144694201866e-06, "loss": 0.6795, "step": 7593 }, { "epoch": 1.3657286703227547, "grad_norm": 1.4802391529083252, "learning_rate": 8.52873209171193e-06, "loss": 0.7067, "step": 7594 }, { "epoch": 1.3659084779286164, "grad_norm": 1.5761158466339111, "learning_rate": 8.528319441341728e-06, "loss": 0.7355, "step": 7595 }, { "epoch": 1.366088285534478, "grad_norm": 1.4627152681350708, "learning_rate": 8.52790674309686e-06, "loss": 0.7464, "step": 7596 }, { "epoch": 1.3662680931403397, "grad_norm": 1.5247901678085327, "learning_rate": 8.527493996982927e-06, "loss": 0.6933, "step": 7597 }, { "epoch": 1.3664479007462016, "grad_norm": 1.0129576921463013, "learning_rate": 8.52708120300553e-06, "loss": 0.9783, "step": 7598 }, { "epoch": 1.3666277083520633, "grad_norm": 1.4954005479812622, "learning_rate": 8.52666836117027e-06, "loss": 0.761, "step": 7599 }, { "epoch": 1.366807515957925, "grad_norm": 1.5696306228637695, "learning_rate": 8.526255471482747e-06, "loss": 0.7598, "step": 7600 }, { "epoch": 1.3669873235637868, "grad_norm": 1.3952388763427734, "learning_rate": 8.525842533948566e-06, "loss": 0.747, "step": 7601 }, { "epoch": 1.3671671311696485, "grad_norm": 1.0904704332351685, "learning_rate": 8.525429548573323e-06, "loss": 0.9476, "step": 7602 }, { "epoch": 1.3673469387755102, "grad_norm": 1.4347014427185059, "learning_rate": 8.525016515362632e-06, "loss": 0.7442, "step": 7603 }, { "epoch": 1.367526746381372, "grad_norm": 1.499395489692688, "learning_rate": 8.52460343432209e-06, "loss": 0.7418, "step": 7604 }, { "epoch": 1.3677065539872337, "grad_norm": 1.4719759225845337, "learning_rate": 8.524190305457304e-06, "loss": 0.7857, "step": 7605 }, { "epoch": 1.3678863615930954, "grad_norm": 1.159764289855957, "learning_rate": 8.52377712877388e-06, "loss": 0.9281, "step": 7606 }, { "epoch": 1.368066169198957, "grad_norm": 2.1710102558135986, "learning_rate": 8.523363904277424e-06, "loss": 0.699, "step": 7607 }, { "epoch": 1.3682459768048187, "grad_norm": 1.4387142658233643, "learning_rate": 8.522950631973543e-06, "loss": 0.7443, "step": 7608 }, { "epoch": 1.3684257844106806, "grad_norm": 1.4690791368484497, "learning_rate": 8.522537311867846e-06, "loss": 0.7607, "step": 7609 }, { "epoch": 1.3686055920165423, "grad_norm": 1.4844927787780762, "learning_rate": 8.522123943965938e-06, "loss": 0.6966, "step": 7610 }, { "epoch": 1.368785399622404, "grad_norm": 1.4148871898651123, "learning_rate": 8.52171052827343e-06, "loss": 0.7838, "step": 7611 }, { "epoch": 1.3689652072282659, "grad_norm": 1.2527978420257568, "learning_rate": 8.521297064795931e-06, "loss": 0.9334, "step": 7612 }, { "epoch": 1.3691450148341275, "grad_norm": 1.5204020738601685, "learning_rate": 8.520883553539052e-06, "loss": 0.7241, "step": 7613 }, { "epoch": 1.3693248224399892, "grad_norm": 1.4788764715194702, "learning_rate": 8.520469994508403e-06, "loss": 0.7419, "step": 7614 }, { "epoch": 1.3695046300458509, "grad_norm": 1.4308730363845825, "learning_rate": 8.520056387709594e-06, "loss": 0.7503, "step": 7615 }, { "epoch": 1.3696844376517125, "grad_norm": 1.050169587135315, "learning_rate": 8.51964273314824e-06, "loss": 0.9479, "step": 7616 }, { "epoch": 1.3698642452575744, "grad_norm": 1.5050725936889648, "learning_rate": 8.519229030829952e-06, "loss": 0.7462, "step": 7617 }, { "epoch": 1.3700440528634361, "grad_norm": 1.4749857187271118, "learning_rate": 8.518815280760344e-06, "loss": 0.7079, "step": 7618 }, { "epoch": 1.3702238604692978, "grad_norm": 1.5009959936141968, "learning_rate": 8.51840148294503e-06, "loss": 0.7229, "step": 7619 }, { "epoch": 1.3704036680751597, "grad_norm": 1.4693012237548828, "learning_rate": 8.517987637389621e-06, "loss": 0.7008, "step": 7620 }, { "epoch": 1.3705834756810213, "grad_norm": 1.8339060544967651, "learning_rate": 8.51757374409974e-06, "loss": 0.7319, "step": 7621 }, { "epoch": 1.370763283286883, "grad_norm": 1.606947660446167, "learning_rate": 8.517159803080999e-06, "loss": 0.7373, "step": 7622 }, { "epoch": 1.3709430908927447, "grad_norm": 1.5433779954910278, "learning_rate": 8.516745814339013e-06, "loss": 0.6759, "step": 7623 }, { "epoch": 1.3711228984986064, "grad_norm": 1.4030014276504517, "learning_rate": 8.5163317778794e-06, "loss": 0.7139, "step": 7624 }, { "epoch": 1.3713027061044682, "grad_norm": 1.0758249759674072, "learning_rate": 8.51591769370778e-06, "loss": 0.9821, "step": 7625 }, { "epoch": 1.37148251371033, "grad_norm": 1.4719085693359375, "learning_rate": 8.51550356182977e-06, "loss": 0.7521, "step": 7626 }, { "epoch": 1.3716623213161916, "grad_norm": 1.5660905838012695, "learning_rate": 8.51508938225099e-06, "loss": 0.7407, "step": 7627 }, { "epoch": 1.3718421289220535, "grad_norm": 1.0197957754135132, "learning_rate": 8.514675154977058e-06, "loss": 0.9432, "step": 7628 }, { "epoch": 1.3720219365279152, "grad_norm": 1.5223437547683716, "learning_rate": 8.514260880013596e-06, "loss": 0.6929, "step": 7629 }, { "epoch": 1.3722017441337768, "grad_norm": 1.4498043060302734, "learning_rate": 8.513846557366225e-06, "loss": 0.7207, "step": 7630 }, { "epoch": 1.3723815517396387, "grad_norm": 1.5166265964508057, "learning_rate": 8.513432187040568e-06, "loss": 0.7368, "step": 7631 }, { "epoch": 1.3725613593455004, "grad_norm": 1.4957760572433472, "learning_rate": 8.513017769042246e-06, "loss": 0.7005, "step": 7632 }, { "epoch": 1.372741166951362, "grad_norm": 1.531678557395935, "learning_rate": 8.512603303376883e-06, "loss": 0.7623, "step": 7633 }, { "epoch": 1.3729209745572237, "grad_norm": 1.4371296167373657, "learning_rate": 8.512188790050102e-06, "loss": 0.7059, "step": 7634 }, { "epoch": 1.3731007821630854, "grad_norm": 1.486167073249817, "learning_rate": 8.511774229067527e-06, "loss": 0.7066, "step": 7635 }, { "epoch": 1.3732805897689473, "grad_norm": 1.4664764404296875, "learning_rate": 8.511359620434782e-06, "loss": 0.7589, "step": 7636 }, { "epoch": 1.373460397374809, "grad_norm": 1.5220026969909668, "learning_rate": 8.510944964157497e-06, "loss": 0.7678, "step": 7637 }, { "epoch": 1.3736402049806706, "grad_norm": 1.501302719116211, "learning_rate": 8.510530260241294e-06, "loss": 0.6915, "step": 7638 }, { "epoch": 1.3738200125865325, "grad_norm": 1.460440993309021, "learning_rate": 8.510115508691802e-06, "loss": 0.7369, "step": 7639 }, { "epoch": 1.3739998201923942, "grad_norm": 1.443248987197876, "learning_rate": 8.50970070951465e-06, "loss": 0.7488, "step": 7640 }, { "epoch": 1.3741796277982559, "grad_norm": 1.4728230237960815, "learning_rate": 8.509285862715463e-06, "loss": 0.7166, "step": 7641 }, { "epoch": 1.3743594354041175, "grad_norm": 1.534155011177063, "learning_rate": 8.508870968299871e-06, "loss": 0.79, "step": 7642 }, { "epoch": 1.3745392430099792, "grad_norm": 1.4275344610214233, "learning_rate": 8.508456026273505e-06, "loss": 0.7183, "step": 7643 }, { "epoch": 1.374719050615841, "grad_norm": 1.4398359060287476, "learning_rate": 8.508041036641994e-06, "loss": 0.8266, "step": 7644 }, { "epoch": 1.3748988582217028, "grad_norm": 1.522732138633728, "learning_rate": 8.507625999410969e-06, "loss": 0.7126, "step": 7645 }, { "epoch": 1.3750786658275644, "grad_norm": 1.6912399530410767, "learning_rate": 8.507210914586062e-06, "loss": 0.7775, "step": 7646 }, { "epoch": 1.3752584734334263, "grad_norm": 1.4772305488586426, "learning_rate": 8.506795782172905e-06, "loss": 0.9543, "step": 7647 }, { "epoch": 1.375438281039288, "grad_norm": 1.6035906076431274, "learning_rate": 8.50638060217713e-06, "loss": 0.7808, "step": 7648 }, { "epoch": 1.3756180886451497, "grad_norm": 1.4489096403121948, "learning_rate": 8.505965374604372e-06, "loss": 0.6813, "step": 7649 }, { "epoch": 1.3757978962510113, "grad_norm": 1.425910234451294, "learning_rate": 8.505550099460264e-06, "loss": 0.7162, "step": 7650 }, { "epoch": 1.375977703856873, "grad_norm": 1.6374967098236084, "learning_rate": 8.505134776750442e-06, "loss": 0.7817, "step": 7651 }, { "epoch": 1.376157511462735, "grad_norm": 1.1226747035980225, "learning_rate": 8.504719406480537e-06, "loss": 0.9347, "step": 7652 }, { "epoch": 1.3763373190685966, "grad_norm": 1.4640451669692993, "learning_rate": 8.504303988656191e-06, "loss": 0.6655, "step": 7653 }, { "epoch": 1.3765171266744582, "grad_norm": 1.5770539045333862, "learning_rate": 8.503888523283037e-06, "loss": 0.6911, "step": 7654 }, { "epoch": 1.3766969342803201, "grad_norm": 1.4174529314041138, "learning_rate": 8.503473010366713e-06, "loss": 0.6969, "step": 7655 }, { "epoch": 1.3768767418861818, "grad_norm": 1.3583710193634033, "learning_rate": 8.503057449912858e-06, "loss": 0.696, "step": 7656 }, { "epoch": 1.3770565494920435, "grad_norm": 1.4569528102874756, "learning_rate": 8.50264184192711e-06, "loss": 0.7475, "step": 7657 }, { "epoch": 1.3772363570979054, "grad_norm": 1.4627392292022705, "learning_rate": 8.502226186415108e-06, "loss": 0.7308, "step": 7658 }, { "epoch": 1.377416164703767, "grad_norm": 1.534112572669983, "learning_rate": 8.501810483382492e-06, "loss": 0.6932, "step": 7659 }, { "epoch": 1.3775959723096287, "grad_norm": 1.383864402770996, "learning_rate": 8.501394732834903e-06, "loss": 0.7777, "step": 7660 }, { "epoch": 1.3777757799154904, "grad_norm": 1.5805721282958984, "learning_rate": 8.50097893477798e-06, "loss": 0.7175, "step": 7661 }, { "epoch": 1.377955587521352, "grad_norm": 1.4891502857208252, "learning_rate": 8.500563089217369e-06, "loss": 0.764, "step": 7662 }, { "epoch": 1.378135395127214, "grad_norm": 1.028696894645691, "learning_rate": 8.500147196158708e-06, "loss": 0.9138, "step": 7663 }, { "epoch": 1.3783152027330756, "grad_norm": 1.5838078260421753, "learning_rate": 8.499731255607644e-06, "loss": 0.75, "step": 7664 }, { "epoch": 1.3784950103389373, "grad_norm": 1.0613974332809448, "learning_rate": 8.499315267569817e-06, "loss": 0.9429, "step": 7665 }, { "epoch": 1.3786748179447992, "grad_norm": 1.109880805015564, "learning_rate": 8.498899232050874e-06, "loss": 0.9159, "step": 7666 }, { "epoch": 1.3788546255506609, "grad_norm": 1.5008314847946167, "learning_rate": 8.49848314905646e-06, "loss": 0.709, "step": 7667 }, { "epoch": 1.3790344331565225, "grad_norm": 1.4600300788879395, "learning_rate": 8.498067018592221e-06, "loss": 0.8067, "step": 7668 }, { "epoch": 1.3792142407623842, "grad_norm": 1.5093945264816284, "learning_rate": 8.497650840663801e-06, "loss": 0.7554, "step": 7669 }, { "epoch": 1.3793940483682459, "grad_norm": 1.596213936805725, "learning_rate": 8.49723461527685e-06, "loss": 0.7408, "step": 7670 }, { "epoch": 1.3795738559741078, "grad_norm": 1.5213385820388794, "learning_rate": 8.496818342437013e-06, "loss": 0.6921, "step": 7671 }, { "epoch": 1.3797536635799694, "grad_norm": 1.1775649785995483, "learning_rate": 8.49640202214994e-06, "loss": 0.9281, "step": 7672 }, { "epoch": 1.379933471185831, "grad_norm": 1.1704462766647339, "learning_rate": 8.495985654421279e-06, "loss": 0.954, "step": 7673 }, { "epoch": 1.380113278791693, "grad_norm": 1.5429902076721191, "learning_rate": 8.495569239256681e-06, "loss": 0.7619, "step": 7674 }, { "epoch": 1.3802930863975547, "grad_norm": 1.1511800289154053, "learning_rate": 8.495152776661792e-06, "loss": 0.9456, "step": 7675 }, { "epoch": 1.3804728940034163, "grad_norm": 1.957021951675415, "learning_rate": 8.494736266642269e-06, "loss": 0.7371, "step": 7676 }, { "epoch": 1.380652701609278, "grad_norm": 1.0827966928482056, "learning_rate": 8.49431970920376e-06, "loss": 0.9257, "step": 7677 }, { "epoch": 1.3808325092151397, "grad_norm": 1.3886702060699463, "learning_rate": 8.493903104351916e-06, "loss": 0.7466, "step": 7678 }, { "epoch": 1.3810123168210016, "grad_norm": 1.523151159286499, "learning_rate": 8.493486452092391e-06, "loss": 0.8183, "step": 7679 }, { "epoch": 1.3811921244268632, "grad_norm": 1.7481716871261597, "learning_rate": 8.493069752430841e-06, "loss": 0.6955, "step": 7680 }, { "epoch": 1.381371932032725, "grad_norm": 1.510033369064331, "learning_rate": 8.492653005372917e-06, "loss": 0.7318, "step": 7681 }, { "epoch": 1.3815517396385868, "grad_norm": 1.4242061376571655, "learning_rate": 8.492236210924274e-06, "loss": 0.7637, "step": 7682 }, { "epoch": 1.3817315472444485, "grad_norm": 1.1749905347824097, "learning_rate": 8.491819369090567e-06, "loss": 0.9418, "step": 7683 }, { "epoch": 1.3819113548503101, "grad_norm": 1.389636754989624, "learning_rate": 8.491402479877455e-06, "loss": 0.7188, "step": 7684 }, { "epoch": 1.382091162456172, "grad_norm": 1.1296321153640747, "learning_rate": 8.490985543290593e-06, "loss": 0.9261, "step": 7685 }, { "epoch": 1.3822709700620337, "grad_norm": 1.4995454549789429, "learning_rate": 8.490568559335637e-06, "loss": 0.7451, "step": 7686 }, { "epoch": 1.3824507776678954, "grad_norm": 1.5011686086654663, "learning_rate": 8.490151528018245e-06, "loss": 0.7715, "step": 7687 }, { "epoch": 1.382630585273757, "grad_norm": 1.6930032968521118, "learning_rate": 8.489734449344078e-06, "loss": 0.734, "step": 7688 }, { "epoch": 1.3828103928796187, "grad_norm": 1.5597935914993286, "learning_rate": 8.489317323318791e-06, "loss": 0.7854, "step": 7689 }, { "epoch": 1.3829902004854806, "grad_norm": 1.1511633396148682, "learning_rate": 8.488900149948046e-06, "loss": 0.961, "step": 7690 }, { "epoch": 1.3831700080913423, "grad_norm": 1.5398424863815308, "learning_rate": 8.488482929237508e-06, "loss": 0.7617, "step": 7691 }, { "epoch": 1.383349815697204, "grad_norm": 1.4642179012298584, "learning_rate": 8.48806566119283e-06, "loss": 0.6997, "step": 7692 }, { "epoch": 1.3835296233030658, "grad_norm": 1.4904379844665527, "learning_rate": 8.487648345819679e-06, "loss": 0.739, "step": 7693 }, { "epoch": 1.3837094309089275, "grad_norm": 1.418028712272644, "learning_rate": 8.487230983123718e-06, "loss": 0.6744, "step": 7694 }, { "epoch": 1.3838892385147892, "grad_norm": 1.7931592464447021, "learning_rate": 8.486813573110605e-06, "loss": 0.7675, "step": 7695 }, { "epoch": 1.3840690461206508, "grad_norm": 1.4924898147583008, "learning_rate": 8.48639611578601e-06, "loss": 0.6599, "step": 7696 }, { "epoch": 1.3842488537265125, "grad_norm": 1.4658844470977783, "learning_rate": 8.485978611155593e-06, "loss": 0.6958, "step": 7697 }, { "epoch": 1.3844286613323744, "grad_norm": 1.5639457702636719, "learning_rate": 8.48556105922502e-06, "loss": 0.6913, "step": 7698 }, { "epoch": 1.384608468938236, "grad_norm": 1.5042475461959839, "learning_rate": 8.485143459999958e-06, "loss": 0.7244, "step": 7699 }, { "epoch": 1.3847882765440978, "grad_norm": 1.6995031833648682, "learning_rate": 8.48472581348607e-06, "loss": 0.7436, "step": 7700 }, { "epoch": 1.3849680841499596, "grad_norm": 1.4414671659469604, "learning_rate": 8.484308119689028e-06, "loss": 0.7146, "step": 7701 }, { "epoch": 1.3851478917558213, "grad_norm": 1.4219495058059692, "learning_rate": 8.483890378614496e-06, "loss": 0.6556, "step": 7702 }, { "epoch": 1.385327699361683, "grad_norm": 1.1645781993865967, "learning_rate": 8.483472590268143e-06, "loss": 0.9469, "step": 7703 }, { "epoch": 1.3855075069675447, "grad_norm": 1.485286831855774, "learning_rate": 8.483054754655637e-06, "loss": 0.6731, "step": 7704 }, { "epoch": 1.3856873145734063, "grad_norm": 1.482743740081787, "learning_rate": 8.482636871782648e-06, "loss": 0.7526, "step": 7705 }, { "epoch": 1.3858671221792682, "grad_norm": 1.4416285753250122, "learning_rate": 8.482218941654846e-06, "loss": 0.6539, "step": 7706 }, { "epoch": 1.3860469297851299, "grad_norm": 1.4011621475219727, "learning_rate": 8.481800964277902e-06, "loss": 0.7306, "step": 7707 }, { "epoch": 1.3862267373909916, "grad_norm": 1.5025410652160645, "learning_rate": 8.48138293965749e-06, "loss": 0.6984, "step": 7708 }, { "epoch": 1.3864065449968535, "grad_norm": 1.5664681196212769, "learning_rate": 8.480964867799277e-06, "loss": 0.7254, "step": 7709 }, { "epoch": 1.3865863526027151, "grad_norm": 1.5331480503082275, "learning_rate": 8.48054674870894e-06, "loss": 0.7226, "step": 7710 }, { "epoch": 1.3867661602085768, "grad_norm": 1.5120466947555542, "learning_rate": 8.480128582392148e-06, "loss": 0.7132, "step": 7711 }, { "epoch": 1.3869459678144387, "grad_norm": 1.4864296913146973, "learning_rate": 8.47971036885458e-06, "loss": 0.7162, "step": 7712 }, { "epoch": 1.3871257754203004, "grad_norm": 1.5686020851135254, "learning_rate": 8.479292108101907e-06, "loss": 0.6927, "step": 7713 }, { "epoch": 1.387305583026162, "grad_norm": 1.5241613388061523, "learning_rate": 8.478873800139806e-06, "loss": 0.7618, "step": 7714 }, { "epoch": 1.3874853906320237, "grad_norm": 1.2063186168670654, "learning_rate": 8.478455444973951e-06, "loss": 0.9435, "step": 7715 }, { "epoch": 1.3876651982378854, "grad_norm": 1.4730867147445679, "learning_rate": 8.478037042610023e-06, "loss": 0.7723, "step": 7716 }, { "epoch": 1.3878450058437473, "grad_norm": 1.4785834550857544, "learning_rate": 8.477618593053693e-06, "loss": 0.7442, "step": 7717 }, { "epoch": 1.388024813449609, "grad_norm": 1.1932393312454224, "learning_rate": 8.477200096310642e-06, "loss": 0.9154, "step": 7718 }, { "epoch": 1.3882046210554706, "grad_norm": 1.611793875694275, "learning_rate": 8.476781552386551e-06, "loss": 0.773, "step": 7719 }, { "epoch": 1.3883844286613325, "grad_norm": 1.4531136751174927, "learning_rate": 8.476362961287094e-06, "loss": 0.7189, "step": 7720 }, { "epoch": 1.3885642362671942, "grad_norm": 1.104600429534912, "learning_rate": 8.475944323017952e-06, "loss": 0.9438, "step": 7721 }, { "epoch": 1.3887440438730558, "grad_norm": 1.6275975704193115, "learning_rate": 8.475525637584809e-06, "loss": 0.7837, "step": 7722 }, { "epoch": 1.3889238514789175, "grad_norm": 1.238158941268921, "learning_rate": 8.475106904993343e-06, "loss": 0.942, "step": 7723 }, { "epoch": 1.3891036590847792, "grad_norm": 1.5349940061569214, "learning_rate": 8.474688125249235e-06, "loss": 0.7628, "step": 7724 }, { "epoch": 1.389283466690641, "grad_norm": 1.3797608613967896, "learning_rate": 8.474269298358167e-06, "loss": 0.7346, "step": 7725 }, { "epoch": 1.3894632742965027, "grad_norm": 1.7017338275909424, "learning_rate": 8.473850424325827e-06, "loss": 0.7018, "step": 7726 }, { "epoch": 1.3896430819023644, "grad_norm": 1.4433950185775757, "learning_rate": 8.473431503157892e-06, "loss": 0.7217, "step": 7727 }, { "epoch": 1.3898228895082263, "grad_norm": 1.4806407690048218, "learning_rate": 8.47301253486005e-06, "loss": 0.7425, "step": 7728 }, { "epoch": 1.390002697114088, "grad_norm": 1.5501278638839722, "learning_rate": 8.472593519437986e-06, "loss": 0.766, "step": 7729 }, { "epoch": 1.3901825047199496, "grad_norm": 1.4043184518814087, "learning_rate": 8.472174456897384e-06, "loss": 0.7347, "step": 7730 }, { "epoch": 1.3903623123258113, "grad_norm": 1.06453275680542, "learning_rate": 8.47175534724393e-06, "loss": 0.9554, "step": 7731 }, { "epoch": 1.390542119931673, "grad_norm": 1.5583964586257935, "learning_rate": 8.471336190483312e-06, "loss": 0.7475, "step": 7732 }, { "epoch": 1.3907219275375349, "grad_norm": 1.3809508085250854, "learning_rate": 8.470916986621215e-06, "loss": 0.6527, "step": 7733 }, { "epoch": 1.3909017351433965, "grad_norm": 1.1649757623672485, "learning_rate": 8.47049773566333e-06, "loss": 0.9556, "step": 7734 }, { "epoch": 1.3910815427492582, "grad_norm": 1.5641247034072876, "learning_rate": 8.470078437615344e-06, "loss": 0.6901, "step": 7735 }, { "epoch": 1.39126135035512, "grad_norm": 1.5482834577560425, "learning_rate": 8.46965909248295e-06, "loss": 0.6924, "step": 7736 }, { "epoch": 1.3914411579609818, "grad_norm": 1.6454341411590576, "learning_rate": 8.46923970027183e-06, "loss": 0.7354, "step": 7737 }, { "epoch": 1.3916209655668434, "grad_norm": 1.4935855865478516, "learning_rate": 8.468820260987682e-06, "loss": 0.6941, "step": 7738 }, { "epoch": 1.3918007731727053, "grad_norm": 1.3989098072052002, "learning_rate": 8.468400774636194e-06, "loss": 0.7076, "step": 7739 }, { "epoch": 1.391980580778567, "grad_norm": 1.5559484958648682, "learning_rate": 8.46798124122306e-06, "loss": 0.7598, "step": 7740 }, { "epoch": 1.3921603883844287, "grad_norm": 1.573062539100647, "learning_rate": 8.46756166075397e-06, "loss": 0.7785, "step": 7741 }, { "epoch": 1.3923401959902904, "grad_norm": 1.4552092552185059, "learning_rate": 8.467142033234617e-06, "loss": 0.7513, "step": 7742 }, { "epoch": 1.392520003596152, "grad_norm": 1.063852310180664, "learning_rate": 8.466722358670696e-06, "loss": 0.9493, "step": 7743 }, { "epoch": 1.392699811202014, "grad_norm": 1.1787105798721313, "learning_rate": 8.466302637067902e-06, "loss": 0.9579, "step": 7744 }, { "epoch": 1.3928796188078756, "grad_norm": 1.4435229301452637, "learning_rate": 8.46588286843193e-06, "loss": 0.7266, "step": 7745 }, { "epoch": 1.3930594264137373, "grad_norm": 1.5673294067382812, "learning_rate": 8.465463052768475e-06, "loss": 0.745, "step": 7746 }, { "epoch": 1.3932392340195991, "grad_norm": 1.4208450317382812, "learning_rate": 8.465043190083235e-06, "loss": 0.7766, "step": 7747 }, { "epoch": 1.3934190416254608, "grad_norm": 1.1826211214065552, "learning_rate": 8.464623280381903e-06, "loss": 0.905, "step": 7748 }, { "epoch": 1.3935988492313225, "grad_norm": 1.4700311422348022, "learning_rate": 8.46420332367018e-06, "loss": 0.6939, "step": 7749 }, { "epoch": 1.3937786568371842, "grad_norm": 1.053937315940857, "learning_rate": 8.463783319953764e-06, "loss": 0.9018, "step": 7750 }, { "epoch": 1.3939584644430458, "grad_norm": 1.5736795663833618, "learning_rate": 8.463363269238351e-06, "loss": 0.726, "step": 7751 }, { "epoch": 1.3941382720489077, "grad_norm": 1.4426164627075195, "learning_rate": 8.462943171529648e-06, "loss": 0.7109, "step": 7752 }, { "epoch": 1.3943180796547694, "grad_norm": 1.5107167959213257, "learning_rate": 8.462523026833345e-06, "loss": 0.765, "step": 7753 }, { "epoch": 1.394497887260631, "grad_norm": 1.4290552139282227, "learning_rate": 8.46210283515515e-06, "loss": 0.7475, "step": 7754 }, { "epoch": 1.394677694866493, "grad_norm": 1.3690245151519775, "learning_rate": 8.461682596500762e-06, "loss": 0.7344, "step": 7755 }, { "epoch": 1.3948575024723546, "grad_norm": 1.0989547967910767, "learning_rate": 8.461262310875883e-06, "loss": 0.9608, "step": 7756 }, { "epoch": 1.3950373100782163, "grad_norm": 1.6266658306121826, "learning_rate": 8.460841978286216e-06, "loss": 0.7405, "step": 7757 }, { "epoch": 1.395217117684078, "grad_norm": 1.5136526823043823, "learning_rate": 8.460421598737465e-06, "loss": 0.6539, "step": 7758 }, { "epoch": 1.3953969252899396, "grad_norm": 1.5514013767242432, "learning_rate": 8.460001172235332e-06, "loss": 0.692, "step": 7759 }, { "epoch": 1.3955767328958015, "grad_norm": 1.2823758125305176, "learning_rate": 8.459580698785525e-06, "loss": 1.0185, "step": 7760 }, { "epoch": 1.3957565405016632, "grad_norm": 1.424966812133789, "learning_rate": 8.459160178393745e-06, "loss": 0.6685, "step": 7761 }, { "epoch": 1.3959363481075249, "grad_norm": 1.1794261932373047, "learning_rate": 8.458739611065703e-06, "loss": 0.9235, "step": 7762 }, { "epoch": 1.3961161557133868, "grad_norm": 1.2623554468154907, "learning_rate": 8.458318996807103e-06, "loss": 0.9086, "step": 7763 }, { "epoch": 1.3962959633192484, "grad_norm": 1.5623548030853271, "learning_rate": 8.45789833562365e-06, "loss": 0.7257, "step": 7764 }, { "epoch": 1.39647577092511, "grad_norm": 1.4553089141845703, "learning_rate": 8.457477627521054e-06, "loss": 0.7751, "step": 7765 }, { "epoch": 1.396655578530972, "grad_norm": 1.4863072633743286, "learning_rate": 8.457056872505024e-06, "loss": 0.7439, "step": 7766 }, { "epoch": 1.3968353861368337, "grad_norm": 1.4130192995071411, "learning_rate": 8.456636070581268e-06, "loss": 0.7078, "step": 7767 }, { "epoch": 1.3970151937426953, "grad_norm": 1.4807747602462769, "learning_rate": 8.456215221755497e-06, "loss": 0.7905, "step": 7768 }, { "epoch": 1.397195001348557, "grad_norm": 1.4851115942001343, "learning_rate": 8.45579432603342e-06, "loss": 0.7544, "step": 7769 }, { "epoch": 1.3973748089544187, "grad_norm": 1.489876627922058, "learning_rate": 8.455373383420748e-06, "loss": 0.7275, "step": 7770 }, { "epoch": 1.3975546165602806, "grad_norm": 1.4075610637664795, "learning_rate": 8.454952393923194e-06, "loss": 0.7442, "step": 7771 }, { "epoch": 1.3977344241661422, "grad_norm": 1.4886744022369385, "learning_rate": 8.454531357546468e-06, "loss": 0.7173, "step": 7772 }, { "epoch": 1.397914231772004, "grad_norm": 1.44430410861969, "learning_rate": 8.454110274296285e-06, "loss": 0.6758, "step": 7773 }, { "epoch": 1.3980940393778658, "grad_norm": 1.507152795791626, "learning_rate": 8.453689144178357e-06, "loss": 0.714, "step": 7774 }, { "epoch": 1.3982738469837275, "grad_norm": 1.1919277906417847, "learning_rate": 8.4532679671984e-06, "loss": 0.9507, "step": 7775 }, { "epoch": 1.3984536545895891, "grad_norm": 1.770526647567749, "learning_rate": 8.452846743362129e-06, "loss": 0.7663, "step": 7776 }, { "epoch": 1.3986334621954508, "grad_norm": 1.3458081483840942, "learning_rate": 8.452425472675256e-06, "loss": 0.7706, "step": 7777 }, { "epoch": 1.3988132698013125, "grad_norm": 1.5216271877288818, "learning_rate": 8.4520041551435e-06, "loss": 0.7615, "step": 7778 }, { "epoch": 1.3989930774071744, "grad_norm": 1.4539963006973267, "learning_rate": 8.45158279077258e-06, "loss": 0.778, "step": 7779 }, { "epoch": 1.399172885013036, "grad_norm": 1.5642188787460327, "learning_rate": 8.451161379568206e-06, "loss": 0.7801, "step": 7780 }, { "epoch": 1.3993526926188977, "grad_norm": 1.4890974760055542, "learning_rate": 8.450739921536104e-06, "loss": 0.7652, "step": 7781 }, { "epoch": 1.3995325002247596, "grad_norm": 1.5319715738296509, "learning_rate": 8.450318416681987e-06, "loss": 0.7329, "step": 7782 }, { "epoch": 1.3997123078306213, "grad_norm": 1.40470552444458, "learning_rate": 8.449896865011577e-06, "loss": 0.6601, "step": 7783 }, { "epoch": 1.399892115436483, "grad_norm": 0.993392825126648, "learning_rate": 8.449475266530592e-06, "loss": 0.9048, "step": 7784 }, { "epoch": 1.4000719230423446, "grad_norm": 1.4541043043136597, "learning_rate": 8.449053621244756e-06, "loss": 0.7661, "step": 7785 }, { "epoch": 1.4002517306482063, "grad_norm": 1.4732774496078491, "learning_rate": 8.448631929159787e-06, "loss": 0.7484, "step": 7786 }, { "epoch": 1.4004315382540682, "grad_norm": 1.489571452140808, "learning_rate": 8.448210190281407e-06, "loss": 0.7335, "step": 7787 }, { "epoch": 1.4006113458599299, "grad_norm": 1.5081967115402222, "learning_rate": 8.44778840461534e-06, "loss": 0.7604, "step": 7788 }, { "epoch": 1.4007911534657915, "grad_norm": 1.558519959449768, "learning_rate": 8.447366572167309e-06, "loss": 0.782, "step": 7789 }, { "epoch": 1.4009709610716534, "grad_norm": 1.3742716312408447, "learning_rate": 8.446944692943035e-06, "loss": 0.7802, "step": 7790 }, { "epoch": 1.401150768677515, "grad_norm": 1.8142108917236328, "learning_rate": 8.446522766948247e-06, "loss": 0.7884, "step": 7791 }, { "epoch": 1.4013305762833768, "grad_norm": 1.6369285583496094, "learning_rate": 8.446100794188666e-06, "loss": 0.6745, "step": 7792 }, { "epoch": 1.4015103838892387, "grad_norm": 1.4601202011108398, "learning_rate": 8.44567877467002e-06, "loss": 0.7073, "step": 7793 }, { "epoch": 1.4016901914951003, "grad_norm": 1.3030061721801758, "learning_rate": 8.445256708398033e-06, "loss": 0.9197, "step": 7794 }, { "epoch": 1.401869999100962, "grad_norm": 1.6021742820739746, "learning_rate": 8.444834595378434e-06, "loss": 0.6738, "step": 7795 }, { "epoch": 1.4020498067068237, "grad_norm": 1.3761552572250366, "learning_rate": 8.444412435616949e-06, "loss": 0.7418, "step": 7796 }, { "epoch": 1.4022296143126853, "grad_norm": 1.6130088567733765, "learning_rate": 8.443990229119307e-06, "loss": 0.809, "step": 7797 }, { "epoch": 1.4024094219185472, "grad_norm": 1.506295084953308, "learning_rate": 8.443567975891236e-06, "loss": 0.7325, "step": 7798 }, { "epoch": 1.402589229524409, "grad_norm": 1.5452513694763184, "learning_rate": 8.443145675938467e-06, "loss": 0.6925, "step": 7799 }, { "epoch": 1.4027690371302706, "grad_norm": 1.5277858972549438, "learning_rate": 8.442723329266727e-06, "loss": 0.7698, "step": 7800 }, { "epoch": 1.4029488447361325, "grad_norm": 1.1644372940063477, "learning_rate": 8.44230093588175e-06, "loss": 0.9796, "step": 7801 }, { "epoch": 1.4031286523419941, "grad_norm": 1.5027704238891602, "learning_rate": 8.441878495789268e-06, "loss": 0.6479, "step": 7802 }, { "epoch": 1.4033084599478558, "grad_norm": 1.6061351299285889, "learning_rate": 8.441456008995009e-06, "loss": 0.7561, "step": 7803 }, { "epoch": 1.4034882675537175, "grad_norm": 1.5184520483016968, "learning_rate": 8.441033475504708e-06, "loss": 0.6999, "step": 7804 }, { "epoch": 1.4036680751595791, "grad_norm": 1.3234713077545166, "learning_rate": 8.440610895324099e-06, "loss": 0.957, "step": 7805 }, { "epoch": 1.403847882765441, "grad_norm": 1.4814361333847046, "learning_rate": 8.440188268458913e-06, "loss": 0.7218, "step": 7806 }, { "epoch": 1.4040276903713027, "grad_norm": 1.1890913248062134, "learning_rate": 8.439765594914886e-06, "loss": 0.8874, "step": 7807 }, { "epoch": 1.4042074979771644, "grad_norm": 1.484235405921936, "learning_rate": 8.439342874697754e-06, "loss": 0.8017, "step": 7808 }, { "epoch": 1.4043873055830263, "grad_norm": 1.1295497417449951, "learning_rate": 8.438920107813253e-06, "loss": 0.8863, "step": 7809 }, { "epoch": 1.404567113188888, "grad_norm": 1.457015037536621, "learning_rate": 8.438497294267117e-06, "loss": 0.7948, "step": 7810 }, { "epoch": 1.4047469207947496, "grad_norm": 1.4794687032699585, "learning_rate": 8.438074434065085e-06, "loss": 0.746, "step": 7811 }, { "epoch": 1.4049267284006113, "grad_norm": 1.545979380607605, "learning_rate": 8.437651527212895e-06, "loss": 0.7552, "step": 7812 }, { "epoch": 1.405106536006473, "grad_norm": 1.569841742515564, "learning_rate": 8.437228573716282e-06, "loss": 0.7326, "step": 7813 }, { "epoch": 1.4052863436123348, "grad_norm": 1.1092307567596436, "learning_rate": 8.43680557358099e-06, "loss": 0.9703, "step": 7814 }, { "epoch": 1.4054661512181965, "grad_norm": 1.4743077754974365, "learning_rate": 8.436382526812755e-06, "loss": 0.6787, "step": 7815 }, { "epoch": 1.4056459588240582, "grad_norm": 1.3898190259933472, "learning_rate": 8.435959433417318e-06, "loss": 0.698, "step": 7816 }, { "epoch": 1.40582576642992, "grad_norm": 1.4185665845870972, "learning_rate": 8.435536293400421e-06, "loss": 0.7165, "step": 7817 }, { "epoch": 1.4060055740357817, "grad_norm": 1.3571292161941528, "learning_rate": 8.435113106767802e-06, "loss": 0.6854, "step": 7818 }, { "epoch": 1.4061853816416434, "grad_norm": 1.47690749168396, "learning_rate": 8.434689873525208e-06, "loss": 0.7236, "step": 7819 }, { "epoch": 1.406365189247505, "grad_norm": 1.5451323986053467, "learning_rate": 8.434266593678378e-06, "loss": 0.8198, "step": 7820 }, { "epoch": 1.406544996853367, "grad_norm": 1.5140825510025024, "learning_rate": 8.433843267233057e-06, "loss": 0.7199, "step": 7821 }, { "epoch": 1.4067248044592287, "grad_norm": 1.537327527999878, "learning_rate": 8.433419894194988e-06, "loss": 0.7294, "step": 7822 }, { "epoch": 1.4069046120650903, "grad_norm": 1.6059002876281738, "learning_rate": 8.432996474569917e-06, "loss": 0.7199, "step": 7823 }, { "epoch": 1.407084419670952, "grad_norm": 1.5299862623214722, "learning_rate": 8.432573008363587e-06, "loss": 0.7633, "step": 7824 }, { "epoch": 1.4072642272768139, "grad_norm": 1.5668412446975708, "learning_rate": 8.432149495581746e-06, "loss": 0.7327, "step": 7825 }, { "epoch": 1.4074440348826756, "grad_norm": 1.5082298517227173, "learning_rate": 8.431725936230139e-06, "loss": 0.704, "step": 7826 }, { "epoch": 1.4076238424885372, "grad_norm": 1.4386287927627563, "learning_rate": 8.431302330314515e-06, "loss": 0.7895, "step": 7827 }, { "epoch": 1.4078036500943991, "grad_norm": 1.476959228515625, "learning_rate": 8.430878677840622e-06, "loss": 0.7831, "step": 7828 }, { "epoch": 1.4079834577002608, "grad_norm": 1.4476488828659058, "learning_rate": 8.430454978814204e-06, "loss": 0.7495, "step": 7829 }, { "epoch": 1.4081632653061225, "grad_norm": 1.4717239141464233, "learning_rate": 8.430031233241015e-06, "loss": 0.7865, "step": 7830 }, { "epoch": 1.4083430729119841, "grad_norm": 1.485624074935913, "learning_rate": 8.429607441126804e-06, "loss": 0.7403, "step": 7831 }, { "epoch": 1.4085228805178458, "grad_norm": 1.2144136428833008, "learning_rate": 8.429183602477318e-06, "loss": 0.964, "step": 7832 }, { "epoch": 1.4087026881237077, "grad_norm": 1.4608595371246338, "learning_rate": 8.428759717298312e-06, "loss": 0.7313, "step": 7833 }, { "epoch": 1.4088824957295694, "grad_norm": 1.4077671766281128, "learning_rate": 8.428335785595533e-06, "loss": 0.6574, "step": 7834 }, { "epoch": 1.409062303335431, "grad_norm": 1.1303902864456177, "learning_rate": 8.427911807374737e-06, "loss": 0.9704, "step": 7835 }, { "epoch": 1.409242110941293, "grad_norm": 1.0533183813095093, "learning_rate": 8.427487782641677e-06, "loss": 0.972, "step": 7836 }, { "epoch": 1.4094219185471546, "grad_norm": 1.4613550901412964, "learning_rate": 8.427063711402103e-06, "loss": 0.7083, "step": 7837 }, { "epoch": 1.4096017261530163, "grad_norm": 1.0538982152938843, "learning_rate": 8.426639593661772e-06, "loss": 0.9527, "step": 7838 }, { "epoch": 1.409781533758878, "grad_norm": 1.4778698682785034, "learning_rate": 8.42621542942644e-06, "loss": 0.7414, "step": 7839 }, { "epoch": 1.4099613413647396, "grad_norm": 1.652948260307312, "learning_rate": 8.425791218701857e-06, "loss": 0.7294, "step": 7840 }, { "epoch": 1.4101411489706015, "grad_norm": 1.535598874092102, "learning_rate": 8.425366961493784e-06, "loss": 0.7746, "step": 7841 }, { "epoch": 1.4103209565764632, "grad_norm": 1.5045915842056274, "learning_rate": 8.424942657807975e-06, "loss": 0.7726, "step": 7842 }, { "epoch": 1.4105007641823248, "grad_norm": 1.622884750366211, "learning_rate": 8.42451830765019e-06, "loss": 0.8096, "step": 7843 }, { "epoch": 1.4106805717881867, "grad_norm": 1.523228645324707, "learning_rate": 8.424093911026183e-06, "loss": 0.7041, "step": 7844 }, { "epoch": 1.4108603793940484, "grad_norm": 1.423318862915039, "learning_rate": 8.423669467941716e-06, "loss": 0.7385, "step": 7845 }, { "epoch": 1.41104018699991, "grad_norm": 1.4509543180465698, "learning_rate": 8.423244978402544e-06, "loss": 0.806, "step": 7846 }, { "epoch": 1.4112199946057717, "grad_norm": 1.4541680812835693, "learning_rate": 8.422820442414434e-06, "loss": 0.7136, "step": 7847 }, { "epoch": 1.4113998022116334, "grad_norm": 1.4435840845108032, "learning_rate": 8.422395859983138e-06, "loss": 0.7585, "step": 7848 }, { "epoch": 1.4115796098174953, "grad_norm": 1.6162059307098389, "learning_rate": 8.421971231114423e-06, "loss": 0.7915, "step": 7849 }, { "epoch": 1.411759417423357, "grad_norm": 1.5684033632278442, "learning_rate": 8.421546555814046e-06, "loss": 0.7398, "step": 7850 }, { "epoch": 1.4119392250292186, "grad_norm": 1.6685971021652222, "learning_rate": 8.421121834087774e-06, "loss": 0.7864, "step": 7851 }, { "epoch": 1.4121190326350805, "grad_norm": 1.6973259449005127, "learning_rate": 8.420697065941367e-06, "loss": 0.6678, "step": 7852 }, { "epoch": 1.4122988402409422, "grad_norm": 1.3942747116088867, "learning_rate": 8.42027225138059e-06, "loss": 0.7632, "step": 7853 }, { "epoch": 1.4124786478468039, "grad_norm": 1.504309058189392, "learning_rate": 8.419847390411204e-06, "loss": 0.7213, "step": 7854 }, { "epoch": 1.4126584554526658, "grad_norm": 1.316811442375183, "learning_rate": 8.419422483038978e-06, "loss": 0.9431, "step": 7855 }, { "epoch": 1.4128382630585274, "grad_norm": 1.4578830003738403, "learning_rate": 8.418997529269674e-06, "loss": 0.768, "step": 7856 }, { "epoch": 1.4130180706643891, "grad_norm": 1.4404771327972412, "learning_rate": 8.418572529109064e-06, "loss": 0.7095, "step": 7857 }, { "epoch": 1.4131978782702508, "grad_norm": 1.5552951097488403, "learning_rate": 8.418147482562907e-06, "loss": 0.6642, "step": 7858 }, { "epoch": 1.4133776858761125, "grad_norm": 1.478786826133728, "learning_rate": 8.417722389636973e-06, "loss": 0.7527, "step": 7859 }, { "epoch": 1.4135574934819743, "grad_norm": 1.5563099384307861, "learning_rate": 8.417297250337033e-06, "loss": 0.7443, "step": 7860 }, { "epoch": 1.413737301087836, "grad_norm": 1.3971892595291138, "learning_rate": 8.416872064668852e-06, "loss": 0.6836, "step": 7861 }, { "epoch": 1.4139171086936977, "grad_norm": 1.5466704368591309, "learning_rate": 8.4164468326382e-06, "loss": 0.7386, "step": 7862 }, { "epoch": 1.4140969162995596, "grad_norm": 1.542231559753418, "learning_rate": 8.416021554250848e-06, "loss": 0.7473, "step": 7863 }, { "epoch": 1.4142767239054213, "grad_norm": 1.5313533544540405, "learning_rate": 8.415596229512566e-06, "loss": 0.755, "step": 7864 }, { "epoch": 1.414456531511283, "grad_norm": 1.585160732269287, "learning_rate": 8.415170858429125e-06, "loss": 0.7142, "step": 7865 }, { "epoch": 1.4146363391171446, "grad_norm": 1.5265145301818848, "learning_rate": 8.414745441006297e-06, "loss": 0.7515, "step": 7866 }, { "epoch": 1.4148161467230063, "grad_norm": 1.4451528787612915, "learning_rate": 8.414319977249854e-06, "loss": 0.6623, "step": 7867 }, { "epoch": 1.4149959543288682, "grad_norm": 1.5165929794311523, "learning_rate": 8.413894467165568e-06, "loss": 0.752, "step": 7868 }, { "epoch": 1.4151757619347298, "grad_norm": 1.1384577751159668, "learning_rate": 8.413468910759214e-06, "loss": 0.9361, "step": 7869 }, { "epoch": 1.4153555695405915, "grad_norm": 1.4936821460723877, "learning_rate": 8.413043308036565e-06, "loss": 0.7282, "step": 7870 }, { "epoch": 1.4155353771464534, "grad_norm": 1.5398797988891602, "learning_rate": 8.412617659003398e-06, "loss": 0.8382, "step": 7871 }, { "epoch": 1.415715184752315, "grad_norm": 1.0820817947387695, "learning_rate": 8.412191963665485e-06, "loss": 0.9068, "step": 7872 }, { "epoch": 1.4158949923581767, "grad_norm": 1.5884543657302856, "learning_rate": 8.411766222028608e-06, "loss": 0.7032, "step": 7873 }, { "epoch": 1.4160747999640384, "grad_norm": 1.5226655006408691, "learning_rate": 8.411340434098537e-06, "loss": 0.7285, "step": 7874 }, { "epoch": 1.4162546075699, "grad_norm": 1.5069702863693237, "learning_rate": 8.410914599881054e-06, "loss": 0.8078, "step": 7875 }, { "epoch": 1.416434415175762, "grad_norm": 1.544040322303772, "learning_rate": 8.410488719381934e-06, "loss": 0.6805, "step": 7876 }, { "epoch": 1.4166142227816236, "grad_norm": 1.0574942827224731, "learning_rate": 8.410062792606959e-06, "loss": 0.9578, "step": 7877 }, { "epoch": 1.4167940303874853, "grad_norm": 1.4074019193649292, "learning_rate": 8.409636819561905e-06, "loss": 0.6932, "step": 7878 }, { "epoch": 1.4169738379933472, "grad_norm": 1.522451400756836, "learning_rate": 8.409210800252554e-06, "loss": 0.7326, "step": 7879 }, { "epoch": 1.4171536455992089, "grad_norm": 1.4253841638565063, "learning_rate": 8.408784734684685e-06, "loss": 0.7363, "step": 7880 }, { "epoch": 1.4173334532050705, "grad_norm": 1.066466212272644, "learning_rate": 8.408358622864081e-06, "loss": 0.9415, "step": 7881 }, { "epoch": 1.4175132608109324, "grad_norm": 1.6415419578552246, "learning_rate": 8.407932464796521e-06, "loss": 0.7313, "step": 7882 }, { "epoch": 1.417693068416794, "grad_norm": 1.517867088317871, "learning_rate": 8.407506260487792e-06, "loss": 0.7729, "step": 7883 }, { "epoch": 1.4178728760226558, "grad_norm": 1.5023759603500366, "learning_rate": 8.407080009943672e-06, "loss": 0.7661, "step": 7884 }, { "epoch": 1.4180526836285174, "grad_norm": 1.4432984590530396, "learning_rate": 8.406653713169946e-06, "loss": 0.7433, "step": 7885 }, { "epoch": 1.4182324912343791, "grad_norm": 1.1910138130187988, "learning_rate": 8.4062273701724e-06, "loss": 0.9292, "step": 7886 }, { "epoch": 1.418412298840241, "grad_norm": 1.440392017364502, "learning_rate": 8.405800980956818e-06, "loss": 0.7527, "step": 7887 }, { "epoch": 1.4185921064461027, "grad_norm": 1.5543264150619507, "learning_rate": 8.405374545528988e-06, "loss": 0.7108, "step": 7888 }, { "epoch": 1.4187719140519643, "grad_norm": 1.514739751815796, "learning_rate": 8.40494806389469e-06, "loss": 0.7282, "step": 7889 }, { "epoch": 1.4189517216578262, "grad_norm": 1.4577654600143433, "learning_rate": 8.404521536059717e-06, "loss": 0.7543, "step": 7890 }, { "epoch": 1.419131529263688, "grad_norm": 1.440735936164856, "learning_rate": 8.404094962029854e-06, "loss": 0.7696, "step": 7891 }, { "epoch": 1.4193113368695496, "grad_norm": 1.382081151008606, "learning_rate": 8.403668341810887e-06, "loss": 0.7213, "step": 7892 }, { "epoch": 1.4194911444754112, "grad_norm": 1.5226513147354126, "learning_rate": 8.403241675408607e-06, "loss": 0.7674, "step": 7893 }, { "epoch": 1.419670952081273, "grad_norm": 1.5927594900131226, "learning_rate": 8.402814962828804e-06, "loss": 0.8162, "step": 7894 }, { "epoch": 1.4198507596871348, "grad_norm": 1.446637749671936, "learning_rate": 8.402388204077267e-06, "loss": 0.7313, "step": 7895 }, { "epoch": 1.4200305672929965, "grad_norm": 1.4291805028915405, "learning_rate": 8.401961399159786e-06, "loss": 0.7839, "step": 7896 }, { "epoch": 1.4202103748988582, "grad_norm": 2.553344249725342, "learning_rate": 8.401534548082152e-06, "loss": 0.7437, "step": 7897 }, { "epoch": 1.42039018250472, "grad_norm": 1.4895339012145996, "learning_rate": 8.40110765085016e-06, "loss": 0.7953, "step": 7898 }, { "epoch": 1.4205699901105817, "grad_norm": 1.5832327604293823, "learning_rate": 8.400680707469598e-06, "loss": 0.7798, "step": 7899 }, { "epoch": 1.4207497977164434, "grad_norm": 1.4654271602630615, "learning_rate": 8.40025371794626e-06, "loss": 0.7371, "step": 7900 }, { "epoch": 1.420929605322305, "grad_norm": 1.5115829706192017, "learning_rate": 8.399826682285944e-06, "loss": 0.7685, "step": 7901 }, { "epoch": 1.4211094129281667, "grad_norm": 1.4749609231948853, "learning_rate": 8.399399600494438e-06, "loss": 0.7727, "step": 7902 }, { "epoch": 1.4212892205340286, "grad_norm": 1.6358822584152222, "learning_rate": 8.39897247257754e-06, "loss": 0.7356, "step": 7903 }, { "epoch": 1.4214690281398903, "grad_norm": 1.642748475074768, "learning_rate": 8.398545298541046e-06, "loss": 0.7448, "step": 7904 }, { "epoch": 1.421648835745752, "grad_norm": 1.5000630617141724, "learning_rate": 8.39811807839075e-06, "loss": 0.7193, "step": 7905 }, { "epoch": 1.4218286433516139, "grad_norm": 1.4173946380615234, "learning_rate": 8.397690812132454e-06, "loss": 0.7069, "step": 7906 }, { "epoch": 1.4220084509574755, "grad_norm": 1.7172671556472778, "learning_rate": 8.39726349977195e-06, "loss": 0.7259, "step": 7907 }, { "epoch": 1.4221882585633372, "grad_norm": 1.5999809503555298, "learning_rate": 8.396836141315039e-06, "loss": 0.7599, "step": 7908 }, { "epoch": 1.422368066169199, "grad_norm": 1.5117605924606323, "learning_rate": 8.396408736767518e-06, "loss": 0.7771, "step": 7909 }, { "epoch": 1.4225478737750608, "grad_norm": 1.4798964262008667, "learning_rate": 8.395981286135187e-06, "loss": 0.6819, "step": 7910 }, { "epoch": 1.4227276813809224, "grad_norm": 1.4300276041030884, "learning_rate": 8.395553789423844e-06, "loss": 0.73, "step": 7911 }, { "epoch": 1.422907488986784, "grad_norm": 1.5778616666793823, "learning_rate": 8.395126246639294e-06, "loss": 0.7307, "step": 7912 }, { "epoch": 1.4230872965926458, "grad_norm": 1.4335031509399414, "learning_rate": 8.394698657787334e-06, "loss": 0.723, "step": 7913 }, { "epoch": 1.4232671041985077, "grad_norm": 1.4087293148040771, "learning_rate": 8.394271022873768e-06, "loss": 0.7243, "step": 7914 }, { "epoch": 1.4234469118043693, "grad_norm": 1.6392472982406616, "learning_rate": 8.3938433419044e-06, "loss": 0.7468, "step": 7915 }, { "epoch": 1.423626719410231, "grad_norm": 1.2492258548736572, "learning_rate": 8.39341561488503e-06, "loss": 0.9178, "step": 7916 }, { "epoch": 1.423806527016093, "grad_norm": 1.6086187362670898, "learning_rate": 8.39298784182146e-06, "loss": 0.6889, "step": 7917 }, { "epoch": 1.4239863346219546, "grad_norm": 1.2181148529052734, "learning_rate": 8.392560022719501e-06, "loss": 0.9627, "step": 7918 }, { "epoch": 1.4241661422278162, "grad_norm": 1.5631412267684937, "learning_rate": 8.392132157584952e-06, "loss": 0.7524, "step": 7919 }, { "epoch": 1.424345949833678, "grad_norm": 1.1108815670013428, "learning_rate": 8.39170424642362e-06, "loss": 0.9726, "step": 7920 }, { "epoch": 1.4245257574395396, "grad_norm": 1.63068425655365, "learning_rate": 8.391276289241312e-06, "loss": 0.7168, "step": 7921 }, { "epoch": 1.4247055650454015, "grad_norm": 1.4475836753845215, "learning_rate": 8.390848286043837e-06, "loss": 0.7173, "step": 7922 }, { "epoch": 1.4248853726512631, "grad_norm": 1.4624663591384888, "learning_rate": 8.390420236836998e-06, "loss": 0.7261, "step": 7923 }, { "epoch": 1.4250651802571248, "grad_norm": 1.4347320795059204, "learning_rate": 8.389992141626605e-06, "loss": 0.7181, "step": 7924 }, { "epoch": 1.4252449878629867, "grad_norm": 1.0452290773391724, "learning_rate": 8.389564000418466e-06, "loss": 0.9208, "step": 7925 }, { "epoch": 1.4254247954688484, "grad_norm": 1.5345590114593506, "learning_rate": 8.389135813218392e-06, "loss": 0.7191, "step": 7926 }, { "epoch": 1.42560460307471, "grad_norm": 1.5225416421890259, "learning_rate": 8.388707580032193e-06, "loss": 0.7851, "step": 7927 }, { "epoch": 1.4257844106805717, "grad_norm": 1.434476613998413, "learning_rate": 8.388279300865678e-06, "loss": 0.6867, "step": 7928 }, { "epoch": 1.4259642182864334, "grad_norm": 1.4574600458145142, "learning_rate": 8.387850975724658e-06, "loss": 0.7256, "step": 7929 }, { "epoch": 1.4261440258922953, "grad_norm": 1.5935683250427246, "learning_rate": 8.387422604614946e-06, "loss": 0.7302, "step": 7930 }, { "epoch": 1.426323833498157, "grad_norm": 1.384202480316162, "learning_rate": 8.386994187542354e-06, "loss": 0.7077, "step": 7931 }, { "epoch": 1.4265036411040186, "grad_norm": 1.2046748399734497, "learning_rate": 8.386565724512696e-06, "loss": 0.9509, "step": 7932 }, { "epoch": 1.4266834487098805, "grad_norm": 1.4308111667633057, "learning_rate": 8.386137215531783e-06, "loss": 0.7046, "step": 7933 }, { "epoch": 1.4268632563157422, "grad_norm": 1.5524848699569702, "learning_rate": 8.385708660605431e-06, "loss": 0.7507, "step": 7934 }, { "epoch": 1.4270430639216038, "grad_norm": 1.4740986824035645, "learning_rate": 8.385280059739456e-06, "loss": 0.7206, "step": 7935 }, { "epoch": 1.4272228715274657, "grad_norm": 1.4808111190795898, "learning_rate": 8.384851412939674e-06, "loss": 0.7482, "step": 7936 }, { "epoch": 1.4274026791333274, "grad_norm": 1.4946097135543823, "learning_rate": 8.384422720211897e-06, "loss": 0.7747, "step": 7937 }, { "epoch": 1.427582486739189, "grad_norm": 1.3576688766479492, "learning_rate": 8.383993981561946e-06, "loss": 0.7348, "step": 7938 }, { "epoch": 1.4277622943450508, "grad_norm": 1.6159772872924805, "learning_rate": 8.383565196995636e-06, "loss": 0.756, "step": 7939 }, { "epoch": 1.4279421019509124, "grad_norm": 1.153987169265747, "learning_rate": 8.383136366518788e-06, "loss": 0.9585, "step": 7940 }, { "epoch": 1.4281219095567743, "grad_norm": 1.5617092847824097, "learning_rate": 8.382707490137217e-06, "loss": 0.7494, "step": 7941 }, { "epoch": 1.428301717162636, "grad_norm": 1.113813042640686, "learning_rate": 8.382278567856743e-06, "loss": 0.9471, "step": 7942 }, { "epoch": 1.4284815247684977, "grad_norm": 1.1009010076522827, "learning_rate": 8.38184959968319e-06, "loss": 0.9451, "step": 7943 }, { "epoch": 1.4286613323743595, "grad_norm": 1.6242142915725708, "learning_rate": 8.381420585622373e-06, "loss": 0.7231, "step": 7944 }, { "epoch": 1.4288411399802212, "grad_norm": 1.589760184288025, "learning_rate": 8.380991525680116e-06, "loss": 0.767, "step": 7945 }, { "epoch": 1.429020947586083, "grad_norm": 1.517235279083252, "learning_rate": 8.38056241986224e-06, "loss": 0.7251, "step": 7946 }, { "epoch": 1.4292007551919446, "grad_norm": 1.6535351276397705, "learning_rate": 8.380133268174568e-06, "loss": 0.7302, "step": 7947 }, { "epoch": 1.4293805627978062, "grad_norm": 1.553714632987976, "learning_rate": 8.379704070622923e-06, "loss": 0.6826, "step": 7948 }, { "epoch": 1.4295603704036681, "grad_norm": 1.4295275211334229, "learning_rate": 8.379274827213127e-06, "loss": 0.6946, "step": 7949 }, { "epoch": 1.4297401780095298, "grad_norm": 1.4727619886398315, "learning_rate": 8.378845537951008e-06, "loss": 0.7361, "step": 7950 }, { "epoch": 1.4299199856153915, "grad_norm": 1.4676473140716553, "learning_rate": 8.378416202842386e-06, "loss": 0.7125, "step": 7951 }, { "epoch": 1.4300997932212534, "grad_norm": 1.119484543800354, "learning_rate": 8.37798682189309e-06, "loss": 0.942, "step": 7952 }, { "epoch": 1.430279600827115, "grad_norm": 1.5897183418273926, "learning_rate": 8.377557395108947e-06, "loss": 0.6935, "step": 7953 }, { "epoch": 1.4304594084329767, "grad_norm": 1.3920255899429321, "learning_rate": 8.37712792249578e-06, "loss": 0.765, "step": 7954 }, { "epoch": 1.4306392160388384, "grad_norm": 1.4878596067428589, "learning_rate": 8.376698404059419e-06, "loss": 0.7346, "step": 7955 }, { "epoch": 1.4308190236447, "grad_norm": 1.564449667930603, "learning_rate": 8.376268839805692e-06, "loss": 0.6861, "step": 7956 }, { "epoch": 1.430998831250562, "grad_norm": 1.5388363599777222, "learning_rate": 8.375839229740426e-06, "loss": 0.8272, "step": 7957 }, { "epoch": 1.4311786388564236, "grad_norm": 1.0400803089141846, "learning_rate": 8.37540957386945e-06, "loss": 0.9668, "step": 7958 }, { "epoch": 1.4313584464622853, "grad_norm": 1.4181263446807861, "learning_rate": 8.374979872198597e-06, "loss": 0.6957, "step": 7959 }, { "epoch": 1.4315382540681472, "grad_norm": 1.4826642274856567, "learning_rate": 8.374550124733695e-06, "loss": 0.7291, "step": 7960 }, { "epoch": 1.4317180616740088, "grad_norm": 1.4741755723953247, "learning_rate": 8.374120331480577e-06, "loss": 0.7479, "step": 7961 }, { "epoch": 1.4318978692798705, "grad_norm": 1.4138401746749878, "learning_rate": 8.373690492445072e-06, "loss": 0.732, "step": 7962 }, { "epoch": 1.4320776768857324, "grad_norm": 1.2168495655059814, "learning_rate": 8.373260607633014e-06, "loss": 0.9726, "step": 7963 }, { "epoch": 1.432257484491594, "grad_norm": 1.392575979232788, "learning_rate": 8.372830677050236e-06, "loss": 0.6855, "step": 7964 }, { "epoch": 1.4324372920974557, "grad_norm": 1.5290263891220093, "learning_rate": 8.372400700702569e-06, "loss": 0.7424, "step": 7965 }, { "epoch": 1.4326170997033174, "grad_norm": 1.4219344854354858, "learning_rate": 8.371970678595853e-06, "loss": 0.7087, "step": 7966 }, { "epoch": 1.432796907309179, "grad_norm": 1.122452974319458, "learning_rate": 8.371540610735917e-06, "loss": 0.9667, "step": 7967 }, { "epoch": 1.432976714915041, "grad_norm": 1.3044605255126953, "learning_rate": 8.371110497128601e-06, "loss": 0.9431, "step": 7968 }, { "epoch": 1.4331565225209026, "grad_norm": 1.4153573513031006, "learning_rate": 8.370680337779737e-06, "loss": 0.7779, "step": 7969 }, { "epoch": 1.4333363301267643, "grad_norm": 1.501239538192749, "learning_rate": 8.370250132695165e-06, "loss": 0.7448, "step": 7970 }, { "epoch": 1.4335161377326262, "grad_norm": 1.5871226787567139, "learning_rate": 8.36981988188072e-06, "loss": 0.7928, "step": 7971 }, { "epoch": 1.4336959453384879, "grad_norm": 1.464717149734497, "learning_rate": 8.369389585342242e-06, "loss": 0.7496, "step": 7972 }, { "epoch": 1.4338757529443495, "grad_norm": 1.1103063821792603, "learning_rate": 8.368959243085568e-06, "loss": 0.9409, "step": 7973 }, { "epoch": 1.4340555605502112, "grad_norm": 1.4874767065048218, "learning_rate": 8.368528855116536e-06, "loss": 0.6828, "step": 7974 }, { "epoch": 1.4342353681560729, "grad_norm": 1.5820825099945068, "learning_rate": 8.368098421440989e-06, "loss": 0.7634, "step": 7975 }, { "epoch": 1.4344151757619348, "grad_norm": 1.5244849920272827, "learning_rate": 8.367667942064766e-06, "loss": 0.7483, "step": 7976 }, { "epoch": 1.4345949833677965, "grad_norm": 1.5282152891159058, "learning_rate": 8.367237416993705e-06, "loss": 0.7221, "step": 7977 }, { "epoch": 1.4347747909736581, "grad_norm": 1.2427676916122437, "learning_rate": 8.366806846233655e-06, "loss": 0.9215, "step": 7978 }, { "epoch": 1.43495459857952, "grad_norm": 1.2265268564224243, "learning_rate": 8.366376229790451e-06, "loss": 0.9457, "step": 7979 }, { "epoch": 1.4351344061853817, "grad_norm": 1.0921320915222168, "learning_rate": 8.365945567669938e-06, "loss": 0.9739, "step": 7980 }, { "epoch": 1.4353142137912434, "grad_norm": 1.5709247589111328, "learning_rate": 8.365514859877961e-06, "loss": 0.7574, "step": 7981 }, { "epoch": 1.435494021397105, "grad_norm": 1.582585334777832, "learning_rate": 8.365084106420364e-06, "loss": 0.7376, "step": 7982 }, { "epoch": 1.4356738290029667, "grad_norm": 1.5900014638900757, "learning_rate": 8.364653307302992e-06, "loss": 0.7213, "step": 7983 }, { "epoch": 1.4358536366088286, "grad_norm": 1.4545886516571045, "learning_rate": 8.364222462531688e-06, "loss": 0.6613, "step": 7984 }, { "epoch": 1.4360334442146903, "grad_norm": 1.4139580726623535, "learning_rate": 8.3637915721123e-06, "loss": 0.7399, "step": 7985 }, { "epoch": 1.436213251820552, "grad_norm": 1.7395527362823486, "learning_rate": 8.363360636050675e-06, "loss": 0.7332, "step": 7986 }, { "epoch": 1.4363930594264138, "grad_norm": 1.436802864074707, "learning_rate": 8.362929654352659e-06, "loss": 0.7194, "step": 7987 }, { "epoch": 1.4365728670322755, "grad_norm": 1.4388142824172974, "learning_rate": 8.362498627024099e-06, "loss": 0.764, "step": 7988 }, { "epoch": 1.4367526746381372, "grad_norm": 1.4814163446426392, "learning_rate": 8.362067554070845e-06, "loss": 0.7598, "step": 7989 }, { "epoch": 1.436932482243999, "grad_norm": 1.5897356271743774, "learning_rate": 8.361636435498747e-06, "loss": 0.7761, "step": 7990 }, { "epoch": 1.4371122898498607, "grad_norm": 1.418185830116272, "learning_rate": 8.361205271313651e-06, "loss": 0.7377, "step": 7991 }, { "epoch": 1.4372920974557224, "grad_norm": 1.4955439567565918, "learning_rate": 8.360774061521413e-06, "loss": 0.739, "step": 7992 }, { "epoch": 1.437471905061584, "grad_norm": 1.897443175315857, "learning_rate": 8.36034280612788e-06, "loss": 0.7741, "step": 7993 }, { "epoch": 1.4376517126674457, "grad_norm": 1.093345046043396, "learning_rate": 8.359911505138902e-06, "loss": 0.9383, "step": 7994 }, { "epoch": 1.4378315202733076, "grad_norm": 1.5454682111740112, "learning_rate": 8.359480158560336e-06, "loss": 0.7012, "step": 7995 }, { "epoch": 1.4380113278791693, "grad_norm": 1.5871055126190186, "learning_rate": 8.359048766398032e-06, "loss": 0.7102, "step": 7996 }, { "epoch": 1.438191135485031, "grad_norm": 1.5842325687408447, "learning_rate": 8.358617328657841e-06, "loss": 0.7974, "step": 7997 }, { "epoch": 1.4383709430908929, "grad_norm": 1.5628169775009155, "learning_rate": 8.358185845345623e-06, "loss": 0.7716, "step": 7998 }, { "epoch": 1.4385507506967545, "grad_norm": 1.6484153270721436, "learning_rate": 8.357754316467227e-06, "loss": 0.7074, "step": 7999 }, { "epoch": 1.4387305583026162, "grad_norm": 1.4458562135696411, "learning_rate": 8.357322742028515e-06, "loss": 0.7177, "step": 8000 }, { "epoch": 1.4387305583026162, "eval_loss": 0.800898551940918, "eval_runtime": 150.9955, "eval_samples_per_second": 95.248, "eval_steps_per_second": 1.49, "step": 8000 }, { "epoch": 1.4389103659084779, "grad_norm": 1.482311487197876, "learning_rate": 8.356891122035335e-06, "loss": 0.7561, "step": 8001 }, { "epoch": 1.4390901735143395, "grad_norm": 1.4782174825668335, "learning_rate": 8.356459456493548e-06, "loss": 0.7524, "step": 8002 }, { "epoch": 1.4392699811202014, "grad_norm": 1.5664129257202148, "learning_rate": 8.35602774540901e-06, "loss": 0.7408, "step": 8003 }, { "epoch": 1.439449788726063, "grad_norm": 1.3882323503494263, "learning_rate": 8.355595988787582e-06, "loss": 0.7266, "step": 8004 }, { "epoch": 1.4396295963319248, "grad_norm": 1.1192140579223633, "learning_rate": 8.355164186635115e-06, "loss": 0.933, "step": 8005 }, { "epoch": 1.4398094039377867, "grad_norm": 1.5014644861221313, "learning_rate": 8.354732338957473e-06, "loss": 0.7221, "step": 8006 }, { "epoch": 1.4399892115436483, "grad_norm": 1.4704148769378662, "learning_rate": 8.354300445760517e-06, "loss": 0.6841, "step": 8007 }, { "epoch": 1.44016901914951, "grad_norm": 1.4272536039352417, "learning_rate": 8.353868507050106e-06, "loss": 0.7651, "step": 8008 }, { "epoch": 1.4403488267553717, "grad_norm": 1.5131862163543701, "learning_rate": 8.353436522832099e-06, "loss": 0.7394, "step": 8009 }, { "epoch": 1.4405286343612334, "grad_norm": 1.640164852142334, "learning_rate": 8.353004493112358e-06, "loss": 0.702, "step": 8010 }, { "epoch": 1.4407084419670952, "grad_norm": 1.4683536291122437, "learning_rate": 8.352572417896744e-06, "loss": 0.756, "step": 8011 }, { "epoch": 1.440888249572957, "grad_norm": 1.4787917137145996, "learning_rate": 8.352140297191125e-06, "loss": 0.7764, "step": 8012 }, { "epoch": 1.4410680571788186, "grad_norm": 1.104669213294983, "learning_rate": 8.351708131001359e-06, "loss": 0.9601, "step": 8013 }, { "epoch": 1.4412478647846805, "grad_norm": 1.650268793106079, "learning_rate": 8.35127591933331e-06, "loss": 0.8052, "step": 8014 }, { "epoch": 1.4414276723905421, "grad_norm": 1.4809467792510986, "learning_rate": 8.350843662192847e-06, "loss": 0.7345, "step": 8015 }, { "epoch": 1.4416074799964038, "grad_norm": 1.5473606586456299, "learning_rate": 8.35041135958583e-06, "loss": 0.7258, "step": 8016 }, { "epoch": 1.4417872876022657, "grad_norm": 1.7236649990081787, "learning_rate": 8.349979011518127e-06, "loss": 0.7669, "step": 8017 }, { "epoch": 1.4419670952081274, "grad_norm": 1.4163033962249756, "learning_rate": 8.349546617995607e-06, "loss": 0.7191, "step": 8018 }, { "epoch": 1.442146902813989, "grad_norm": 1.5287753343582153, "learning_rate": 8.349114179024133e-06, "loss": 0.743, "step": 8019 }, { "epoch": 1.4423267104198507, "grad_norm": 1.5415219068527222, "learning_rate": 8.348681694609573e-06, "loss": 0.7508, "step": 8020 }, { "epoch": 1.4425065180257124, "grad_norm": 1.5921461582183838, "learning_rate": 8.348249164757798e-06, "loss": 0.7629, "step": 8021 }, { "epoch": 1.4426863256315743, "grad_norm": 1.9990702867507935, "learning_rate": 8.347816589474674e-06, "loss": 0.7986, "step": 8022 }, { "epoch": 1.442866133237436, "grad_norm": 1.4789687395095825, "learning_rate": 8.347383968766072e-06, "loss": 0.7753, "step": 8023 }, { "epoch": 1.4430459408432976, "grad_norm": 1.574076533317566, "learning_rate": 8.346951302637863e-06, "loss": 0.7245, "step": 8024 }, { "epoch": 1.4432257484491595, "grad_norm": 1.5408689975738525, "learning_rate": 8.346518591095913e-06, "loss": 0.7552, "step": 8025 }, { "epoch": 1.4434055560550212, "grad_norm": 1.5275191068649292, "learning_rate": 8.3460858341461e-06, "loss": 0.7916, "step": 8026 }, { "epoch": 1.4435853636608829, "grad_norm": 1.4125739336013794, "learning_rate": 8.345653031794292e-06, "loss": 0.6962, "step": 8027 }, { "epoch": 1.4437651712667445, "grad_norm": 1.481623649597168, "learning_rate": 8.345220184046362e-06, "loss": 0.7085, "step": 8028 }, { "epoch": 1.4439449788726062, "grad_norm": 1.4425452947616577, "learning_rate": 8.344787290908183e-06, "loss": 0.7267, "step": 8029 }, { "epoch": 1.444124786478468, "grad_norm": 1.4493695497512817, "learning_rate": 8.34435435238563e-06, "loss": 0.777, "step": 8030 }, { "epoch": 1.4443045940843298, "grad_norm": 1.531576156616211, "learning_rate": 8.343921368484578e-06, "loss": 0.7605, "step": 8031 }, { "epoch": 1.4444844016901914, "grad_norm": 1.630735158920288, "learning_rate": 8.3434883392109e-06, "loss": 0.7131, "step": 8032 }, { "epoch": 1.4446642092960533, "grad_norm": 1.5244053602218628, "learning_rate": 8.34305526457047e-06, "loss": 0.7464, "step": 8033 }, { "epoch": 1.444844016901915, "grad_norm": 1.6789504289627075, "learning_rate": 8.342622144569168e-06, "loss": 0.7145, "step": 8034 }, { "epoch": 1.4450238245077767, "grad_norm": 1.4508789777755737, "learning_rate": 8.34218897921287e-06, "loss": 0.6853, "step": 8035 }, { "epoch": 1.4452036321136383, "grad_norm": 1.5674692392349243, "learning_rate": 8.341755768507452e-06, "loss": 0.7303, "step": 8036 }, { "epoch": 1.4453834397195, "grad_norm": 1.551392674446106, "learning_rate": 8.341322512458795e-06, "loss": 0.7661, "step": 8037 }, { "epoch": 1.445563247325362, "grad_norm": 1.5592193603515625, "learning_rate": 8.340889211072774e-06, "loss": 0.8149, "step": 8038 }, { "epoch": 1.4457430549312236, "grad_norm": 2.067781448364258, "learning_rate": 8.340455864355272e-06, "loss": 0.6543, "step": 8039 }, { "epoch": 1.4459228625370852, "grad_norm": 1.3672051429748535, "learning_rate": 8.340022472312165e-06, "loss": 0.651, "step": 8040 }, { "epoch": 1.4461026701429471, "grad_norm": 1.531790018081665, "learning_rate": 8.339589034949335e-06, "loss": 0.7628, "step": 8041 }, { "epoch": 1.4462824777488088, "grad_norm": 1.44938325881958, "learning_rate": 8.339155552272666e-06, "loss": 0.72, "step": 8042 }, { "epoch": 1.4464622853546705, "grad_norm": 1.5778374671936035, "learning_rate": 8.338722024288037e-06, "loss": 0.7326, "step": 8043 }, { "epoch": 1.4466420929605324, "grad_norm": 1.4468377828598022, "learning_rate": 8.33828845100133e-06, "loss": 0.7315, "step": 8044 }, { "epoch": 1.446821900566394, "grad_norm": 1.3709594011306763, "learning_rate": 8.33785483241843e-06, "loss": 0.7157, "step": 8045 }, { "epoch": 1.4470017081722557, "grad_norm": 1.479077935218811, "learning_rate": 8.33742116854522e-06, "loss": 0.7109, "step": 8046 }, { "epoch": 1.4471815157781174, "grad_norm": 1.1624650955200195, "learning_rate": 8.336987459387583e-06, "loss": 0.9409, "step": 8047 }, { "epoch": 1.447361323383979, "grad_norm": 1.5322141647338867, "learning_rate": 8.336553704951404e-06, "loss": 0.7384, "step": 8048 }, { "epoch": 1.447541130989841, "grad_norm": 1.5822985172271729, "learning_rate": 8.336119905242573e-06, "loss": 0.7892, "step": 8049 }, { "epoch": 1.4477209385957026, "grad_norm": 1.382704496383667, "learning_rate": 8.335686060266967e-06, "loss": 0.7196, "step": 8050 }, { "epoch": 1.4479007462015643, "grad_norm": 1.3619143962860107, "learning_rate": 8.335252170030482e-06, "loss": 0.6967, "step": 8051 }, { "epoch": 1.4480805538074262, "grad_norm": 1.4854744672775269, "learning_rate": 8.334818234539e-06, "loss": 0.702, "step": 8052 }, { "epoch": 1.4482603614132878, "grad_norm": 1.471045970916748, "learning_rate": 8.33438425379841e-06, "loss": 0.7874, "step": 8053 }, { "epoch": 1.4484401690191495, "grad_norm": 1.4450793266296387, "learning_rate": 8.3339502278146e-06, "loss": 0.7735, "step": 8054 }, { "epoch": 1.4486199766250112, "grad_norm": 1.6369812488555908, "learning_rate": 8.333516156593462e-06, "loss": 0.7635, "step": 8055 }, { "epoch": 1.4487997842308729, "grad_norm": 1.5470051765441895, "learning_rate": 8.333082040140884e-06, "loss": 0.7628, "step": 8056 }, { "epoch": 1.4489795918367347, "grad_norm": 1.7051485776901245, "learning_rate": 8.332647878462754e-06, "loss": 0.6955, "step": 8057 }, { "epoch": 1.4491593994425964, "grad_norm": 1.4771056175231934, "learning_rate": 8.332213671564966e-06, "loss": 0.695, "step": 8058 }, { "epoch": 1.449339207048458, "grad_norm": 1.1666303873062134, "learning_rate": 8.331779419453412e-06, "loss": 0.9451, "step": 8059 }, { "epoch": 1.44951901465432, "grad_norm": 1.5408893823623657, "learning_rate": 8.331345122133981e-06, "loss": 0.7442, "step": 8060 }, { "epoch": 1.4496988222601817, "grad_norm": 1.5204482078552246, "learning_rate": 8.33091077961257e-06, "loss": 0.7554, "step": 8061 }, { "epoch": 1.4498786298660433, "grad_norm": 1.2484245300292969, "learning_rate": 8.330476391895069e-06, "loss": 0.9561, "step": 8062 }, { "epoch": 1.450058437471905, "grad_norm": 1.40366792678833, "learning_rate": 8.330041958987374e-06, "loss": 0.8131, "step": 8063 }, { "epoch": 1.4502382450777667, "grad_norm": 1.4891785383224487, "learning_rate": 8.329607480895378e-06, "loss": 0.6875, "step": 8064 }, { "epoch": 1.4504180526836286, "grad_norm": 1.4691429138183594, "learning_rate": 8.329172957624977e-06, "loss": 0.7003, "step": 8065 }, { "epoch": 1.4505978602894902, "grad_norm": 1.5127382278442383, "learning_rate": 8.328738389182069e-06, "loss": 0.7795, "step": 8066 }, { "epoch": 1.450777667895352, "grad_norm": 1.4833534955978394, "learning_rate": 8.328303775572548e-06, "loss": 0.7381, "step": 8067 }, { "epoch": 1.4509574755012138, "grad_norm": 1.5033669471740723, "learning_rate": 8.327869116802314e-06, "loss": 0.8268, "step": 8068 }, { "epoch": 1.4511372831070755, "grad_norm": 1.4869883060455322, "learning_rate": 8.32743441287726e-06, "loss": 0.7805, "step": 8069 }, { "epoch": 1.4513170907129371, "grad_norm": 1.0938489437103271, "learning_rate": 8.326999663803287e-06, "loss": 0.9691, "step": 8070 }, { "epoch": 1.451496898318799, "grad_norm": 1.5466160774230957, "learning_rate": 8.326564869586296e-06, "loss": 0.7522, "step": 8071 }, { "epoch": 1.4516767059246607, "grad_norm": 1.4767909049987793, "learning_rate": 8.326130030232185e-06, "loss": 0.8292, "step": 8072 }, { "epoch": 1.4518565135305224, "grad_norm": 1.4870293140411377, "learning_rate": 8.325695145746852e-06, "loss": 0.7247, "step": 8073 }, { "epoch": 1.452036321136384, "grad_norm": 1.5422863960266113, "learning_rate": 8.3252602161362e-06, "loss": 0.7295, "step": 8074 }, { "epoch": 1.4522161287422457, "grad_norm": 1.460677146911621, "learning_rate": 8.324825241406128e-06, "loss": 0.7804, "step": 8075 }, { "epoch": 1.4523959363481076, "grad_norm": 1.457047939300537, "learning_rate": 8.324390221562544e-06, "loss": 0.6732, "step": 8076 }, { "epoch": 1.4525757439539693, "grad_norm": 1.6229561567306519, "learning_rate": 8.323955156611346e-06, "loss": 0.788, "step": 8077 }, { "epoch": 1.452755551559831, "grad_norm": 1.08207106590271, "learning_rate": 8.323520046558435e-06, "loss": 0.9688, "step": 8078 }, { "epoch": 1.4529353591656928, "grad_norm": 1.4885380268096924, "learning_rate": 8.323084891409721e-06, "loss": 0.7435, "step": 8079 }, { "epoch": 1.4531151667715545, "grad_norm": 1.120554804801941, "learning_rate": 8.322649691171104e-06, "loss": 0.9688, "step": 8080 }, { "epoch": 1.4532949743774162, "grad_norm": 1.502899408340454, "learning_rate": 8.322214445848492e-06, "loss": 0.7858, "step": 8081 }, { "epoch": 1.4534747819832778, "grad_norm": 1.6552315950393677, "learning_rate": 8.321779155447786e-06, "loss": 0.6655, "step": 8082 }, { "epoch": 1.4536545895891395, "grad_norm": 1.6996879577636719, "learning_rate": 8.321343819974899e-06, "loss": 0.7448, "step": 8083 }, { "epoch": 1.4538343971950014, "grad_norm": 1.164563775062561, "learning_rate": 8.320908439435732e-06, "loss": 0.9227, "step": 8084 }, { "epoch": 1.454014204800863, "grad_norm": 1.095819115638733, "learning_rate": 8.320473013836197e-06, "loss": 0.9336, "step": 8085 }, { "epoch": 1.4541940124067247, "grad_norm": 1.4229882955551147, "learning_rate": 8.320037543182198e-06, "loss": 0.7415, "step": 8086 }, { "epoch": 1.4543738200125866, "grad_norm": 1.1031447649002075, "learning_rate": 8.319602027479647e-06, "loss": 0.9486, "step": 8087 }, { "epoch": 1.4545536276184483, "grad_norm": 1.4584938287734985, "learning_rate": 8.319166466734451e-06, "loss": 0.7529, "step": 8088 }, { "epoch": 1.45473343522431, "grad_norm": 1.5892970561981201, "learning_rate": 8.318730860952523e-06, "loss": 0.7093, "step": 8089 }, { "epoch": 1.4549132428301716, "grad_norm": 1.4662898778915405, "learning_rate": 8.318295210139771e-06, "loss": 0.7264, "step": 8090 }, { "epoch": 1.4550930504360333, "grad_norm": 1.4810289144515991, "learning_rate": 8.317859514302107e-06, "loss": 0.7953, "step": 8091 }, { "epoch": 1.4552728580418952, "grad_norm": 1.392005443572998, "learning_rate": 8.317423773445443e-06, "loss": 0.7256, "step": 8092 }, { "epoch": 1.4554526656477569, "grad_norm": 1.3788552284240723, "learning_rate": 8.316987987575693e-06, "loss": 0.9599, "step": 8093 }, { "epoch": 1.4556324732536186, "grad_norm": 1.4490125179290771, "learning_rate": 8.316552156698766e-06, "loss": 0.6247, "step": 8094 }, { "epoch": 1.4558122808594804, "grad_norm": 1.5054724216461182, "learning_rate": 8.316116280820579e-06, "loss": 0.7484, "step": 8095 }, { "epoch": 1.4559920884653421, "grad_norm": 1.4225355386734009, "learning_rate": 8.315680359947045e-06, "loss": 0.7005, "step": 8096 }, { "epoch": 1.4561718960712038, "grad_norm": 1.4641404151916504, "learning_rate": 8.31524439408408e-06, "loss": 0.739, "step": 8097 }, { "epoch": 1.4563517036770655, "grad_norm": 1.161429762840271, "learning_rate": 8.314808383237596e-06, "loss": 0.9547, "step": 8098 }, { "epoch": 1.4565315112829273, "grad_norm": 1.1925418376922607, "learning_rate": 8.314372327413514e-06, "loss": 0.9397, "step": 8099 }, { "epoch": 1.456711318888789, "grad_norm": 1.587693452835083, "learning_rate": 8.313936226617746e-06, "loss": 0.7645, "step": 8100 }, { "epoch": 1.4568911264946507, "grad_norm": 1.5516911745071411, "learning_rate": 8.313500080856216e-06, "loss": 0.6916, "step": 8101 }, { "epoch": 1.4570709341005124, "grad_norm": 1.6671029329299927, "learning_rate": 8.313063890134834e-06, "loss": 0.7695, "step": 8102 }, { "epoch": 1.4572507417063743, "grad_norm": 1.4201935529708862, "learning_rate": 8.312627654459523e-06, "loss": 0.7607, "step": 8103 }, { "epoch": 1.457430549312236, "grad_norm": 1.555070400238037, "learning_rate": 8.312191373836203e-06, "loss": 0.8051, "step": 8104 }, { "epoch": 1.4576103569180976, "grad_norm": 1.4412299394607544, "learning_rate": 8.31175504827079e-06, "loss": 0.7712, "step": 8105 }, { "epoch": 1.4577901645239595, "grad_norm": 1.4545881748199463, "learning_rate": 8.311318677769209e-06, "loss": 0.7282, "step": 8106 }, { "epoch": 1.4579699721298212, "grad_norm": 1.4796340465545654, "learning_rate": 8.310882262337377e-06, "loss": 0.7112, "step": 8107 }, { "epoch": 1.4581497797356828, "grad_norm": 1.4269475936889648, "learning_rate": 8.310445801981215e-06, "loss": 0.7205, "step": 8108 }, { "epoch": 1.4583295873415445, "grad_norm": 1.431183934211731, "learning_rate": 8.31000929670665e-06, "loss": 0.7764, "step": 8109 }, { "epoch": 1.4585093949474062, "grad_norm": 1.4136613607406616, "learning_rate": 8.3095727465196e-06, "loss": 0.7197, "step": 8110 }, { "epoch": 1.458689202553268, "grad_norm": 1.926503300666809, "learning_rate": 8.309136151425994e-06, "loss": 0.8003, "step": 8111 }, { "epoch": 1.4588690101591297, "grad_norm": 1.1758300065994263, "learning_rate": 8.308699511431747e-06, "loss": 0.8956, "step": 8112 }, { "epoch": 1.4590488177649914, "grad_norm": 1.5155246257781982, "learning_rate": 8.308262826542794e-06, "loss": 0.7648, "step": 8113 }, { "epoch": 1.4592286253708533, "grad_norm": 1.4940259456634521, "learning_rate": 8.307826096765054e-06, "loss": 0.7541, "step": 8114 }, { "epoch": 1.459408432976715, "grad_norm": 1.0526134967803955, "learning_rate": 8.307389322104454e-06, "loss": 0.9409, "step": 8115 }, { "epoch": 1.4595882405825766, "grad_norm": 2.3385982513427734, "learning_rate": 8.30695250256692e-06, "loss": 0.7625, "step": 8116 }, { "epoch": 1.4597680481884383, "grad_norm": 1.3893554210662842, "learning_rate": 8.30651563815838e-06, "loss": 0.7446, "step": 8117 }, { "epoch": 1.4599478557943, "grad_norm": 1.4535915851593018, "learning_rate": 8.306078728884761e-06, "loss": 0.7253, "step": 8118 }, { "epoch": 1.4601276634001619, "grad_norm": 1.379348635673523, "learning_rate": 8.305641774751993e-06, "loss": 0.7542, "step": 8119 }, { "epoch": 1.4603074710060235, "grad_norm": 1.5812796354293823, "learning_rate": 8.305204775766003e-06, "loss": 0.7642, "step": 8120 }, { "epoch": 1.4604872786118852, "grad_norm": 1.5198702812194824, "learning_rate": 8.30476773193272e-06, "loss": 0.7565, "step": 8121 }, { "epoch": 1.460667086217747, "grad_norm": 1.5575850009918213, "learning_rate": 8.304330643258075e-06, "loss": 0.7927, "step": 8122 }, { "epoch": 1.4608468938236088, "grad_norm": 1.5245764255523682, "learning_rate": 8.303893509748002e-06, "loss": 0.6553, "step": 8123 }, { "epoch": 1.4610267014294704, "grad_norm": 1.1829121112823486, "learning_rate": 8.303456331408426e-06, "loss": 0.9269, "step": 8124 }, { "epoch": 1.4612065090353321, "grad_norm": 1.5043433904647827, "learning_rate": 8.303019108245283e-06, "loss": 0.7075, "step": 8125 }, { "epoch": 1.461386316641194, "grad_norm": 1.5324461460113525, "learning_rate": 8.302581840264506e-06, "loss": 0.7613, "step": 8126 }, { "epoch": 1.4615661242470557, "grad_norm": 1.5810343027114868, "learning_rate": 8.302144527472024e-06, "loss": 0.7045, "step": 8127 }, { "epoch": 1.4617459318529173, "grad_norm": 1.1492347717285156, "learning_rate": 8.301707169873777e-06, "loss": 0.9641, "step": 8128 }, { "epoch": 1.461925739458779, "grad_norm": 1.4561026096343994, "learning_rate": 8.301269767475694e-06, "loss": 0.7195, "step": 8129 }, { "epoch": 1.462105547064641, "grad_norm": 1.4970802068710327, "learning_rate": 8.300832320283711e-06, "loss": 0.7447, "step": 8130 }, { "epoch": 1.4622853546705026, "grad_norm": 1.5646874904632568, "learning_rate": 8.300394828303768e-06, "loss": 0.7723, "step": 8131 }, { "epoch": 1.4624651622763643, "grad_norm": 1.5461983680725098, "learning_rate": 8.299957291541794e-06, "loss": 0.7066, "step": 8132 }, { "epoch": 1.4626449698822261, "grad_norm": 1.413730263710022, "learning_rate": 8.299519710003732e-06, "loss": 0.7125, "step": 8133 }, { "epoch": 1.4628247774880878, "grad_norm": 1.5190731287002563, "learning_rate": 8.299082083695516e-06, "loss": 0.7095, "step": 8134 }, { "epoch": 1.4630045850939495, "grad_norm": 1.5418230295181274, "learning_rate": 8.298644412623085e-06, "loss": 0.7889, "step": 8135 }, { "epoch": 1.4631843926998112, "grad_norm": 1.6860933303833008, "learning_rate": 8.298206696792378e-06, "loss": 0.768, "step": 8136 }, { "epoch": 1.4633642003056728, "grad_norm": 1.4841344356536865, "learning_rate": 8.297768936209334e-06, "loss": 0.7643, "step": 8137 }, { "epoch": 1.4635440079115347, "grad_norm": 1.5338574647903442, "learning_rate": 8.297331130879891e-06, "loss": 0.7569, "step": 8138 }, { "epoch": 1.4637238155173964, "grad_norm": 1.4310990571975708, "learning_rate": 8.296893280809993e-06, "loss": 0.6948, "step": 8139 }, { "epoch": 1.463903623123258, "grad_norm": 1.3585014343261719, "learning_rate": 8.29645538600558e-06, "loss": 0.6874, "step": 8140 }, { "epoch": 1.46408343072912, "grad_norm": 1.2234537601470947, "learning_rate": 8.29601744647259e-06, "loss": 0.9396, "step": 8141 }, { "epoch": 1.4642632383349816, "grad_norm": 1.482885718345642, "learning_rate": 8.29557946221697e-06, "loss": 0.7603, "step": 8142 }, { "epoch": 1.4644430459408433, "grad_norm": 1.509323000907898, "learning_rate": 8.29514143324466e-06, "loss": 0.7138, "step": 8143 }, { "epoch": 1.464622853546705, "grad_norm": 1.0958800315856934, "learning_rate": 8.294703359561605e-06, "loss": 0.9238, "step": 8144 }, { "epoch": 1.4648026611525666, "grad_norm": 1.5069829225540161, "learning_rate": 8.294265241173748e-06, "loss": 0.6519, "step": 8145 }, { "epoch": 1.4649824687584285, "grad_norm": 1.4771171808242798, "learning_rate": 8.293827078087036e-06, "loss": 0.7279, "step": 8146 }, { "epoch": 1.4651622763642902, "grad_norm": 1.4393937587738037, "learning_rate": 8.29338887030741e-06, "loss": 0.7731, "step": 8147 }, { "epoch": 1.4653420839701519, "grad_norm": 1.4948762655258179, "learning_rate": 8.29295061784082e-06, "loss": 0.7382, "step": 8148 }, { "epoch": 1.4655218915760138, "grad_norm": 1.6396058797836304, "learning_rate": 8.29251232069321e-06, "loss": 0.767, "step": 8149 }, { "epoch": 1.4657016991818754, "grad_norm": 1.498496174812317, "learning_rate": 8.292073978870528e-06, "loss": 0.7513, "step": 8150 }, { "epoch": 1.465881506787737, "grad_norm": 1.0744438171386719, "learning_rate": 8.291635592378722e-06, "loss": 0.919, "step": 8151 }, { "epoch": 1.4660613143935988, "grad_norm": 1.6437928676605225, "learning_rate": 8.291197161223741e-06, "loss": 0.7656, "step": 8152 }, { "epoch": 1.4662411219994604, "grad_norm": 1.4538756608963013, "learning_rate": 8.290758685411531e-06, "loss": 0.6972, "step": 8153 }, { "epoch": 1.4664209296053223, "grad_norm": 1.562134861946106, "learning_rate": 8.290320164948046e-06, "loss": 0.7276, "step": 8154 }, { "epoch": 1.466600737211184, "grad_norm": 1.0552330017089844, "learning_rate": 8.28988159983923e-06, "loss": 0.9224, "step": 8155 }, { "epoch": 1.4667805448170457, "grad_norm": 1.455652117729187, "learning_rate": 8.289442990091041e-06, "loss": 0.6836, "step": 8156 }, { "epoch": 1.4669603524229076, "grad_norm": 1.4064151048660278, "learning_rate": 8.289004335709426e-06, "loss": 0.6964, "step": 8157 }, { "epoch": 1.4671401600287692, "grad_norm": 1.3824774026870728, "learning_rate": 8.288565636700338e-06, "loss": 0.6527, "step": 8158 }, { "epoch": 1.467319967634631, "grad_norm": 1.4409505128860474, "learning_rate": 8.288126893069729e-06, "loss": 0.7374, "step": 8159 }, { "epoch": 1.4674997752404928, "grad_norm": 1.5158456563949585, "learning_rate": 8.287688104823552e-06, "loss": 0.7603, "step": 8160 }, { "epoch": 1.4676795828463545, "grad_norm": 1.6749448776245117, "learning_rate": 8.287249271967763e-06, "loss": 0.7587, "step": 8161 }, { "epoch": 1.4678593904522161, "grad_norm": 1.7559701204299927, "learning_rate": 8.286810394508313e-06, "loss": 0.7601, "step": 8162 }, { "epoch": 1.4680391980580778, "grad_norm": 1.5411131381988525, "learning_rate": 8.28637147245116e-06, "loss": 0.7773, "step": 8163 }, { "epoch": 1.4682190056639395, "grad_norm": 1.484503149986267, "learning_rate": 8.285932505802257e-06, "loss": 0.7407, "step": 8164 }, { "epoch": 1.4683988132698014, "grad_norm": 1.9855087995529175, "learning_rate": 8.285493494567562e-06, "loss": 0.6767, "step": 8165 }, { "epoch": 1.468578620875663, "grad_norm": 1.5224567651748657, "learning_rate": 8.285054438753032e-06, "loss": 0.7185, "step": 8166 }, { "epoch": 1.4687584284815247, "grad_norm": 1.5004944801330566, "learning_rate": 8.28461533836462e-06, "loss": 0.7375, "step": 8167 }, { "epoch": 1.4689382360873866, "grad_norm": 1.7396881580352783, "learning_rate": 8.284176193408293e-06, "loss": 0.7093, "step": 8168 }, { "epoch": 1.4691180436932483, "grad_norm": 1.8604092597961426, "learning_rate": 8.283737003890002e-06, "loss": 0.7473, "step": 8169 }, { "epoch": 1.46929785129911, "grad_norm": 1.557818055152893, "learning_rate": 8.283297769815709e-06, "loss": 0.7334, "step": 8170 }, { "epoch": 1.4694776589049716, "grad_norm": 1.5424765348434448, "learning_rate": 8.282858491191372e-06, "loss": 0.7421, "step": 8171 }, { "epoch": 1.4696574665108333, "grad_norm": 1.552935242652893, "learning_rate": 8.282419168022953e-06, "loss": 0.7719, "step": 8172 }, { "epoch": 1.4698372741166952, "grad_norm": 1.6720393896102905, "learning_rate": 8.281979800316414e-06, "loss": 0.8667, "step": 8173 }, { "epoch": 1.4700170817225569, "grad_norm": 1.4256582260131836, "learning_rate": 8.281540388077716e-06, "loss": 0.7659, "step": 8174 }, { "epoch": 1.4701968893284185, "grad_norm": 1.315956950187683, "learning_rate": 8.28110093131282e-06, "loss": 0.9193, "step": 8175 }, { "epoch": 1.4703766969342804, "grad_norm": 1.475346326828003, "learning_rate": 8.28066143002769e-06, "loss": 0.7306, "step": 8176 }, { "epoch": 1.470556504540142, "grad_norm": 1.1820306777954102, "learning_rate": 8.280221884228288e-06, "loss": 0.9033, "step": 8177 }, { "epoch": 1.4707363121460038, "grad_norm": 1.4968228340148926, "learning_rate": 8.279782293920579e-06, "loss": 0.7561, "step": 8178 }, { "epoch": 1.4709161197518654, "grad_norm": 1.430180311203003, "learning_rate": 8.27934265911053e-06, "loss": 0.7666, "step": 8179 }, { "epoch": 1.471095927357727, "grad_norm": 1.5261120796203613, "learning_rate": 8.278902979804101e-06, "loss": 0.7173, "step": 8180 }, { "epoch": 1.471275734963589, "grad_norm": 1.4466477632522583, "learning_rate": 8.278463256007263e-06, "loss": 0.7943, "step": 8181 }, { "epoch": 1.4714555425694507, "grad_norm": 1.544062614440918, "learning_rate": 8.278023487725981e-06, "loss": 0.7337, "step": 8182 }, { "epoch": 1.4716353501753123, "grad_norm": 1.0750032663345337, "learning_rate": 8.277583674966219e-06, "loss": 0.9469, "step": 8183 }, { "epoch": 1.4718151577811742, "grad_norm": 1.5377429723739624, "learning_rate": 8.27714381773395e-06, "loss": 0.7257, "step": 8184 }, { "epoch": 1.471994965387036, "grad_norm": 1.4795620441436768, "learning_rate": 8.276703916035138e-06, "loss": 0.7178, "step": 8185 }, { "epoch": 1.4721747729928976, "grad_norm": 1.4625871181488037, "learning_rate": 8.276263969875753e-06, "loss": 0.7718, "step": 8186 }, { "epoch": 1.4723545805987595, "grad_norm": 1.5974531173706055, "learning_rate": 8.275823979261766e-06, "loss": 0.7353, "step": 8187 }, { "epoch": 1.4725343882046211, "grad_norm": 1.4673079252243042, "learning_rate": 8.275383944199145e-06, "loss": 0.7467, "step": 8188 }, { "epoch": 1.4727141958104828, "grad_norm": 1.4167077541351318, "learning_rate": 8.27494386469386e-06, "loss": 0.722, "step": 8189 }, { "epoch": 1.4728940034163445, "grad_norm": 1.5889241695404053, "learning_rate": 8.274503740751886e-06, "loss": 0.656, "step": 8190 }, { "epoch": 1.4730738110222061, "grad_norm": 1.4283021688461304, "learning_rate": 8.274063572379193e-06, "loss": 0.7365, "step": 8191 }, { "epoch": 1.473253618628068, "grad_norm": 1.4888184070587158, "learning_rate": 8.273623359581754e-06, "loss": 0.7339, "step": 8192 }, { "epoch": 1.4734334262339297, "grad_norm": 1.5500799417495728, "learning_rate": 8.27318310236554e-06, "loss": 0.7802, "step": 8193 }, { "epoch": 1.4736132338397914, "grad_norm": 1.5479284524917603, "learning_rate": 8.272742800736526e-06, "loss": 0.7201, "step": 8194 }, { "epoch": 1.4737930414456533, "grad_norm": 1.529345154762268, "learning_rate": 8.272302454700687e-06, "loss": 0.7271, "step": 8195 }, { "epoch": 1.473972849051515, "grad_norm": 1.090976357460022, "learning_rate": 8.271862064263997e-06, "loss": 0.9671, "step": 8196 }, { "epoch": 1.4741526566573766, "grad_norm": 1.6541649103164673, "learning_rate": 8.271421629432434e-06, "loss": 0.824, "step": 8197 }, { "epoch": 1.4743324642632383, "grad_norm": 1.7042369842529297, "learning_rate": 8.27098115021197e-06, "loss": 0.6741, "step": 8198 }, { "epoch": 1.4745122718691, "grad_norm": 1.5413740873336792, "learning_rate": 8.270540626608583e-06, "loss": 0.7283, "step": 8199 }, { "epoch": 1.4746920794749618, "grad_norm": 1.378352165222168, "learning_rate": 8.270100058628253e-06, "loss": 0.6741, "step": 8200 }, { "epoch": 1.4748718870808235, "grad_norm": 1.5565582513809204, "learning_rate": 8.269659446276955e-06, "loss": 0.7312, "step": 8201 }, { "epoch": 1.4750516946866852, "grad_norm": 1.5929290056228638, "learning_rate": 8.269218789560669e-06, "loss": 0.7896, "step": 8202 }, { "epoch": 1.475231502292547, "grad_norm": 1.1540848016738892, "learning_rate": 8.268778088485374e-06, "loss": 0.9429, "step": 8203 }, { "epoch": 1.4754113098984087, "grad_norm": 1.4563959836959839, "learning_rate": 8.268337343057049e-06, "loss": 0.7242, "step": 8204 }, { "epoch": 1.4755911175042704, "grad_norm": 1.0401403903961182, "learning_rate": 8.267896553281674e-06, "loss": 0.937, "step": 8205 }, { "epoch": 1.475770925110132, "grad_norm": 1.547757863998413, "learning_rate": 8.267455719165232e-06, "loss": 0.7159, "step": 8206 }, { "epoch": 1.4759507327159938, "grad_norm": 1.548104166984558, "learning_rate": 8.267014840713703e-06, "loss": 0.6994, "step": 8207 }, { "epoch": 1.4761305403218556, "grad_norm": 1.1829084157943726, "learning_rate": 8.266573917933069e-06, "loss": 0.9351, "step": 8208 }, { "epoch": 1.4763103479277173, "grad_norm": 1.3448768854141235, "learning_rate": 8.266132950829313e-06, "loss": 0.6972, "step": 8209 }, { "epoch": 1.476490155533579, "grad_norm": 1.610416293144226, "learning_rate": 8.265691939408417e-06, "loss": 0.7764, "step": 8210 }, { "epoch": 1.4766699631394409, "grad_norm": 1.4658699035644531, "learning_rate": 8.265250883676369e-06, "loss": 0.7752, "step": 8211 }, { "epoch": 1.4768497707453025, "grad_norm": 1.4808319807052612, "learning_rate": 8.26480978363915e-06, "loss": 0.7115, "step": 8212 }, { "epoch": 1.4770295783511642, "grad_norm": 1.5798710584640503, "learning_rate": 8.264368639302746e-06, "loss": 0.7038, "step": 8213 }, { "epoch": 1.477209385957026, "grad_norm": 1.4309720993041992, "learning_rate": 8.263927450673144e-06, "loss": 0.7312, "step": 8214 }, { "epoch": 1.4773891935628878, "grad_norm": 1.549475073814392, "learning_rate": 8.263486217756328e-06, "loss": 0.772, "step": 8215 }, { "epoch": 1.4775690011687495, "grad_norm": 1.1078706979751587, "learning_rate": 8.263044940558286e-06, "loss": 0.9372, "step": 8216 }, { "epoch": 1.4777488087746111, "grad_norm": 1.4676491022109985, "learning_rate": 8.262603619085005e-06, "loss": 0.683, "step": 8217 }, { "epoch": 1.4779286163804728, "grad_norm": 1.6146184206008911, "learning_rate": 8.262162253342475e-06, "loss": 0.7887, "step": 8218 }, { "epoch": 1.4781084239863347, "grad_norm": 1.5084514617919922, "learning_rate": 8.261720843336684e-06, "loss": 0.7043, "step": 8219 }, { "epoch": 1.4782882315921964, "grad_norm": 1.5623451471328735, "learning_rate": 8.26127938907362e-06, "loss": 0.7782, "step": 8220 }, { "epoch": 1.478468039198058, "grad_norm": 1.542629599571228, "learning_rate": 8.260837890559275e-06, "loss": 0.7082, "step": 8221 }, { "epoch": 1.47864784680392, "grad_norm": 1.512925386428833, "learning_rate": 8.260396347799638e-06, "loss": 0.7271, "step": 8222 }, { "epoch": 1.4788276544097816, "grad_norm": 1.56846284866333, "learning_rate": 8.2599547608007e-06, "loss": 0.7616, "step": 8223 }, { "epoch": 1.4790074620156433, "grad_norm": 1.4653291702270508, "learning_rate": 8.259513129568455e-06, "loss": 0.7494, "step": 8224 }, { "epoch": 1.479187269621505, "grad_norm": 1.5559698343276978, "learning_rate": 8.259071454108892e-06, "loss": 0.8146, "step": 8225 }, { "epoch": 1.4793670772273666, "grad_norm": 1.5597429275512695, "learning_rate": 8.258629734428008e-06, "loss": 0.7571, "step": 8226 }, { "epoch": 1.4795468848332285, "grad_norm": 1.4658197164535522, "learning_rate": 8.258187970531792e-06, "loss": 0.7345, "step": 8227 }, { "epoch": 1.4797266924390902, "grad_norm": 1.2190394401550293, "learning_rate": 8.257746162426241e-06, "loss": 0.8664, "step": 8228 }, { "epoch": 1.4799065000449518, "grad_norm": 1.4187390804290771, "learning_rate": 8.257304310117348e-06, "loss": 0.6699, "step": 8229 }, { "epoch": 1.4800863076508137, "grad_norm": 1.513116478919983, "learning_rate": 8.256862413611113e-06, "loss": 0.7601, "step": 8230 }, { "epoch": 1.4802661152566754, "grad_norm": 1.5155094861984253, "learning_rate": 8.256420472913525e-06, "loss": 0.7583, "step": 8231 }, { "epoch": 1.480445922862537, "grad_norm": 1.4174200296401978, "learning_rate": 8.255978488030586e-06, "loss": 0.7059, "step": 8232 }, { "epoch": 1.4806257304683987, "grad_norm": 1.49797523021698, "learning_rate": 8.25553645896829e-06, "loss": 0.7587, "step": 8233 }, { "epoch": 1.4808055380742604, "grad_norm": 1.0860832929611206, "learning_rate": 8.255094385732636e-06, "loss": 0.9767, "step": 8234 }, { "epoch": 1.4809853456801223, "grad_norm": 1.4369637966156006, "learning_rate": 8.254652268329624e-06, "loss": 0.7457, "step": 8235 }, { "epoch": 1.481165153285984, "grad_norm": 1.594443440437317, "learning_rate": 8.25421010676525e-06, "loss": 0.8222, "step": 8236 }, { "epoch": 1.4813449608918456, "grad_norm": 1.6058164834976196, "learning_rate": 8.253767901045514e-06, "loss": 0.8403, "step": 8237 }, { "epoch": 1.4815247684977075, "grad_norm": 1.4123820066452026, "learning_rate": 8.253325651176419e-06, "loss": 0.7191, "step": 8238 }, { "epoch": 1.4817045761035692, "grad_norm": 1.557568073272705, "learning_rate": 8.252883357163963e-06, "loss": 0.7571, "step": 8239 }, { "epoch": 1.4818843837094309, "grad_norm": 1.401509404182434, "learning_rate": 8.252441019014148e-06, "loss": 0.7083, "step": 8240 }, { "epoch": 1.4820641913152928, "grad_norm": 1.096911907196045, "learning_rate": 8.251998636732975e-06, "loss": 0.9677, "step": 8241 }, { "epoch": 1.4822439989211544, "grad_norm": 1.420892596244812, "learning_rate": 8.251556210326448e-06, "loss": 0.6819, "step": 8242 }, { "epoch": 1.482423806527016, "grad_norm": 1.415387511253357, "learning_rate": 8.25111373980057e-06, "loss": 0.7109, "step": 8243 }, { "epoch": 1.4826036141328778, "grad_norm": 1.1725285053253174, "learning_rate": 8.250671225161345e-06, "loss": 0.9166, "step": 8244 }, { "epoch": 1.4827834217387394, "grad_norm": 1.4644678831100464, "learning_rate": 8.250228666414777e-06, "loss": 0.7559, "step": 8245 }, { "epoch": 1.4829632293446013, "grad_norm": 1.1012977361679077, "learning_rate": 8.249786063566868e-06, "loss": 0.9308, "step": 8246 }, { "epoch": 1.483143036950463, "grad_norm": 1.5539445877075195, "learning_rate": 8.24934341662363e-06, "loss": 0.7513, "step": 8247 }, { "epoch": 1.4833228445563247, "grad_norm": 1.4961918592453003, "learning_rate": 8.248900725591064e-06, "loss": 0.7487, "step": 8248 }, { "epoch": 1.4835026521621866, "grad_norm": 1.4292548894882202, "learning_rate": 8.248457990475176e-06, "loss": 0.6953, "step": 8249 }, { "epoch": 1.4836824597680482, "grad_norm": 1.3927630186080933, "learning_rate": 8.24801521128198e-06, "loss": 0.7297, "step": 8250 }, { "epoch": 1.48386226737391, "grad_norm": 1.4314608573913574, "learning_rate": 8.247572388017476e-06, "loss": 0.6946, "step": 8251 }, { "epoch": 1.4840420749797716, "grad_norm": 1.4458423852920532, "learning_rate": 8.247129520687677e-06, "loss": 0.7276, "step": 8252 }, { "epoch": 1.4842218825856333, "grad_norm": 1.561725378036499, "learning_rate": 8.24668660929859e-06, "loss": 0.7679, "step": 8253 }, { "epoch": 1.4844016901914951, "grad_norm": 1.476361632347107, "learning_rate": 8.246243653856228e-06, "loss": 0.7303, "step": 8254 }, { "epoch": 1.4845814977973568, "grad_norm": 1.5761234760284424, "learning_rate": 8.245800654366596e-06, "loss": 0.714, "step": 8255 }, { "epoch": 1.4847613054032185, "grad_norm": 1.607306957244873, "learning_rate": 8.24535761083571e-06, "loss": 0.7647, "step": 8256 }, { "epoch": 1.4849411130090804, "grad_norm": 1.4205511808395386, "learning_rate": 8.24491452326958e-06, "loss": 0.7708, "step": 8257 }, { "epoch": 1.485120920614942, "grad_norm": 1.4283316135406494, "learning_rate": 8.244471391674218e-06, "loss": 0.7509, "step": 8258 }, { "epoch": 1.4853007282208037, "grad_norm": 1.5565547943115234, "learning_rate": 8.244028216055636e-06, "loss": 0.7601, "step": 8259 }, { "epoch": 1.4854805358266654, "grad_norm": 1.6272350549697876, "learning_rate": 8.243584996419845e-06, "loss": 0.6975, "step": 8260 }, { "epoch": 1.485660343432527, "grad_norm": 1.6209064722061157, "learning_rate": 8.243141732772864e-06, "loss": 0.7012, "step": 8261 }, { "epoch": 1.485840151038389, "grad_norm": 1.4817471504211426, "learning_rate": 8.242698425120706e-06, "loss": 0.7095, "step": 8262 }, { "epoch": 1.4860199586442506, "grad_norm": 1.4159902334213257, "learning_rate": 8.242255073469384e-06, "loss": 0.731, "step": 8263 }, { "epoch": 1.4861997662501123, "grad_norm": 1.431549072265625, "learning_rate": 8.241811677824914e-06, "loss": 0.9246, "step": 8264 }, { "epoch": 1.4863795738559742, "grad_norm": 1.4865409135818481, "learning_rate": 8.241368238193315e-06, "loss": 0.6807, "step": 8265 }, { "epoch": 1.4865593814618359, "grad_norm": 1.5420230627059937, "learning_rate": 8.240924754580602e-06, "loss": 0.8006, "step": 8266 }, { "epoch": 1.4867391890676975, "grad_norm": 1.5485459566116333, "learning_rate": 8.240481226992792e-06, "loss": 0.7448, "step": 8267 }, { "epoch": 1.4869189966735594, "grad_norm": 1.4834243059158325, "learning_rate": 8.240037655435904e-06, "loss": 0.7404, "step": 8268 }, { "epoch": 1.487098804279421, "grad_norm": 1.3655529022216797, "learning_rate": 8.239594039915957e-06, "loss": 0.7128, "step": 8269 }, { "epoch": 1.4872786118852828, "grad_norm": 1.591181755065918, "learning_rate": 8.239150380438967e-06, "loss": 0.8027, "step": 8270 }, { "epoch": 1.4874584194911444, "grad_norm": 1.5712956190109253, "learning_rate": 8.238706677010959e-06, "loss": 0.7427, "step": 8271 }, { "epoch": 1.487638227097006, "grad_norm": 1.5041141510009766, "learning_rate": 8.23826292963795e-06, "loss": 0.7309, "step": 8272 }, { "epoch": 1.487818034702868, "grad_norm": 1.4128060340881348, "learning_rate": 8.237819138325964e-06, "loss": 0.6792, "step": 8273 }, { "epoch": 1.4879978423087297, "grad_norm": 1.4041392803192139, "learning_rate": 8.23737530308102e-06, "loss": 0.7436, "step": 8274 }, { "epoch": 1.4881776499145913, "grad_norm": 1.0798516273498535, "learning_rate": 8.23693142390914e-06, "loss": 0.979, "step": 8275 }, { "epoch": 1.4883574575204532, "grad_norm": 1.571120262145996, "learning_rate": 8.236487500816347e-06, "loss": 0.7042, "step": 8276 }, { "epoch": 1.488537265126315, "grad_norm": 1.440025806427002, "learning_rate": 8.236043533808666e-06, "loss": 0.6867, "step": 8277 }, { "epoch": 1.4887170727321766, "grad_norm": 1.0321292877197266, "learning_rate": 8.23559952289212e-06, "loss": 0.9355, "step": 8278 }, { "epoch": 1.4888968803380382, "grad_norm": 1.4712117910385132, "learning_rate": 8.235155468072738e-06, "loss": 0.7558, "step": 8279 }, { "epoch": 1.4890766879439, "grad_norm": 1.4485729932785034, "learning_rate": 8.234711369356536e-06, "loss": 0.7523, "step": 8280 }, { "epoch": 1.4892564955497618, "grad_norm": 1.1080952882766724, "learning_rate": 8.234267226749547e-06, "loss": 0.9739, "step": 8281 }, { "epoch": 1.4894363031556235, "grad_norm": 1.5290387868881226, "learning_rate": 8.233823040257796e-06, "loss": 0.7367, "step": 8282 }, { "epoch": 1.4896161107614851, "grad_norm": 1.4091100692749023, "learning_rate": 8.23337880988731e-06, "loss": 0.7355, "step": 8283 }, { "epoch": 1.489795918367347, "grad_norm": 0.9990484118461609, "learning_rate": 8.232934535644115e-06, "loss": 0.9217, "step": 8284 }, { "epoch": 1.4899757259732087, "grad_norm": 1.6388920545578003, "learning_rate": 8.232490217534241e-06, "loss": 0.7397, "step": 8285 }, { "epoch": 1.4901555335790704, "grad_norm": 1.4494589567184448, "learning_rate": 8.232045855563717e-06, "loss": 0.7878, "step": 8286 }, { "epoch": 1.490335341184932, "grad_norm": 1.479661226272583, "learning_rate": 8.231601449738571e-06, "loss": 0.7221, "step": 8287 }, { "epoch": 1.4905151487907937, "grad_norm": 1.5037389993667603, "learning_rate": 8.231157000064833e-06, "loss": 0.8154, "step": 8288 }, { "epoch": 1.4906949563966556, "grad_norm": 1.8090142011642456, "learning_rate": 8.230712506548535e-06, "loss": 0.6226, "step": 8289 }, { "epoch": 1.4908747640025173, "grad_norm": 1.6199077367782593, "learning_rate": 8.230267969195706e-06, "loss": 0.6804, "step": 8290 }, { "epoch": 1.491054571608379, "grad_norm": 1.4888801574707031, "learning_rate": 8.229823388012381e-06, "loss": 0.732, "step": 8291 }, { "epoch": 1.4912343792142408, "grad_norm": 1.6014012098312378, "learning_rate": 8.22937876300459e-06, "loss": 0.7408, "step": 8292 }, { "epoch": 1.4914141868201025, "grad_norm": 1.5455900430679321, "learning_rate": 8.228934094178368e-06, "loss": 0.7716, "step": 8293 }, { "epoch": 1.4915939944259642, "grad_norm": 1.4901787042617798, "learning_rate": 8.228489381539744e-06, "loss": 0.7546, "step": 8294 }, { "epoch": 1.491773802031826, "grad_norm": 1.5430619716644287, "learning_rate": 8.228044625094757e-06, "loss": 0.7534, "step": 8295 }, { "epoch": 1.4919536096376877, "grad_norm": 1.5148839950561523, "learning_rate": 8.227599824849439e-06, "loss": 0.6685, "step": 8296 }, { "epoch": 1.4921334172435494, "grad_norm": 1.251821517944336, "learning_rate": 8.227154980809828e-06, "loss": 0.9306, "step": 8297 }, { "epoch": 1.492313224849411, "grad_norm": 1.3302665948867798, "learning_rate": 8.226710092981957e-06, "loss": 0.7218, "step": 8298 }, { "epoch": 1.4924930324552728, "grad_norm": 1.4481803178787231, "learning_rate": 8.226265161371866e-06, "loss": 0.7134, "step": 8299 }, { "epoch": 1.4926728400611347, "grad_norm": 1.6020386219024658, "learning_rate": 8.225820185985589e-06, "loss": 0.7088, "step": 8300 }, { "epoch": 1.4928526476669963, "grad_norm": 1.0791248083114624, "learning_rate": 8.225375166829164e-06, "loss": 0.934, "step": 8301 }, { "epoch": 1.493032455272858, "grad_norm": 1.53636634349823, "learning_rate": 8.22493010390863e-06, "loss": 0.7246, "step": 8302 }, { "epoch": 1.4932122628787199, "grad_norm": 1.4167062044143677, "learning_rate": 8.224484997230027e-06, "loss": 0.8003, "step": 8303 }, { "epoch": 1.4933920704845816, "grad_norm": 1.4187631607055664, "learning_rate": 8.224039846799394e-06, "loss": 0.6904, "step": 8304 }, { "epoch": 1.4935718780904432, "grad_norm": 1.4520639181137085, "learning_rate": 8.22359465262277e-06, "loss": 0.7244, "step": 8305 }, { "epoch": 1.493751685696305, "grad_norm": 1.4756306409835815, "learning_rate": 8.223149414706196e-06, "loss": 0.668, "step": 8306 }, { "epoch": 1.4939314933021666, "grad_norm": 1.1408448219299316, "learning_rate": 8.222704133055714e-06, "loss": 0.9324, "step": 8307 }, { "epoch": 1.4941113009080285, "grad_norm": 1.519895076751709, "learning_rate": 8.222258807677367e-06, "loss": 0.7422, "step": 8308 }, { "epoch": 1.4942911085138901, "grad_norm": 1.475395917892456, "learning_rate": 8.221813438577192e-06, "loss": 0.7322, "step": 8309 }, { "epoch": 1.4944709161197518, "grad_norm": 1.523459792137146, "learning_rate": 8.22136802576124e-06, "loss": 0.6841, "step": 8310 }, { "epoch": 1.4946507237256137, "grad_norm": 1.4787436723709106, "learning_rate": 8.22092256923555e-06, "loss": 0.7654, "step": 8311 }, { "epoch": 1.4948305313314754, "grad_norm": 1.430776596069336, "learning_rate": 8.220477069006166e-06, "loss": 0.7391, "step": 8312 }, { "epoch": 1.495010338937337, "grad_norm": 1.4865299463272095, "learning_rate": 8.220031525079133e-06, "loss": 0.7564, "step": 8313 }, { "epoch": 1.4951901465431987, "grad_norm": 1.5275373458862305, "learning_rate": 8.2195859374605e-06, "loss": 0.7794, "step": 8314 }, { "epoch": 1.4953699541490604, "grad_norm": 1.1145555973052979, "learning_rate": 8.219140306156308e-06, "loss": 0.9438, "step": 8315 }, { "epoch": 1.4955497617549223, "grad_norm": 1.573866844177246, "learning_rate": 8.218694631172606e-06, "loss": 0.7335, "step": 8316 }, { "epoch": 1.495729569360784, "grad_norm": 1.4004191160202026, "learning_rate": 8.218248912515443e-06, "loss": 0.6938, "step": 8317 }, { "epoch": 1.4959093769666456, "grad_norm": 1.4968795776367188, "learning_rate": 8.217803150190864e-06, "loss": 0.6923, "step": 8318 }, { "epoch": 1.4960891845725075, "grad_norm": 1.5724800825119019, "learning_rate": 8.217357344204919e-06, "loss": 0.8265, "step": 8319 }, { "epoch": 1.4962689921783692, "grad_norm": 1.5236132144927979, "learning_rate": 8.216911494563657e-06, "loss": 0.7788, "step": 8320 }, { "epoch": 1.4964487997842308, "grad_norm": 1.5135698318481445, "learning_rate": 8.216465601273127e-06, "loss": 0.7762, "step": 8321 }, { "epoch": 1.4966286073900927, "grad_norm": 1.4883581399917603, "learning_rate": 8.216019664339376e-06, "loss": 0.7063, "step": 8322 }, { "epoch": 1.4968084149959544, "grad_norm": 1.5019867420196533, "learning_rate": 8.215573683768462e-06, "loss": 0.7827, "step": 8323 }, { "epoch": 1.496988222601816, "grad_norm": 1.5685052871704102, "learning_rate": 8.21512765956643e-06, "loss": 0.7867, "step": 8324 }, { "epoch": 1.4971680302076777, "grad_norm": 1.465436577796936, "learning_rate": 8.214681591739335e-06, "loss": 0.7441, "step": 8325 }, { "epoch": 1.4973478378135394, "grad_norm": 1.6990339756011963, "learning_rate": 8.214235480293228e-06, "loss": 0.7809, "step": 8326 }, { "epoch": 1.4975276454194013, "grad_norm": 1.813413143157959, "learning_rate": 8.213789325234166e-06, "loss": 0.7275, "step": 8327 }, { "epoch": 1.497707453025263, "grad_norm": 1.5133932828903198, "learning_rate": 8.213343126568197e-06, "loss": 0.7924, "step": 8328 }, { "epoch": 1.4978872606311247, "grad_norm": 1.509260892868042, "learning_rate": 8.21289688430138e-06, "loss": 0.7135, "step": 8329 }, { "epoch": 1.4980670682369865, "grad_norm": 1.5405457019805908, "learning_rate": 8.212450598439766e-06, "loss": 0.7419, "step": 8330 }, { "epoch": 1.4982468758428482, "grad_norm": 1.1797415018081665, "learning_rate": 8.212004268989413e-06, "loss": 0.9104, "step": 8331 }, { "epoch": 1.4984266834487099, "grad_norm": 1.6344385147094727, "learning_rate": 8.211557895956378e-06, "loss": 0.821, "step": 8332 }, { "epoch": 1.4986064910545716, "grad_norm": 1.053505301475525, "learning_rate": 8.211111479346716e-06, "loss": 0.9193, "step": 8333 }, { "epoch": 1.4987862986604332, "grad_norm": 1.5764864683151245, "learning_rate": 8.210665019166484e-06, "loss": 0.765, "step": 8334 }, { "epoch": 1.4989661062662951, "grad_norm": 1.462178349494934, "learning_rate": 8.210218515421741e-06, "loss": 0.7164, "step": 8335 }, { "epoch": 1.4991459138721568, "grad_norm": 1.5001429319381714, "learning_rate": 8.209771968118544e-06, "loss": 0.7957, "step": 8336 }, { "epoch": 1.4993257214780185, "grad_norm": 1.573909044265747, "learning_rate": 8.209325377262955e-06, "loss": 0.6964, "step": 8337 }, { "epoch": 1.4995055290838804, "grad_norm": 1.4722652435302734, "learning_rate": 8.20887874286103e-06, "loss": 0.7231, "step": 8338 }, { "epoch": 1.499685336689742, "grad_norm": 2.3284051418304443, "learning_rate": 8.208432064918833e-06, "loss": 0.8422, "step": 8339 }, { "epoch": 1.4998651442956037, "grad_norm": 1.5333534479141235, "learning_rate": 8.20798534344242e-06, "loss": 0.7416, "step": 8340 }, { "epoch": 1.5000449519014656, "grad_norm": 1.4214379787445068, "learning_rate": 8.207538578437857e-06, "loss": 0.7438, "step": 8341 }, { "epoch": 1.500224759507327, "grad_norm": 1.493666410446167, "learning_rate": 8.207091769911202e-06, "loss": 0.7449, "step": 8342 }, { "epoch": 1.500404567113189, "grad_norm": 1.4797931909561157, "learning_rate": 8.206644917868523e-06, "loss": 0.6992, "step": 8343 }, { "epoch": 1.5005843747190506, "grad_norm": 1.503545880317688, "learning_rate": 8.206198022315878e-06, "loss": 0.6525, "step": 8344 }, { "epoch": 1.5007641823249123, "grad_norm": 2.547989845275879, "learning_rate": 8.205751083259334e-06, "loss": 0.755, "step": 8345 }, { "epoch": 1.5009439899307742, "grad_norm": 1.497018814086914, "learning_rate": 8.205304100704953e-06, "loss": 0.7201, "step": 8346 }, { "epoch": 1.5011237975366358, "grad_norm": 1.5383051633834839, "learning_rate": 8.204857074658803e-06, "loss": 0.7217, "step": 8347 }, { "epoch": 1.5013036051424975, "grad_norm": 1.5443402528762817, "learning_rate": 8.204410005126944e-06, "loss": 0.7666, "step": 8348 }, { "epoch": 1.5014834127483594, "grad_norm": 1.597139835357666, "learning_rate": 8.203962892115448e-06, "loss": 0.8394, "step": 8349 }, { "epoch": 1.5016632203542208, "grad_norm": 1.5360362529754639, "learning_rate": 8.203515735630381e-06, "loss": 0.7738, "step": 8350 }, { "epoch": 1.5018430279600827, "grad_norm": 1.2468763589859009, "learning_rate": 8.203068535677807e-06, "loss": 0.9571, "step": 8351 }, { "epoch": 1.5020228355659444, "grad_norm": 1.4902809858322144, "learning_rate": 8.202621292263796e-06, "loss": 0.7467, "step": 8352 }, { "epoch": 1.502202643171806, "grad_norm": 1.2752190828323364, "learning_rate": 8.202174005394419e-06, "loss": 0.9754, "step": 8353 }, { "epoch": 1.502382450777668, "grad_norm": 1.4906890392303467, "learning_rate": 8.20172667507574e-06, "loss": 0.7382, "step": 8354 }, { "epoch": 1.5025622583835296, "grad_norm": 1.4619723558425903, "learning_rate": 8.20127930131383e-06, "loss": 0.6464, "step": 8355 }, { "epoch": 1.5027420659893913, "grad_norm": 1.5059970617294312, "learning_rate": 8.200831884114763e-06, "loss": 0.741, "step": 8356 }, { "epoch": 1.5029218735952532, "grad_norm": 1.4419070482254028, "learning_rate": 8.200384423484606e-06, "loss": 0.8181, "step": 8357 }, { "epoch": 1.5031016812011146, "grad_norm": 1.1537315845489502, "learning_rate": 8.199936919429432e-06, "loss": 0.9511, "step": 8358 }, { "epoch": 1.5032814888069765, "grad_norm": 1.415105938911438, "learning_rate": 8.199489371955313e-06, "loss": 0.696, "step": 8359 }, { "epoch": 1.5034612964128382, "grad_norm": 1.5376580953598022, "learning_rate": 8.19904178106832e-06, "loss": 0.7531, "step": 8360 }, { "epoch": 1.5036411040186999, "grad_norm": 1.5052978992462158, "learning_rate": 8.19859414677453e-06, "loss": 0.7431, "step": 8361 }, { "epoch": 1.5038209116245618, "grad_norm": 1.5636866092681885, "learning_rate": 8.198146469080014e-06, "loss": 0.7423, "step": 8362 }, { "epoch": 1.5040007192304234, "grad_norm": 1.5598078966140747, "learning_rate": 8.197698747990844e-06, "loss": 0.8122, "step": 8363 }, { "epoch": 1.5041805268362851, "grad_norm": 1.4472010135650635, "learning_rate": 8.197250983513098e-06, "loss": 0.7043, "step": 8364 }, { "epoch": 1.504360334442147, "grad_norm": 1.403319239616394, "learning_rate": 8.196803175652855e-06, "loss": 0.7205, "step": 8365 }, { "epoch": 1.5045401420480087, "grad_norm": 1.8696085214614868, "learning_rate": 8.196355324416186e-06, "loss": 0.7951, "step": 8366 }, { "epoch": 1.5047199496538703, "grad_norm": 1.6015199422836304, "learning_rate": 8.195907429809168e-06, "loss": 0.7711, "step": 8367 }, { "epoch": 1.5048997572597322, "grad_norm": 1.3757134675979614, "learning_rate": 8.195459491837881e-06, "loss": 0.6853, "step": 8368 }, { "epoch": 1.5050795648655937, "grad_norm": 1.4494843482971191, "learning_rate": 8.195011510508401e-06, "loss": 0.6521, "step": 8369 }, { "epoch": 1.5052593724714556, "grad_norm": 1.4329379796981812, "learning_rate": 8.194563485826806e-06, "loss": 0.6652, "step": 8370 }, { "epoch": 1.5054391800773173, "grad_norm": 1.4750386476516724, "learning_rate": 8.194115417799178e-06, "loss": 0.7469, "step": 8371 }, { "epoch": 1.505618987683179, "grad_norm": 1.6104477643966675, "learning_rate": 8.193667306431594e-06, "loss": 0.7362, "step": 8372 }, { "epoch": 1.5057987952890408, "grad_norm": 1.4611116647720337, "learning_rate": 8.193219151730137e-06, "loss": 0.8514, "step": 8373 }, { "epoch": 1.5059786028949025, "grad_norm": 1.529005527496338, "learning_rate": 8.192770953700884e-06, "loss": 0.801, "step": 8374 }, { "epoch": 1.5061584105007642, "grad_norm": 1.586067795753479, "learning_rate": 8.192322712349917e-06, "loss": 0.6973, "step": 8375 }, { "epoch": 1.506338218106626, "grad_norm": 1.5482323169708252, "learning_rate": 8.191874427683323e-06, "loss": 0.7108, "step": 8376 }, { "epoch": 1.5065180257124875, "grad_norm": 1.419010043144226, "learning_rate": 8.191426099707181e-06, "loss": 0.7681, "step": 8377 }, { "epoch": 1.5066978333183494, "grad_norm": 1.4724093675613403, "learning_rate": 8.190977728427571e-06, "loss": 0.7682, "step": 8378 }, { "epoch": 1.506877640924211, "grad_norm": 1.618844747543335, "learning_rate": 8.190529313850584e-06, "loss": 0.8366, "step": 8379 }, { "epoch": 1.5070574485300727, "grad_norm": 1.3492246866226196, "learning_rate": 8.1900808559823e-06, "loss": 0.6716, "step": 8380 }, { "epoch": 1.5072372561359346, "grad_norm": 1.4435473680496216, "learning_rate": 8.189632354828803e-06, "loss": 0.7478, "step": 8381 }, { "epoch": 1.5074170637417963, "grad_norm": 1.4168076515197754, "learning_rate": 8.18918381039618e-06, "loss": 0.6951, "step": 8382 }, { "epoch": 1.507596871347658, "grad_norm": 1.4732236862182617, "learning_rate": 8.188735222690517e-06, "loss": 0.7373, "step": 8383 }, { "epoch": 1.5077766789535199, "grad_norm": 1.1580240726470947, "learning_rate": 8.188286591717904e-06, "loss": 0.9502, "step": 8384 }, { "epoch": 1.5079564865593813, "grad_norm": 1.526764988899231, "learning_rate": 8.187837917484422e-06, "loss": 0.7152, "step": 8385 }, { "epoch": 1.5081362941652432, "grad_norm": 1.4847837686538696, "learning_rate": 8.187389199996165e-06, "loss": 0.7545, "step": 8386 }, { "epoch": 1.5083161017711049, "grad_norm": 1.2153940200805664, "learning_rate": 8.186940439259217e-06, "loss": 0.9231, "step": 8387 }, { "epoch": 1.5084959093769665, "grad_norm": 1.5671091079711914, "learning_rate": 8.18649163527967e-06, "loss": 0.731, "step": 8388 }, { "epoch": 1.5086757169828284, "grad_norm": 1.5558271408081055, "learning_rate": 8.186042788063612e-06, "loss": 0.7681, "step": 8389 }, { "epoch": 1.50885552458869, "grad_norm": 1.3977258205413818, "learning_rate": 8.185593897617134e-06, "loss": 0.7191, "step": 8390 }, { "epoch": 1.5090353321945518, "grad_norm": 1.4097775220870972, "learning_rate": 8.185144963946328e-06, "loss": 0.7349, "step": 8391 }, { "epoch": 1.5092151398004137, "grad_norm": 1.5079710483551025, "learning_rate": 8.184695987057283e-06, "loss": 0.7097, "step": 8392 }, { "epoch": 1.5093949474062753, "grad_norm": 1.4892663955688477, "learning_rate": 8.184246966956093e-06, "loss": 0.7725, "step": 8393 }, { "epoch": 1.509574755012137, "grad_norm": 1.1614681482315063, "learning_rate": 8.18379790364885e-06, "loss": 0.9538, "step": 8394 }, { "epoch": 1.509754562617999, "grad_norm": 1.599446415901184, "learning_rate": 8.183348797141644e-06, "loss": 0.7214, "step": 8395 }, { "epoch": 1.5099343702238603, "grad_norm": 1.0814077854156494, "learning_rate": 8.182899647440575e-06, "loss": 0.9223, "step": 8396 }, { "epoch": 1.5101141778297222, "grad_norm": 1.4444602727890015, "learning_rate": 8.182450454551734e-06, "loss": 0.8061, "step": 8397 }, { "epoch": 1.510293985435584, "grad_norm": 1.5359092950820923, "learning_rate": 8.182001218481215e-06, "loss": 0.7571, "step": 8398 }, { "epoch": 1.5104737930414456, "grad_norm": 1.5036596059799194, "learning_rate": 8.181551939235115e-06, "loss": 0.755, "step": 8399 }, { "epoch": 1.5106536006473075, "grad_norm": 1.469299077987671, "learning_rate": 8.18110261681953e-06, "loss": 0.7352, "step": 8400 }, { "epoch": 1.5108334082531691, "grad_norm": 1.5531091690063477, "learning_rate": 8.180653251240556e-06, "loss": 0.7248, "step": 8401 }, { "epoch": 1.5110132158590308, "grad_norm": 1.402789831161499, "learning_rate": 8.180203842504292e-06, "loss": 0.7173, "step": 8402 }, { "epoch": 1.5111930234648927, "grad_norm": 1.4578301906585693, "learning_rate": 8.179754390616833e-06, "loss": 0.7042, "step": 8403 }, { "epoch": 1.5113728310707542, "grad_norm": 1.4721518754959106, "learning_rate": 8.179304895584282e-06, "loss": 0.7891, "step": 8404 }, { "epoch": 1.511552638676616, "grad_norm": 1.8625245094299316, "learning_rate": 8.178855357412732e-06, "loss": 0.7181, "step": 8405 }, { "epoch": 1.5117324462824777, "grad_norm": 1.5218174457550049, "learning_rate": 8.178405776108286e-06, "loss": 0.7356, "step": 8406 }, { "epoch": 1.5119122538883394, "grad_norm": 1.5190843343734741, "learning_rate": 8.177956151677046e-06, "loss": 0.7789, "step": 8407 }, { "epoch": 1.5120920614942013, "grad_norm": 1.2552393674850464, "learning_rate": 8.177506484125112e-06, "loss": 0.9776, "step": 8408 }, { "epoch": 1.512271869100063, "grad_norm": 1.4751112461090088, "learning_rate": 8.177056773458583e-06, "loss": 0.7911, "step": 8409 }, { "epoch": 1.5124516767059246, "grad_norm": 1.1237545013427734, "learning_rate": 8.176607019683561e-06, "loss": 0.9146, "step": 8410 }, { "epoch": 1.5126314843117865, "grad_norm": 1.5521903038024902, "learning_rate": 8.17615722280615e-06, "loss": 0.697, "step": 8411 }, { "epoch": 1.512811291917648, "grad_norm": 1.431693196296692, "learning_rate": 8.175707382832456e-06, "loss": 0.8374, "step": 8412 }, { "epoch": 1.5129910995235099, "grad_norm": 1.4867445230484009, "learning_rate": 8.175257499768577e-06, "loss": 0.715, "step": 8413 }, { "epoch": 1.5131709071293715, "grad_norm": 1.5791096687316895, "learning_rate": 8.17480757362062e-06, "loss": 0.781, "step": 8414 }, { "epoch": 1.5133507147352332, "grad_norm": 1.1598906517028809, "learning_rate": 8.174357604394691e-06, "loss": 0.9398, "step": 8415 }, { "epoch": 1.513530522341095, "grad_norm": 1.4693795442581177, "learning_rate": 8.173907592096895e-06, "loss": 0.7605, "step": 8416 }, { "epoch": 1.5137103299469568, "grad_norm": 1.146305799484253, "learning_rate": 8.173457536733336e-06, "loss": 0.9571, "step": 8417 }, { "epoch": 1.5138901375528184, "grad_norm": 1.5129321813583374, "learning_rate": 8.173007438310123e-06, "loss": 0.7748, "step": 8418 }, { "epoch": 1.5140699451586803, "grad_norm": 1.3686761856079102, "learning_rate": 8.172557296833363e-06, "loss": 0.6738, "step": 8419 }, { "epoch": 1.514249752764542, "grad_norm": 1.1363146305084229, "learning_rate": 8.172107112309164e-06, "loss": 0.9472, "step": 8420 }, { "epoch": 1.5144295603704037, "grad_norm": 1.6925005912780762, "learning_rate": 8.171656884743631e-06, "loss": 0.6465, "step": 8421 }, { "epoch": 1.5146093679762656, "grad_norm": 1.444277286529541, "learning_rate": 8.171206614142879e-06, "loss": 0.749, "step": 8422 }, { "epoch": 1.514789175582127, "grad_norm": 1.352659821510315, "learning_rate": 8.170756300513011e-06, "loss": 0.9023, "step": 8423 }, { "epoch": 1.514968983187989, "grad_norm": 1.6624796390533447, "learning_rate": 8.170305943860144e-06, "loss": 0.7321, "step": 8424 }, { "epoch": 1.5151487907938506, "grad_norm": 1.4490869045257568, "learning_rate": 8.169855544190383e-06, "loss": 0.7441, "step": 8425 }, { "epoch": 1.5153285983997122, "grad_norm": 1.5260320901870728, "learning_rate": 8.169405101509842e-06, "loss": 0.7133, "step": 8426 }, { "epoch": 1.5155084060055741, "grad_norm": 1.5361601114273071, "learning_rate": 8.168954615824632e-06, "loss": 0.7605, "step": 8427 }, { "epoch": 1.5156882136114358, "grad_norm": 1.451610803604126, "learning_rate": 8.168504087140867e-06, "loss": 0.7284, "step": 8428 }, { "epoch": 1.5158680212172975, "grad_norm": 1.4961400032043457, "learning_rate": 8.168053515464658e-06, "loss": 0.7356, "step": 8429 }, { "epoch": 1.5160478288231594, "grad_norm": 1.101668357849121, "learning_rate": 8.167602900802121e-06, "loss": 0.9415, "step": 8430 }, { "epoch": 1.5162276364290208, "grad_norm": 1.5736184120178223, "learning_rate": 8.167152243159367e-06, "loss": 0.7335, "step": 8431 }, { "epoch": 1.5164074440348827, "grad_norm": 1.3699703216552734, "learning_rate": 8.166701542542514e-06, "loss": 0.6987, "step": 8432 }, { "epoch": 1.5165872516407444, "grad_norm": 1.5161584615707397, "learning_rate": 8.166250798957676e-06, "loss": 0.7727, "step": 8433 }, { "epoch": 1.516767059246606, "grad_norm": 1.1602814197540283, "learning_rate": 8.16580001241097e-06, "loss": 0.9348, "step": 8434 }, { "epoch": 1.516946866852468, "grad_norm": 1.4563262462615967, "learning_rate": 8.16534918290851e-06, "loss": 0.729, "step": 8435 }, { "epoch": 1.5171266744583296, "grad_norm": 1.4972325563430786, "learning_rate": 8.164898310456416e-06, "loss": 0.7725, "step": 8436 }, { "epoch": 1.5173064820641913, "grad_norm": 1.6167892217636108, "learning_rate": 8.164447395060804e-06, "loss": 0.8553, "step": 8437 }, { "epoch": 1.5174862896700532, "grad_norm": 1.6159076690673828, "learning_rate": 8.163996436727795e-06, "loss": 0.7099, "step": 8438 }, { "epoch": 1.5176660972759146, "grad_norm": 1.4153697490692139, "learning_rate": 8.163545435463505e-06, "loss": 0.7689, "step": 8439 }, { "epoch": 1.5178459048817765, "grad_norm": 1.4870630502700806, "learning_rate": 8.163094391274053e-06, "loss": 0.7483, "step": 8440 }, { "epoch": 1.5180257124876382, "grad_norm": 1.504374623298645, "learning_rate": 8.162643304165564e-06, "loss": 0.7468, "step": 8441 }, { "epoch": 1.5182055200934998, "grad_norm": 1.4530690908432007, "learning_rate": 8.162192174144152e-06, "loss": 0.6306, "step": 8442 }, { "epoch": 1.5183853276993617, "grad_norm": 1.5171492099761963, "learning_rate": 8.161741001215942e-06, "loss": 0.6724, "step": 8443 }, { "epoch": 1.5185651353052234, "grad_norm": 1.47515070438385, "learning_rate": 8.161289785387056e-06, "loss": 0.7065, "step": 8444 }, { "epoch": 1.518744942911085, "grad_norm": 2.159127950668335, "learning_rate": 8.160838526663615e-06, "loss": 0.7186, "step": 8445 }, { "epoch": 1.518924750516947, "grad_norm": 1.5636987686157227, "learning_rate": 8.160387225051743e-06, "loss": 0.7347, "step": 8446 }, { "epoch": 1.5191045581228086, "grad_norm": 1.5840295553207397, "learning_rate": 8.159935880557563e-06, "loss": 0.7589, "step": 8447 }, { "epoch": 1.5192843657286703, "grad_norm": 1.0790895223617554, "learning_rate": 8.1594844931872e-06, "loss": 0.9676, "step": 8448 }, { "epoch": 1.5194641733345322, "grad_norm": 1.591670274734497, "learning_rate": 8.159033062946777e-06, "loss": 0.7105, "step": 8449 }, { "epoch": 1.5196439809403937, "grad_norm": 1.3432589769363403, "learning_rate": 8.158581589842421e-06, "loss": 0.9271, "step": 8450 }, { "epoch": 1.5198237885462555, "grad_norm": 1.4009042978286743, "learning_rate": 8.158130073880258e-06, "loss": 0.9177, "step": 8451 }, { "epoch": 1.5200035961521172, "grad_norm": 1.5179359912872314, "learning_rate": 8.157678515066412e-06, "loss": 0.7555, "step": 8452 }, { "epoch": 1.520183403757979, "grad_norm": 1.427241563796997, "learning_rate": 8.157226913407013e-06, "loss": 0.7185, "step": 8453 }, { "epoch": 1.5203632113638408, "grad_norm": 1.5083751678466797, "learning_rate": 8.156775268908188e-06, "loss": 0.7402, "step": 8454 }, { "epoch": 1.5205430189697025, "grad_norm": 1.4956876039505005, "learning_rate": 8.156323581576064e-06, "loss": 0.7233, "step": 8455 }, { "epoch": 1.5207228265755641, "grad_norm": 1.474896788597107, "learning_rate": 8.15587185141677e-06, "loss": 0.7725, "step": 8456 }, { "epoch": 1.520902634181426, "grad_norm": 1.4476946592330933, "learning_rate": 8.155420078436436e-06, "loss": 0.7448, "step": 8457 }, { "epoch": 1.5210824417872875, "grad_norm": 1.1751372814178467, "learning_rate": 8.154968262641193e-06, "loss": 0.9595, "step": 8458 }, { "epoch": 1.5212622493931494, "grad_norm": 1.4425032138824463, "learning_rate": 8.154516404037169e-06, "loss": 0.7204, "step": 8459 }, { "epoch": 1.521442056999011, "grad_norm": 1.5512175559997559, "learning_rate": 8.154064502630498e-06, "loss": 0.7498, "step": 8460 }, { "epoch": 1.5216218646048727, "grad_norm": 2.9198992252349854, "learning_rate": 8.153612558427311e-06, "loss": 0.7216, "step": 8461 }, { "epoch": 1.5218016722107346, "grad_norm": 1.4194626808166504, "learning_rate": 8.153160571433738e-06, "loss": 0.6775, "step": 8462 }, { "epoch": 1.5219814798165963, "grad_norm": 1.5949870347976685, "learning_rate": 8.152708541655912e-06, "loss": 0.6473, "step": 8463 }, { "epoch": 1.522161287422458, "grad_norm": 1.7754350900650024, "learning_rate": 8.152256469099971e-06, "loss": 0.7698, "step": 8464 }, { "epoch": 1.5223410950283198, "grad_norm": 1.4302175045013428, "learning_rate": 8.151804353772043e-06, "loss": 0.6816, "step": 8465 }, { "epoch": 1.5225209026341813, "grad_norm": 1.7171549797058105, "learning_rate": 8.151352195678268e-06, "loss": 0.7233, "step": 8466 }, { "epoch": 1.5227007102400432, "grad_norm": 1.5146390199661255, "learning_rate": 8.150899994824776e-06, "loss": 0.7437, "step": 8467 }, { "epoch": 1.5228805178459048, "grad_norm": 1.4116759300231934, "learning_rate": 8.15044775121771e-06, "loss": 0.7252, "step": 8468 }, { "epoch": 1.5230603254517665, "grad_norm": 1.5056636333465576, "learning_rate": 8.149995464863199e-06, "loss": 0.6841, "step": 8469 }, { "epoch": 1.5232401330576284, "grad_norm": 1.4580695629119873, "learning_rate": 8.149543135767382e-06, "loss": 0.6968, "step": 8470 }, { "epoch": 1.52341994066349, "grad_norm": 1.5304651260375977, "learning_rate": 8.149090763936398e-06, "loss": 0.7383, "step": 8471 }, { "epoch": 1.5235997482693517, "grad_norm": 1.4683308601379395, "learning_rate": 8.148638349376384e-06, "loss": 0.6941, "step": 8472 }, { "epoch": 1.5237795558752136, "grad_norm": 1.5082182884216309, "learning_rate": 8.148185892093479e-06, "loss": 0.73, "step": 8473 }, { "epoch": 1.5239593634810753, "grad_norm": 1.5918759107589722, "learning_rate": 8.147733392093823e-06, "loss": 0.7636, "step": 8474 }, { "epoch": 1.524139171086937, "grad_norm": 1.5108202695846558, "learning_rate": 8.147280849383555e-06, "loss": 0.7606, "step": 8475 }, { "epoch": 1.5243189786927989, "grad_norm": 1.1904302835464478, "learning_rate": 8.146828263968815e-06, "loss": 0.9585, "step": 8476 }, { "epoch": 1.5244987862986603, "grad_norm": 1.4668312072753906, "learning_rate": 8.146375635855745e-06, "loss": 0.7378, "step": 8477 }, { "epoch": 1.5246785939045222, "grad_norm": 1.091773271560669, "learning_rate": 8.145922965050486e-06, "loss": 0.9822, "step": 8478 }, { "epoch": 1.5248584015103839, "grad_norm": 1.5502450466156006, "learning_rate": 8.14547025155918e-06, "loss": 0.7476, "step": 8479 }, { "epoch": 1.5250382091162455, "grad_norm": 1.0815372467041016, "learning_rate": 8.145017495387972e-06, "loss": 0.9296, "step": 8480 }, { "epoch": 1.5252180167221074, "grad_norm": 1.4254212379455566, "learning_rate": 8.144564696543e-06, "loss": 0.7532, "step": 8481 }, { "epoch": 1.525397824327969, "grad_norm": 1.4541391134262085, "learning_rate": 8.144111855030413e-06, "loss": 0.6914, "step": 8482 }, { "epoch": 1.5255776319338308, "grad_norm": 1.467124581336975, "learning_rate": 8.143658970856353e-06, "loss": 0.8182, "step": 8483 }, { "epoch": 1.5257574395396927, "grad_norm": 1.625185251235962, "learning_rate": 8.143206044026968e-06, "loss": 0.7366, "step": 8484 }, { "epoch": 1.5259372471455541, "grad_norm": 1.4824634790420532, "learning_rate": 8.142753074548397e-06, "loss": 0.6943, "step": 8485 }, { "epoch": 1.526117054751416, "grad_norm": 1.0665478706359863, "learning_rate": 8.142300062426794e-06, "loss": 0.9466, "step": 8486 }, { "epoch": 1.5262968623572777, "grad_norm": 1.549742579460144, "learning_rate": 8.1418470076683e-06, "loss": 0.7628, "step": 8487 }, { "epoch": 1.5264766699631394, "grad_norm": 1.463340401649475, "learning_rate": 8.141393910279067e-06, "loss": 0.7949, "step": 8488 }, { "epoch": 1.5266564775690012, "grad_norm": 1.6051695346832275, "learning_rate": 8.140940770265238e-06, "loss": 0.7952, "step": 8489 }, { "epoch": 1.526836285174863, "grad_norm": 1.4712867736816406, "learning_rate": 8.140487587632965e-06, "loss": 0.7137, "step": 8490 }, { "epoch": 1.5270160927807246, "grad_norm": 1.5285087823867798, "learning_rate": 8.140034362388398e-06, "loss": 0.7521, "step": 8491 }, { "epoch": 1.5271959003865865, "grad_norm": 1.6474777460098267, "learning_rate": 8.139581094537685e-06, "loss": 0.6745, "step": 8492 }, { "epoch": 1.527375707992448, "grad_norm": 1.4539128541946411, "learning_rate": 8.139127784086973e-06, "loss": 0.7063, "step": 8493 }, { "epoch": 1.5275555155983098, "grad_norm": 1.5138258934020996, "learning_rate": 8.138674431042417e-06, "loss": 0.7208, "step": 8494 }, { "epoch": 1.5277353232041715, "grad_norm": 1.5113030672073364, "learning_rate": 8.138221035410167e-06, "loss": 0.765, "step": 8495 }, { "epoch": 1.5279151308100332, "grad_norm": 1.5750751495361328, "learning_rate": 8.137767597196378e-06, "loss": 0.7333, "step": 8496 }, { "epoch": 1.528094938415895, "grad_norm": 1.388782024383545, "learning_rate": 8.137314116407198e-06, "loss": 0.6902, "step": 8497 }, { "epoch": 1.5282747460217567, "grad_norm": 1.5274863243103027, "learning_rate": 8.13686059304878e-06, "loss": 0.7868, "step": 8498 }, { "epoch": 1.5284545536276184, "grad_norm": 1.6108709573745728, "learning_rate": 8.136407027127282e-06, "loss": 0.7924, "step": 8499 }, { "epoch": 1.5286343612334803, "grad_norm": 1.72779381275177, "learning_rate": 8.135953418648858e-06, "loss": 0.7752, "step": 8500 }, { "epoch": 1.5286343612334803, "eval_loss": 0.7984726428985596, "eval_runtime": 154.8086, "eval_samples_per_second": 92.902, "eval_steps_per_second": 1.453, "step": 8500 }, { "epoch": 1.528814168839342, "grad_norm": 1.5350432395935059, "learning_rate": 8.135499767619657e-06, "loss": 0.7701, "step": 8501 }, { "epoch": 1.5289939764452036, "grad_norm": 1.4552233219146729, "learning_rate": 8.135046074045842e-06, "loss": 0.7699, "step": 8502 }, { "epoch": 1.5291737840510655, "grad_norm": 1.4117952585220337, "learning_rate": 8.134592337933562e-06, "loss": 0.7305, "step": 8503 }, { "epoch": 1.529353591656927, "grad_norm": 1.5830554962158203, "learning_rate": 8.134138559288978e-06, "loss": 0.7059, "step": 8504 }, { "epoch": 1.5295333992627889, "grad_norm": 1.5905461311340332, "learning_rate": 8.133684738118247e-06, "loss": 0.7819, "step": 8505 }, { "epoch": 1.5297132068686505, "grad_norm": 1.5128662586212158, "learning_rate": 8.133230874427525e-06, "loss": 0.7686, "step": 8506 }, { "epoch": 1.5298930144745122, "grad_norm": 1.5162192583084106, "learning_rate": 8.132776968222973e-06, "loss": 0.7055, "step": 8507 }, { "epoch": 1.530072822080374, "grad_norm": 1.5238580703735352, "learning_rate": 8.132323019510746e-06, "loss": 0.7953, "step": 8508 }, { "epoch": 1.5302526296862358, "grad_norm": 1.4417535066604614, "learning_rate": 8.131869028297009e-06, "loss": 0.7284, "step": 8509 }, { "epoch": 1.5304324372920974, "grad_norm": 1.3956778049468994, "learning_rate": 8.131414994587914e-06, "loss": 0.7354, "step": 8510 }, { "epoch": 1.5306122448979593, "grad_norm": 1.5608775615692139, "learning_rate": 8.13096091838963e-06, "loss": 0.7462, "step": 8511 }, { "epoch": 1.5307920525038208, "grad_norm": 1.5587996244430542, "learning_rate": 8.130506799708313e-06, "loss": 0.7832, "step": 8512 }, { "epoch": 1.5309718601096827, "grad_norm": 1.5530003309249878, "learning_rate": 8.130052638550127e-06, "loss": 0.7147, "step": 8513 }, { "epoch": 1.5311516677155443, "grad_norm": 1.5945590734481812, "learning_rate": 8.129598434921234e-06, "loss": 0.7463, "step": 8514 }, { "epoch": 1.531331475321406, "grad_norm": 1.1612056493759155, "learning_rate": 8.129144188827795e-06, "loss": 0.9185, "step": 8515 }, { "epoch": 1.531511282927268, "grad_norm": 1.554211139678955, "learning_rate": 8.128689900275977e-06, "loss": 0.682, "step": 8516 }, { "epoch": 1.5316910905331296, "grad_norm": 1.5640379190444946, "learning_rate": 8.12823556927194e-06, "loss": 0.7727, "step": 8517 }, { "epoch": 1.5318708981389912, "grad_norm": 1.4588160514831543, "learning_rate": 8.127781195821854e-06, "loss": 0.7271, "step": 8518 }, { "epoch": 1.5320507057448531, "grad_norm": 1.4306409358978271, "learning_rate": 8.12732677993188e-06, "loss": 0.7209, "step": 8519 }, { "epoch": 1.5322305133507146, "grad_norm": 1.5507616996765137, "learning_rate": 8.126872321608185e-06, "loss": 0.7907, "step": 8520 }, { "epoch": 1.5324103209565765, "grad_norm": 1.5848512649536133, "learning_rate": 8.126417820856936e-06, "loss": 0.8344, "step": 8521 }, { "epoch": 1.5325901285624381, "grad_norm": 1.4085978269577026, "learning_rate": 8.125963277684297e-06, "loss": 0.6508, "step": 8522 }, { "epoch": 1.5327699361682998, "grad_norm": 1.431455135345459, "learning_rate": 8.125508692096442e-06, "loss": 0.6896, "step": 8523 }, { "epoch": 1.5329497437741617, "grad_norm": 1.4935392141342163, "learning_rate": 8.125054064099532e-06, "loss": 0.7651, "step": 8524 }, { "epoch": 1.5331295513800234, "grad_norm": 1.4901527166366577, "learning_rate": 8.12459939369974e-06, "loss": 0.8242, "step": 8525 }, { "epoch": 1.533309358985885, "grad_norm": 1.5324803590774536, "learning_rate": 8.124144680903235e-06, "loss": 0.745, "step": 8526 }, { "epoch": 1.533489166591747, "grad_norm": 1.493272066116333, "learning_rate": 8.123689925716185e-06, "loss": 0.6922, "step": 8527 }, { "epoch": 1.5336689741976086, "grad_norm": 1.4531830549240112, "learning_rate": 8.123235128144761e-06, "loss": 0.7562, "step": 8528 }, { "epoch": 1.5338487818034703, "grad_norm": 1.4070762395858765, "learning_rate": 8.122780288195135e-06, "loss": 0.73, "step": 8529 }, { "epoch": 1.5340285894093322, "grad_norm": 1.5724347829818726, "learning_rate": 8.122325405873477e-06, "loss": 0.7207, "step": 8530 }, { "epoch": 1.5342083970151936, "grad_norm": 1.4750750064849854, "learning_rate": 8.121870481185964e-06, "loss": 0.7632, "step": 8531 }, { "epoch": 1.5343882046210555, "grad_norm": 1.5237958431243896, "learning_rate": 8.12141551413876e-06, "loss": 0.7616, "step": 8532 }, { "epoch": 1.5345680122269172, "grad_norm": 1.507068157196045, "learning_rate": 8.120960504738044e-06, "loss": 0.6859, "step": 8533 }, { "epoch": 1.5347478198327789, "grad_norm": 1.5234942436218262, "learning_rate": 8.120505452989991e-06, "loss": 0.7822, "step": 8534 }, { "epoch": 1.5349276274386408, "grad_norm": 1.5292339324951172, "learning_rate": 8.120050358900772e-06, "loss": 0.7596, "step": 8535 }, { "epoch": 1.5351074350445024, "grad_norm": 1.464382290840149, "learning_rate": 8.119595222476567e-06, "loss": 0.7282, "step": 8536 }, { "epoch": 1.535287242650364, "grad_norm": 1.582975149154663, "learning_rate": 8.119140043723544e-06, "loss": 0.7662, "step": 8537 }, { "epoch": 1.535467050256226, "grad_norm": 1.4744055271148682, "learning_rate": 8.118684822647884e-06, "loss": 0.7548, "step": 8538 }, { "epoch": 1.5356468578620874, "grad_norm": 1.5725677013397217, "learning_rate": 8.118229559255764e-06, "loss": 0.8412, "step": 8539 }, { "epoch": 1.5358266654679493, "grad_norm": 1.5772314071655273, "learning_rate": 8.11777425355336e-06, "loss": 0.7506, "step": 8540 }, { "epoch": 1.536006473073811, "grad_norm": 1.7405625581741333, "learning_rate": 8.117318905546851e-06, "loss": 0.6956, "step": 8541 }, { "epoch": 1.5361862806796727, "grad_norm": 1.5796531438827515, "learning_rate": 8.116863515242414e-06, "loss": 0.7249, "step": 8542 }, { "epoch": 1.5363660882855346, "grad_norm": 1.165842890739441, "learning_rate": 8.11640808264623e-06, "loss": 0.93, "step": 8543 }, { "epoch": 1.5365458958913962, "grad_norm": 1.5703779458999634, "learning_rate": 8.115952607764476e-06, "loss": 0.7241, "step": 8544 }, { "epoch": 1.536725703497258, "grad_norm": 1.5084234476089478, "learning_rate": 8.115497090603337e-06, "loss": 0.7032, "step": 8545 }, { "epoch": 1.5369055111031198, "grad_norm": 1.4472323656082153, "learning_rate": 8.115041531168988e-06, "loss": 0.8342, "step": 8546 }, { "epoch": 1.5370853187089812, "grad_norm": 1.4947946071624756, "learning_rate": 8.114585929467612e-06, "loss": 0.7598, "step": 8547 }, { "epoch": 1.5372651263148431, "grad_norm": 1.452640175819397, "learning_rate": 8.114130285505392e-06, "loss": 0.7414, "step": 8548 }, { "epoch": 1.5374449339207048, "grad_norm": 1.466940999031067, "learning_rate": 8.11367459928851e-06, "loss": 0.6854, "step": 8549 }, { "epoch": 1.5376247415265665, "grad_norm": 1.4872734546661377, "learning_rate": 8.11321887082315e-06, "loss": 0.757, "step": 8550 }, { "epoch": 1.5378045491324284, "grad_norm": 1.3942735195159912, "learning_rate": 8.112763100115495e-06, "loss": 0.7586, "step": 8551 }, { "epoch": 1.53798435673829, "grad_norm": 1.1726548671722412, "learning_rate": 8.11230728717173e-06, "loss": 0.9445, "step": 8552 }, { "epoch": 1.5381641643441517, "grad_norm": 1.722454309463501, "learning_rate": 8.111851431998037e-06, "loss": 0.6512, "step": 8553 }, { "epoch": 1.5383439719500136, "grad_norm": 1.530722975730896, "learning_rate": 8.111395534600604e-06, "loss": 0.8026, "step": 8554 }, { "epoch": 1.538523779555875, "grad_norm": 1.3308885097503662, "learning_rate": 8.110939594985616e-06, "loss": 0.7089, "step": 8555 }, { "epoch": 1.538703587161737, "grad_norm": 1.4369744062423706, "learning_rate": 8.11048361315926e-06, "loss": 0.7073, "step": 8556 }, { "epoch": 1.5388833947675988, "grad_norm": 1.4805725812911987, "learning_rate": 8.110027589127723e-06, "loss": 0.7661, "step": 8557 }, { "epoch": 1.5390632023734603, "grad_norm": 1.3590878248214722, "learning_rate": 8.109571522897191e-06, "loss": 0.7102, "step": 8558 }, { "epoch": 1.5392430099793222, "grad_norm": 1.784498691558838, "learning_rate": 8.109115414473854e-06, "loss": 0.7918, "step": 8559 }, { "epoch": 1.5394228175851838, "grad_norm": 1.6031744480133057, "learning_rate": 8.108659263863901e-06, "loss": 0.7281, "step": 8560 }, { "epoch": 1.5396026251910455, "grad_norm": 1.8035837411880493, "learning_rate": 8.108203071073521e-06, "loss": 0.8161, "step": 8561 }, { "epoch": 1.5397824327969074, "grad_norm": 1.0347142219543457, "learning_rate": 8.107746836108903e-06, "loss": 0.8759, "step": 8562 }, { "epoch": 1.539962240402769, "grad_norm": 1.4888298511505127, "learning_rate": 8.10729055897624e-06, "loss": 0.7134, "step": 8563 }, { "epoch": 1.5401420480086307, "grad_norm": 1.3700708150863647, "learning_rate": 8.10683423968172e-06, "loss": 0.7299, "step": 8564 }, { "epoch": 1.5403218556144926, "grad_norm": 1.3889375925064087, "learning_rate": 8.106377878231535e-06, "loss": 0.733, "step": 8565 }, { "epoch": 1.540501663220354, "grad_norm": 1.4934570789337158, "learning_rate": 8.105921474631878e-06, "loss": 0.7203, "step": 8566 }, { "epoch": 1.540681470826216, "grad_norm": 1.4613654613494873, "learning_rate": 8.105465028888946e-06, "loss": 0.685, "step": 8567 }, { "epoch": 1.5408612784320777, "grad_norm": 1.148238182067871, "learning_rate": 8.105008541008923e-06, "loss": 0.9598, "step": 8568 }, { "epoch": 1.5410410860379393, "grad_norm": 1.2717071771621704, "learning_rate": 8.104552010998012e-06, "loss": 1.0009, "step": 8569 }, { "epoch": 1.5412208936438012, "grad_norm": 1.114931344985962, "learning_rate": 8.104095438862402e-06, "loss": 0.948, "step": 8570 }, { "epoch": 1.5414007012496629, "grad_norm": 1.5446428060531616, "learning_rate": 8.10363882460829e-06, "loss": 0.7749, "step": 8571 }, { "epoch": 1.5415805088555246, "grad_norm": 1.4236423969268799, "learning_rate": 8.103182168241873e-06, "loss": 0.6735, "step": 8572 }, { "epoch": 1.5417603164613864, "grad_norm": 1.5358927249908447, "learning_rate": 8.102725469769346e-06, "loss": 0.7549, "step": 8573 }, { "epoch": 1.541940124067248, "grad_norm": 1.4940307140350342, "learning_rate": 8.102268729196903e-06, "loss": 0.8007, "step": 8574 }, { "epoch": 1.5421199316731098, "grad_norm": 1.5312836170196533, "learning_rate": 8.101811946530746e-06, "loss": 0.7359, "step": 8575 }, { "epoch": 1.5422997392789715, "grad_norm": 1.4959672689437866, "learning_rate": 8.10135512177707e-06, "loss": 0.7507, "step": 8576 }, { "epoch": 1.5424795468848331, "grad_norm": 1.447467565536499, "learning_rate": 8.100898254942074e-06, "loss": 0.7115, "step": 8577 }, { "epoch": 1.542659354490695, "grad_norm": 1.1990796327590942, "learning_rate": 8.100441346031958e-06, "loss": 0.9741, "step": 8578 }, { "epoch": 1.5428391620965567, "grad_norm": 1.4348548650741577, "learning_rate": 8.099984395052922e-06, "loss": 0.7741, "step": 8579 }, { "epoch": 1.5430189697024184, "grad_norm": 1.1353061199188232, "learning_rate": 8.099527402011164e-06, "loss": 0.9575, "step": 8580 }, { "epoch": 1.5431987773082803, "grad_norm": 1.4042229652404785, "learning_rate": 8.099070366912887e-06, "loss": 0.6974, "step": 8581 }, { "epoch": 1.5433785849141417, "grad_norm": 1.5053882598876953, "learning_rate": 8.09861328976429e-06, "loss": 0.8127, "step": 8582 }, { "epoch": 1.5435583925200036, "grad_norm": 1.5041577816009521, "learning_rate": 8.09815617057158e-06, "loss": 0.8074, "step": 8583 }, { "epoch": 1.5437382001258655, "grad_norm": 1.6125422716140747, "learning_rate": 8.097699009340953e-06, "loss": 0.7522, "step": 8584 }, { "epoch": 1.543918007731727, "grad_norm": 1.4777419567108154, "learning_rate": 8.097241806078616e-06, "loss": 0.7537, "step": 8585 }, { "epoch": 1.5440978153375888, "grad_norm": 1.466025948524475, "learning_rate": 8.096784560790771e-06, "loss": 0.7533, "step": 8586 }, { "epoch": 1.5442776229434505, "grad_norm": 1.4675942659378052, "learning_rate": 8.096327273483625e-06, "loss": 0.6972, "step": 8587 }, { "epoch": 1.5444574305493122, "grad_norm": 1.4250106811523438, "learning_rate": 8.095869944163378e-06, "loss": 0.7227, "step": 8588 }, { "epoch": 1.544637238155174, "grad_norm": 1.6212328672409058, "learning_rate": 8.095412572836239e-06, "loss": 0.81, "step": 8589 }, { "epoch": 1.5448170457610357, "grad_norm": 1.4486894607543945, "learning_rate": 8.094955159508413e-06, "loss": 0.7678, "step": 8590 }, { "epoch": 1.5449968533668974, "grad_norm": 1.5419749021530151, "learning_rate": 8.094497704186106e-06, "loss": 0.7043, "step": 8591 }, { "epoch": 1.5451766609727593, "grad_norm": 1.505533218383789, "learning_rate": 8.094040206875526e-06, "loss": 0.7826, "step": 8592 }, { "epoch": 1.5453564685786207, "grad_norm": 1.4239193201065063, "learning_rate": 8.09358266758288e-06, "loss": 0.7497, "step": 8593 }, { "epoch": 1.5455362761844826, "grad_norm": 1.4843353033065796, "learning_rate": 8.093125086314377e-06, "loss": 0.7384, "step": 8594 }, { "epoch": 1.5457160837903443, "grad_norm": 1.42557954788208, "learning_rate": 8.092667463076225e-06, "loss": 0.7564, "step": 8595 }, { "epoch": 1.545895891396206, "grad_norm": 1.593675971031189, "learning_rate": 8.092209797874634e-06, "loss": 0.6956, "step": 8596 }, { "epoch": 1.5460756990020679, "grad_norm": 1.6308513879776, "learning_rate": 8.091752090715812e-06, "loss": 0.7721, "step": 8597 }, { "epoch": 1.5462555066079295, "grad_norm": 2.069815158843994, "learning_rate": 8.091294341605974e-06, "loss": 0.684, "step": 8598 }, { "epoch": 1.5464353142137912, "grad_norm": 1.4489529132843018, "learning_rate": 8.090836550551325e-06, "loss": 0.7217, "step": 8599 }, { "epoch": 1.546615121819653, "grad_norm": 3.001441478729248, "learning_rate": 8.090378717558079e-06, "loss": 0.7052, "step": 8600 }, { "epoch": 1.5467949294255146, "grad_norm": 1.5155798196792603, "learning_rate": 8.089920842632452e-06, "loss": 0.7711, "step": 8601 }, { "epoch": 1.5469747370313764, "grad_norm": 1.5122478008270264, "learning_rate": 8.08946292578065e-06, "loss": 0.7477, "step": 8602 }, { "epoch": 1.5471545446372381, "grad_norm": 1.6503950357437134, "learning_rate": 8.089004967008894e-06, "loss": 0.7739, "step": 8603 }, { "epoch": 1.5473343522430998, "grad_norm": 1.5572798252105713, "learning_rate": 8.088546966323389e-06, "loss": 0.7242, "step": 8604 }, { "epoch": 1.5475141598489617, "grad_norm": 1.5411580801010132, "learning_rate": 8.088088923730358e-06, "loss": 0.7733, "step": 8605 }, { "epoch": 1.5476939674548233, "grad_norm": 1.2255561351776123, "learning_rate": 8.087630839236011e-06, "loss": 0.9999, "step": 8606 }, { "epoch": 1.547873775060685, "grad_norm": 1.565306544303894, "learning_rate": 8.087172712846565e-06, "loss": 0.7187, "step": 8607 }, { "epoch": 1.548053582666547, "grad_norm": 1.439913034439087, "learning_rate": 8.086714544568236e-06, "loss": 0.737, "step": 8608 }, { "epoch": 1.5482333902724084, "grad_norm": 1.5056966543197632, "learning_rate": 8.086256334407241e-06, "loss": 0.7381, "step": 8609 }, { "epoch": 1.5484131978782703, "grad_norm": 1.401288390159607, "learning_rate": 8.085798082369796e-06, "loss": 0.7243, "step": 8610 }, { "epoch": 1.548593005484132, "grad_norm": 1.596903920173645, "learning_rate": 8.085339788462122e-06, "loss": 0.684, "step": 8611 }, { "epoch": 1.5487728130899936, "grad_norm": 1.4420371055603027, "learning_rate": 8.084881452690434e-06, "loss": 0.6931, "step": 8612 }, { "epoch": 1.5489526206958555, "grad_norm": 1.501190423965454, "learning_rate": 8.084423075060952e-06, "loss": 0.6796, "step": 8613 }, { "epoch": 1.5491324283017172, "grad_norm": 1.5064700841903687, "learning_rate": 8.083964655579898e-06, "loss": 0.7958, "step": 8614 }, { "epoch": 1.5493122359075788, "grad_norm": 1.406835913658142, "learning_rate": 8.083506194253489e-06, "loss": 0.7743, "step": 8615 }, { "epoch": 1.5494920435134407, "grad_norm": 1.0838477611541748, "learning_rate": 8.083047691087948e-06, "loss": 0.9649, "step": 8616 }, { "epoch": 1.5496718511193024, "grad_norm": 1.5437803268432617, "learning_rate": 8.082589146089495e-06, "loss": 0.7895, "step": 8617 }, { "epoch": 1.549851658725164, "grad_norm": 1.519351601600647, "learning_rate": 8.08213055926435e-06, "loss": 0.7746, "step": 8618 }, { "epoch": 1.550031466331026, "grad_norm": 1.4338666200637817, "learning_rate": 8.08167193061874e-06, "loss": 0.6838, "step": 8619 }, { "epoch": 1.5502112739368874, "grad_norm": 1.5592339038848877, "learning_rate": 8.081213260158882e-06, "loss": 0.762, "step": 8620 }, { "epoch": 1.5503910815427493, "grad_norm": 1.5737298727035522, "learning_rate": 8.080754547891007e-06, "loss": 0.8201, "step": 8621 }, { "epoch": 1.550570889148611, "grad_norm": 1.5664985179901123, "learning_rate": 8.080295793821334e-06, "loss": 0.744, "step": 8622 }, { "epoch": 1.5507506967544726, "grad_norm": 1.3091192245483398, "learning_rate": 8.079836997956087e-06, "loss": 0.9378, "step": 8623 }, { "epoch": 1.5509305043603345, "grad_norm": 1.8267310857772827, "learning_rate": 8.079378160301494e-06, "loss": 0.7345, "step": 8624 }, { "epoch": 1.5511103119661962, "grad_norm": 1.4202367067337036, "learning_rate": 8.078919280863783e-06, "loss": 0.7654, "step": 8625 }, { "epoch": 1.5512901195720579, "grad_norm": 1.4806536436080933, "learning_rate": 8.078460359649173e-06, "loss": 0.6778, "step": 8626 }, { "epoch": 1.5514699271779198, "grad_norm": 1.3884660005569458, "learning_rate": 8.078001396663897e-06, "loss": 0.7337, "step": 8627 }, { "epoch": 1.5516497347837812, "grad_norm": 1.125190258026123, "learning_rate": 8.077542391914181e-06, "loss": 0.8928, "step": 8628 }, { "epoch": 1.551829542389643, "grad_norm": 1.4990310668945312, "learning_rate": 8.077083345406252e-06, "loss": 0.7827, "step": 8629 }, { "epoch": 1.5520093499955048, "grad_norm": 1.6951239109039307, "learning_rate": 8.076624257146342e-06, "loss": 0.7775, "step": 8630 }, { "epoch": 1.5521891576013664, "grad_norm": 1.5450156927108765, "learning_rate": 8.076165127140675e-06, "loss": 0.7379, "step": 8631 }, { "epoch": 1.5523689652072283, "grad_norm": 1.5748436450958252, "learning_rate": 8.075705955395485e-06, "loss": 0.7391, "step": 8632 }, { "epoch": 1.55254877281309, "grad_norm": 0.9802360534667969, "learning_rate": 8.075246741917e-06, "loss": 0.9273, "step": 8633 }, { "epoch": 1.5527285804189517, "grad_norm": 1.4699668884277344, "learning_rate": 8.074787486711453e-06, "loss": 0.7097, "step": 8634 }, { "epoch": 1.5529083880248136, "grad_norm": 1.9028979539871216, "learning_rate": 8.074328189785072e-06, "loss": 0.7228, "step": 8635 }, { "epoch": 1.553088195630675, "grad_norm": 1.441287636756897, "learning_rate": 8.073868851144094e-06, "loss": 0.7467, "step": 8636 }, { "epoch": 1.553268003236537, "grad_norm": 1.4754940271377563, "learning_rate": 8.073409470794748e-06, "loss": 0.7225, "step": 8637 }, { "epoch": 1.5534478108423986, "grad_norm": 1.5343587398529053, "learning_rate": 8.072950048743269e-06, "loss": 0.7834, "step": 8638 }, { "epoch": 1.5536276184482603, "grad_norm": 1.4336411952972412, "learning_rate": 8.072490584995889e-06, "loss": 0.7403, "step": 8639 }, { "epoch": 1.5538074260541221, "grad_norm": 1.3941172361373901, "learning_rate": 8.072031079558845e-06, "loss": 0.7329, "step": 8640 }, { "epoch": 1.5539872336599838, "grad_norm": 1.5804715156555176, "learning_rate": 8.071571532438366e-06, "loss": 0.7793, "step": 8641 }, { "epoch": 1.5541670412658455, "grad_norm": 1.5815428495407104, "learning_rate": 8.071111943640697e-06, "loss": 0.682, "step": 8642 }, { "epoch": 1.5543468488717074, "grad_norm": 1.5035288333892822, "learning_rate": 8.070652313172064e-06, "loss": 0.7494, "step": 8643 }, { "epoch": 1.554526656477569, "grad_norm": 1.111753225326538, "learning_rate": 8.070192641038713e-06, "loss": 0.9421, "step": 8644 }, { "epoch": 1.5547064640834307, "grad_norm": 1.5692129135131836, "learning_rate": 8.069732927246872e-06, "loss": 0.8206, "step": 8645 }, { "epoch": 1.5548862716892926, "grad_norm": 1.4541195631027222, "learning_rate": 8.069273171802785e-06, "loss": 0.7468, "step": 8646 }, { "epoch": 1.555066079295154, "grad_norm": 1.5395169258117676, "learning_rate": 8.068813374712689e-06, "loss": 0.7507, "step": 8647 }, { "epoch": 1.555245886901016, "grad_norm": 1.4109464883804321, "learning_rate": 8.06835353598282e-06, "loss": 0.7844, "step": 8648 }, { "epoch": 1.5554256945068776, "grad_norm": 1.438696265220642, "learning_rate": 8.06789365561942e-06, "loss": 0.7292, "step": 8649 }, { "epoch": 1.5556055021127393, "grad_norm": 1.4625496864318848, "learning_rate": 8.067433733628731e-06, "loss": 0.7566, "step": 8650 }, { "epoch": 1.5557853097186012, "grad_norm": 1.1614469289779663, "learning_rate": 8.06697377001699e-06, "loss": 0.9044, "step": 8651 }, { "epoch": 1.5559651173244629, "grad_norm": 1.5145962238311768, "learning_rate": 8.06651376479044e-06, "loss": 0.8074, "step": 8652 }, { "epoch": 1.5561449249303245, "grad_norm": 1.5023338794708252, "learning_rate": 8.06605371795532e-06, "loss": 0.6952, "step": 8653 }, { "epoch": 1.5563247325361864, "grad_norm": 1.5672887563705444, "learning_rate": 8.065593629517875e-06, "loss": 0.7422, "step": 8654 }, { "epoch": 1.5565045401420479, "grad_norm": 1.1128630638122559, "learning_rate": 8.065133499484347e-06, "loss": 0.9446, "step": 8655 }, { "epoch": 1.5566843477479098, "grad_norm": 1.1158406734466553, "learning_rate": 8.064673327860979e-06, "loss": 0.9373, "step": 8656 }, { "epoch": 1.5568641553537714, "grad_norm": 1.6199836730957031, "learning_rate": 8.064213114654016e-06, "loss": 0.7416, "step": 8657 }, { "epoch": 1.557043962959633, "grad_norm": 1.616571307182312, "learning_rate": 8.0637528598697e-06, "loss": 0.7831, "step": 8658 }, { "epoch": 1.557223770565495, "grad_norm": 2.855882406234741, "learning_rate": 8.063292563514278e-06, "loss": 0.7652, "step": 8659 }, { "epoch": 1.5574035781713567, "grad_norm": 1.4624760150909424, "learning_rate": 8.062832225593998e-06, "loss": 0.7274, "step": 8660 }, { "epoch": 1.5575833857772183, "grad_norm": 1.5021133422851562, "learning_rate": 8.0623718461151e-06, "loss": 0.7546, "step": 8661 }, { "epoch": 1.5577631933830802, "grad_norm": 1.4755748510360718, "learning_rate": 8.061911425083837e-06, "loss": 0.7663, "step": 8662 }, { "epoch": 1.5579430009889417, "grad_norm": 1.5064666271209717, "learning_rate": 8.061450962506452e-06, "loss": 0.6767, "step": 8663 }, { "epoch": 1.5581228085948036, "grad_norm": 1.5502697229385376, "learning_rate": 8.060990458389195e-06, "loss": 0.7389, "step": 8664 }, { "epoch": 1.5583026162006652, "grad_norm": 1.48782479763031, "learning_rate": 8.060529912738316e-06, "loss": 0.7479, "step": 8665 }, { "epoch": 1.558482423806527, "grad_norm": 1.405401587486267, "learning_rate": 8.060069325560059e-06, "loss": 0.7126, "step": 8666 }, { "epoch": 1.5586622314123888, "grad_norm": 1.4503676891326904, "learning_rate": 8.059608696860677e-06, "loss": 0.7553, "step": 8667 }, { "epoch": 1.5588420390182505, "grad_norm": 1.4190680980682373, "learning_rate": 8.05914802664642e-06, "loss": 0.7179, "step": 8668 }, { "epoch": 1.5590218466241121, "grad_norm": 1.427446961402893, "learning_rate": 8.058687314923539e-06, "loss": 0.7844, "step": 8669 }, { "epoch": 1.559201654229974, "grad_norm": 1.6223288774490356, "learning_rate": 8.058226561698284e-06, "loss": 0.7624, "step": 8670 }, { "epoch": 1.5593814618358357, "grad_norm": 1.4632782936096191, "learning_rate": 8.057765766976906e-06, "loss": 0.6665, "step": 8671 }, { "epoch": 1.5595612694416974, "grad_norm": 1.4768781661987305, "learning_rate": 8.057304930765662e-06, "loss": 0.6668, "step": 8672 }, { "epoch": 1.5597410770475593, "grad_norm": 1.606993317604065, "learning_rate": 8.056844053070798e-06, "loss": 0.7553, "step": 8673 }, { "epoch": 1.5599208846534207, "grad_norm": 1.4613302946090698, "learning_rate": 8.056383133898573e-06, "loss": 0.7534, "step": 8674 }, { "epoch": 1.5601006922592826, "grad_norm": 1.557913064956665, "learning_rate": 8.05592217325524e-06, "loss": 0.7204, "step": 8675 }, { "epoch": 1.5602804998651443, "grad_norm": 1.4515421390533447, "learning_rate": 8.055461171147052e-06, "loss": 0.7466, "step": 8676 }, { "epoch": 1.560460307471006, "grad_norm": 1.4821760654449463, "learning_rate": 8.055000127580265e-06, "loss": 0.7595, "step": 8677 }, { "epoch": 1.5606401150768678, "grad_norm": 1.8042176961898804, "learning_rate": 8.054539042561136e-06, "loss": 0.8177, "step": 8678 }, { "epoch": 1.5608199226827295, "grad_norm": 1.4252628087997437, "learning_rate": 8.054077916095918e-06, "loss": 0.7262, "step": 8679 }, { "epoch": 1.5609997302885912, "grad_norm": 1.8905291557312012, "learning_rate": 8.053616748190871e-06, "loss": 0.7582, "step": 8680 }, { "epoch": 1.561179537894453, "grad_norm": 1.4793277978897095, "learning_rate": 8.053155538852252e-06, "loss": 0.7074, "step": 8681 }, { "epoch": 1.5613593455003145, "grad_norm": 1.5079405307769775, "learning_rate": 8.052694288086317e-06, "loss": 0.8301, "step": 8682 }, { "epoch": 1.5615391531061764, "grad_norm": 1.8350807428359985, "learning_rate": 8.052232995899328e-06, "loss": 0.7063, "step": 8683 }, { "epoch": 1.561718960712038, "grad_norm": 1.117103099822998, "learning_rate": 8.051771662297542e-06, "loss": 0.9983, "step": 8684 }, { "epoch": 1.5618987683178998, "grad_norm": 1.4430114030838013, "learning_rate": 8.051310287287219e-06, "loss": 0.7402, "step": 8685 }, { "epoch": 1.5620785759237616, "grad_norm": 1.146020531654358, "learning_rate": 8.050848870874618e-06, "loss": 0.9551, "step": 8686 }, { "epoch": 1.5622583835296233, "grad_norm": 1.4668102264404297, "learning_rate": 8.050387413066e-06, "loss": 0.6931, "step": 8687 }, { "epoch": 1.562438191135485, "grad_norm": 1.625548005104065, "learning_rate": 8.04992591386763e-06, "loss": 0.7345, "step": 8688 }, { "epoch": 1.5626179987413469, "grad_norm": 1.452156662940979, "learning_rate": 8.049464373285768e-06, "loss": 0.6923, "step": 8689 }, { "epoch": 1.5627978063472083, "grad_norm": 1.5793133974075317, "learning_rate": 8.049002791326673e-06, "loss": 0.6781, "step": 8690 }, { "epoch": 1.5629776139530702, "grad_norm": 1.1750493049621582, "learning_rate": 8.048541167996611e-06, "loss": 0.937, "step": 8691 }, { "epoch": 1.563157421558932, "grad_norm": 1.6020584106445312, "learning_rate": 8.048079503301847e-06, "loss": 0.7681, "step": 8692 }, { "epoch": 1.5633372291647936, "grad_norm": 1.5235265493392944, "learning_rate": 8.04761779724864e-06, "loss": 0.7361, "step": 8693 }, { "epoch": 1.5635170367706555, "grad_norm": 1.4180220365524292, "learning_rate": 8.047156049843264e-06, "loss": 0.6579, "step": 8694 }, { "epoch": 1.5636968443765171, "grad_norm": 1.0412029027938843, "learning_rate": 8.046694261091974e-06, "loss": 0.9679, "step": 8695 }, { "epoch": 1.5638766519823788, "grad_norm": 1.014623999595642, "learning_rate": 8.046232431001042e-06, "loss": 0.9292, "step": 8696 }, { "epoch": 1.5640564595882407, "grad_norm": 1.0118415355682373, "learning_rate": 8.045770559576733e-06, "loss": 0.9494, "step": 8697 }, { "epoch": 1.5642362671941024, "grad_norm": 1.1647400856018066, "learning_rate": 8.045308646825317e-06, "loss": 0.9641, "step": 8698 }, { "epoch": 1.564416074799964, "grad_norm": 1.6523064374923706, "learning_rate": 8.044846692753054e-06, "loss": 0.7253, "step": 8699 }, { "epoch": 1.564595882405826, "grad_norm": 1.4542567729949951, "learning_rate": 8.044384697366218e-06, "loss": 0.7417, "step": 8700 }, { "epoch": 1.5647756900116874, "grad_norm": 1.735237956047058, "learning_rate": 8.043922660671077e-06, "loss": 0.7746, "step": 8701 }, { "epoch": 1.5649554976175493, "grad_norm": 1.547141671180725, "learning_rate": 8.043460582673899e-06, "loss": 0.7183, "step": 8702 }, { "epoch": 1.565135305223411, "grad_norm": 1.4619523286819458, "learning_rate": 8.042998463380955e-06, "loss": 0.7486, "step": 8703 }, { "epoch": 1.5653151128292726, "grad_norm": 1.4524792432785034, "learning_rate": 8.042536302798515e-06, "loss": 0.7208, "step": 8704 }, { "epoch": 1.5654949204351345, "grad_norm": 1.5331027507781982, "learning_rate": 8.042074100932849e-06, "loss": 0.7035, "step": 8705 }, { "epoch": 1.5656747280409962, "grad_norm": 1.6284047365188599, "learning_rate": 8.041611857790228e-06, "loss": 0.7466, "step": 8706 }, { "epoch": 1.5658545356468578, "grad_norm": 1.5143016576766968, "learning_rate": 8.041149573376928e-06, "loss": 0.7348, "step": 8707 }, { "epoch": 1.5660343432527197, "grad_norm": 1.62563955783844, "learning_rate": 8.040687247699215e-06, "loss": 0.908, "step": 8708 }, { "epoch": 1.5662141508585812, "grad_norm": 1.524194359779358, "learning_rate": 8.040224880763368e-06, "loss": 0.744, "step": 8709 }, { "epoch": 1.566393958464443, "grad_norm": 1.6662122011184692, "learning_rate": 8.039762472575658e-06, "loss": 0.7214, "step": 8710 }, { "epoch": 1.5665737660703047, "grad_norm": 1.4327242374420166, "learning_rate": 8.039300023142361e-06, "loss": 0.7536, "step": 8711 }, { "epoch": 1.5667535736761664, "grad_norm": 1.5340461730957031, "learning_rate": 8.038837532469749e-06, "loss": 0.7377, "step": 8712 }, { "epoch": 1.5669333812820283, "grad_norm": 1.5044984817504883, "learning_rate": 8.0383750005641e-06, "loss": 0.7241, "step": 8713 }, { "epoch": 1.56711318888789, "grad_norm": 1.1879345178604126, "learning_rate": 8.03791242743169e-06, "loss": 0.9828, "step": 8714 }, { "epoch": 1.5672929964937516, "grad_norm": 1.4346061944961548, "learning_rate": 8.03744981307879e-06, "loss": 0.6747, "step": 8715 }, { "epoch": 1.5674728040996135, "grad_norm": 1.889353632926941, "learning_rate": 8.036987157511686e-06, "loss": 0.7057, "step": 8716 }, { "epoch": 1.567652611705475, "grad_norm": 1.498970627784729, "learning_rate": 8.03652446073665e-06, "loss": 0.7237, "step": 8717 }, { "epoch": 1.5678324193113369, "grad_norm": 1.639015555381775, "learning_rate": 8.036061722759962e-06, "loss": 0.7152, "step": 8718 }, { "epoch": 1.5680122269171985, "grad_norm": 1.4703097343444824, "learning_rate": 8.0355989435879e-06, "loss": 0.695, "step": 8719 }, { "epoch": 1.5681920345230602, "grad_norm": 1.527480959892273, "learning_rate": 8.035136123226743e-06, "loss": 0.6764, "step": 8720 }, { "epoch": 1.5683718421289221, "grad_norm": 1.621395230293274, "learning_rate": 8.034673261682771e-06, "loss": 0.7023, "step": 8721 }, { "epoch": 1.5685516497347838, "grad_norm": 1.0704668760299683, "learning_rate": 8.034210358962266e-06, "loss": 0.9213, "step": 8722 }, { "epoch": 1.5687314573406455, "grad_norm": 1.1927095651626587, "learning_rate": 8.033747415071507e-06, "loss": 0.9596, "step": 8723 }, { "epoch": 1.5689112649465073, "grad_norm": 1.7445592880249023, "learning_rate": 8.033284430016775e-06, "loss": 0.6913, "step": 8724 }, { "epoch": 1.569091072552369, "grad_norm": 0.9443660378456116, "learning_rate": 8.032821403804355e-06, "loss": 0.9409, "step": 8725 }, { "epoch": 1.5692708801582307, "grad_norm": 1.5066627264022827, "learning_rate": 8.032358336440527e-06, "loss": 0.7533, "step": 8726 }, { "epoch": 1.5694506877640926, "grad_norm": 0.9928518533706665, "learning_rate": 8.031895227931575e-06, "loss": 0.9383, "step": 8727 }, { "epoch": 1.569630495369954, "grad_norm": 1.491765022277832, "learning_rate": 8.031432078283784e-06, "loss": 0.701, "step": 8728 }, { "epoch": 1.569810302975816, "grad_norm": 1.4297407865524292, "learning_rate": 8.030968887503437e-06, "loss": 0.7051, "step": 8729 }, { "epoch": 1.5699901105816776, "grad_norm": 1.5717971324920654, "learning_rate": 8.03050565559682e-06, "loss": 0.7517, "step": 8730 }, { "epoch": 1.5701699181875393, "grad_norm": 1.0498852729797363, "learning_rate": 8.030042382570217e-06, "loss": 0.9222, "step": 8731 }, { "epoch": 1.5703497257934012, "grad_norm": 1.5592827796936035, "learning_rate": 8.029579068429916e-06, "loss": 0.7399, "step": 8732 }, { "epoch": 1.5705295333992628, "grad_norm": 1.4268699884414673, "learning_rate": 8.029115713182199e-06, "loss": 0.6846, "step": 8733 }, { "epoch": 1.5707093410051245, "grad_norm": 1.4792640209197998, "learning_rate": 8.028652316833359e-06, "loss": 0.7406, "step": 8734 }, { "epoch": 1.5708891486109864, "grad_norm": 1.5092443227767944, "learning_rate": 8.02818887938968e-06, "loss": 0.8026, "step": 8735 }, { "epoch": 1.5710689562168478, "grad_norm": 1.4781733751296997, "learning_rate": 8.027725400857452e-06, "loss": 0.7386, "step": 8736 }, { "epoch": 1.5712487638227097, "grad_norm": 1.5722603797912598, "learning_rate": 8.027261881242963e-06, "loss": 0.7559, "step": 8737 }, { "epoch": 1.5714285714285714, "grad_norm": 1.1827070713043213, "learning_rate": 8.026798320552502e-06, "loss": 0.9074, "step": 8738 }, { "epoch": 1.571608379034433, "grad_norm": 1.4404164552688599, "learning_rate": 8.02633471879236e-06, "loss": 0.7365, "step": 8739 }, { "epoch": 1.571788186640295, "grad_norm": 1.8310155868530273, "learning_rate": 8.025871075968828e-06, "loss": 0.8169, "step": 8740 }, { "epoch": 1.5719679942461566, "grad_norm": 1.6054706573486328, "learning_rate": 8.025407392088194e-06, "loss": 0.7732, "step": 8741 }, { "epoch": 1.5721478018520183, "grad_norm": 1.6827088594436646, "learning_rate": 8.02494366715675e-06, "loss": 0.7559, "step": 8742 }, { "epoch": 1.5723276094578802, "grad_norm": 1.7744615077972412, "learning_rate": 8.024479901180792e-06, "loss": 0.7742, "step": 8743 }, { "epoch": 1.5725074170637416, "grad_norm": 1.6036243438720703, "learning_rate": 8.02401609416661e-06, "loss": 0.667, "step": 8744 }, { "epoch": 1.5726872246696035, "grad_norm": 1.4904181957244873, "learning_rate": 8.023552246120498e-06, "loss": 0.8116, "step": 8745 }, { "epoch": 1.5728670322754652, "grad_norm": 1.5990691184997559, "learning_rate": 8.023088357048748e-06, "loss": 0.7948, "step": 8746 }, { "epoch": 1.5730468398813269, "grad_norm": 1.7451928853988647, "learning_rate": 8.022624426957656e-06, "loss": 0.7665, "step": 8747 }, { "epoch": 1.5732266474871888, "grad_norm": 1.5427055358886719, "learning_rate": 8.022160455853516e-06, "loss": 0.7384, "step": 8748 }, { "epoch": 1.5734064550930504, "grad_norm": 1.387579083442688, "learning_rate": 8.021696443742627e-06, "loss": 0.7282, "step": 8749 }, { "epoch": 1.573586262698912, "grad_norm": 1.4970521926879883, "learning_rate": 8.02123239063128e-06, "loss": 0.7657, "step": 8750 }, { "epoch": 1.573766070304774, "grad_norm": 1.2780969142913818, "learning_rate": 8.02076829652577e-06, "loss": 0.9754, "step": 8751 }, { "epoch": 1.5739458779106357, "grad_norm": 1.6056421995162964, "learning_rate": 8.020304161432404e-06, "loss": 0.7771, "step": 8752 }, { "epoch": 1.5741256855164973, "grad_norm": 1.6887094974517822, "learning_rate": 8.019839985357472e-06, "loss": 0.7957, "step": 8753 }, { "epoch": 1.5743054931223592, "grad_norm": 1.479557752609253, "learning_rate": 8.019375768307272e-06, "loss": 0.6946, "step": 8754 }, { "epoch": 1.5744853007282207, "grad_norm": 1.3973908424377441, "learning_rate": 8.018911510288105e-06, "loss": 0.7014, "step": 8755 }, { "epoch": 1.5746651083340826, "grad_norm": 1.4366804361343384, "learning_rate": 8.018447211306271e-06, "loss": 0.7668, "step": 8756 }, { "epoch": 1.5748449159399442, "grad_norm": 1.3874272108078003, "learning_rate": 8.01798287136807e-06, "loss": 0.6833, "step": 8757 }, { "epoch": 1.575024723545806, "grad_norm": 1.4204775094985962, "learning_rate": 8.017518490479798e-06, "loss": 0.7658, "step": 8758 }, { "epoch": 1.5752045311516678, "grad_norm": 1.4855729341506958, "learning_rate": 8.017054068647762e-06, "loss": 0.712, "step": 8759 }, { "epoch": 1.5753843387575295, "grad_norm": 1.4508209228515625, "learning_rate": 8.016589605878263e-06, "loss": 0.7494, "step": 8760 }, { "epoch": 1.5755641463633911, "grad_norm": 1.479156255722046, "learning_rate": 8.016125102177599e-06, "loss": 0.7403, "step": 8761 }, { "epoch": 1.575743953969253, "grad_norm": 1.5775039196014404, "learning_rate": 8.015660557552074e-06, "loss": 0.7893, "step": 8762 }, { "epoch": 1.5759237615751145, "grad_norm": 1.1698061227798462, "learning_rate": 8.015195972007994e-06, "loss": 0.9675, "step": 8763 }, { "epoch": 1.5761035691809764, "grad_norm": 1.434113621711731, "learning_rate": 8.01473134555166e-06, "loss": 0.6675, "step": 8764 }, { "epoch": 1.576283376786838, "grad_norm": 1.4578056335449219, "learning_rate": 8.014266678189378e-06, "loss": 0.7147, "step": 8765 }, { "epoch": 1.5764631843926997, "grad_norm": 1.6694152355194092, "learning_rate": 8.01380196992745e-06, "loss": 0.7786, "step": 8766 }, { "epoch": 1.5766429919985616, "grad_norm": 1.388193964958191, "learning_rate": 8.013337220772186e-06, "loss": 0.7396, "step": 8767 }, { "epoch": 1.5768227996044233, "grad_norm": 1.4637837409973145, "learning_rate": 8.012872430729888e-06, "loss": 0.7576, "step": 8768 }, { "epoch": 1.577002607210285, "grad_norm": 1.5020959377288818, "learning_rate": 8.012407599806867e-06, "loss": 0.7396, "step": 8769 }, { "epoch": 1.5771824148161468, "grad_norm": 1.499991774559021, "learning_rate": 8.011942728009426e-06, "loss": 0.7532, "step": 8770 }, { "epoch": 1.5773622224220083, "grad_norm": 1.4343596696853638, "learning_rate": 8.011477815343876e-06, "loss": 0.7028, "step": 8771 }, { "epoch": 1.5775420300278702, "grad_norm": 1.6093720197677612, "learning_rate": 8.011012861816521e-06, "loss": 0.768, "step": 8772 }, { "epoch": 1.5777218376337319, "grad_norm": 1.5019099712371826, "learning_rate": 8.010547867433674e-06, "loss": 0.736, "step": 8773 }, { "epoch": 1.5779016452395935, "grad_norm": 1.460262417793274, "learning_rate": 8.010082832201641e-06, "loss": 0.7958, "step": 8774 }, { "epoch": 1.5780814528454554, "grad_norm": 1.117609977722168, "learning_rate": 8.009617756126736e-06, "loss": 0.9759, "step": 8775 }, { "epoch": 1.578261260451317, "grad_norm": 1.5073260068893433, "learning_rate": 8.009152639215265e-06, "loss": 0.7384, "step": 8776 }, { "epoch": 1.5784410680571788, "grad_norm": 1.5071333646774292, "learning_rate": 8.008687481473542e-06, "loss": 0.7079, "step": 8777 }, { "epoch": 1.5786208756630407, "grad_norm": 1.5385464429855347, "learning_rate": 8.008222282907879e-06, "loss": 0.7162, "step": 8778 }, { "epoch": 1.5788006832689023, "grad_norm": 1.7121493816375732, "learning_rate": 8.007757043524585e-06, "loss": 0.733, "step": 8779 }, { "epoch": 1.578980490874764, "grad_norm": 1.5277119874954224, "learning_rate": 8.007291763329974e-06, "loss": 0.7488, "step": 8780 }, { "epoch": 1.579160298480626, "grad_norm": 1.4773975610733032, "learning_rate": 8.006826442330362e-06, "loss": 0.6841, "step": 8781 }, { "epoch": 1.5793401060864873, "grad_norm": 1.6047852039337158, "learning_rate": 8.006361080532059e-06, "loss": 0.6798, "step": 8782 }, { "epoch": 1.5795199136923492, "grad_norm": 1.4620370864868164, "learning_rate": 8.00589567794138e-06, "loss": 0.7093, "step": 8783 }, { "epoch": 1.579699721298211, "grad_norm": 1.4467216730117798, "learning_rate": 8.005430234564643e-06, "loss": 0.6954, "step": 8784 }, { "epoch": 1.5798795289040726, "grad_norm": 1.4699428081512451, "learning_rate": 8.004964750408159e-06, "loss": 0.6666, "step": 8785 }, { "epoch": 1.5800593365099345, "grad_norm": 1.5144691467285156, "learning_rate": 8.004499225478248e-06, "loss": 0.759, "step": 8786 }, { "epoch": 1.5802391441157961, "grad_norm": 1.0573872327804565, "learning_rate": 8.004033659781225e-06, "loss": 0.9323, "step": 8787 }, { "epoch": 1.5804189517216578, "grad_norm": 1.4713534116744995, "learning_rate": 8.003568053323406e-06, "loss": 0.755, "step": 8788 }, { "epoch": 1.5805987593275197, "grad_norm": 1.5134857892990112, "learning_rate": 8.003102406111109e-06, "loss": 0.7466, "step": 8789 }, { "epoch": 1.5807785669333811, "grad_norm": 1.6988754272460938, "learning_rate": 8.002636718150654e-06, "loss": 0.686, "step": 8790 }, { "epoch": 1.580958374539243, "grad_norm": 1.5504682064056396, "learning_rate": 8.002170989448358e-06, "loss": 0.7867, "step": 8791 }, { "epoch": 1.5811381821451047, "grad_norm": 1.099808692932129, "learning_rate": 8.001705220010542e-06, "loss": 0.9442, "step": 8792 }, { "epoch": 1.5813179897509664, "grad_norm": 1.5467090606689453, "learning_rate": 8.001239409843524e-06, "loss": 0.6763, "step": 8793 }, { "epoch": 1.5814977973568283, "grad_norm": 1.511326789855957, "learning_rate": 8.000773558953626e-06, "loss": 0.6967, "step": 8794 }, { "epoch": 1.58167760496269, "grad_norm": 1.4919203519821167, "learning_rate": 8.000307667347167e-06, "loss": 0.6902, "step": 8795 }, { "epoch": 1.5818574125685516, "grad_norm": 1.1002625226974487, "learning_rate": 7.99984173503047e-06, "loss": 0.9957, "step": 8796 }, { "epoch": 1.5820372201744135, "grad_norm": 1.4919489622116089, "learning_rate": 7.999375762009859e-06, "loss": 0.7703, "step": 8797 }, { "epoch": 1.582217027780275, "grad_norm": 1.55800461769104, "learning_rate": 7.99890974829165e-06, "loss": 0.7295, "step": 8798 }, { "epoch": 1.5823968353861368, "grad_norm": 1.5100973844528198, "learning_rate": 7.998443693882174e-06, "loss": 0.7441, "step": 8799 }, { "epoch": 1.5825766429919985, "grad_norm": 1.4747943878173828, "learning_rate": 7.99797759878775e-06, "loss": 0.7696, "step": 8800 }, { "epoch": 1.5827564505978602, "grad_norm": 1.5456165075302124, "learning_rate": 7.997511463014705e-06, "loss": 0.6685, "step": 8801 }, { "epoch": 1.582936258203722, "grad_norm": 1.4121532440185547, "learning_rate": 7.997045286569362e-06, "loss": 0.7534, "step": 8802 }, { "epoch": 1.5831160658095838, "grad_norm": 1.1048874855041504, "learning_rate": 7.996579069458048e-06, "loss": 0.9821, "step": 8803 }, { "epoch": 1.5832958734154454, "grad_norm": 1.5363657474517822, "learning_rate": 7.996112811687086e-06, "loss": 0.6835, "step": 8804 }, { "epoch": 1.5834756810213073, "grad_norm": 1.4854568243026733, "learning_rate": 7.995646513262805e-06, "loss": 0.7712, "step": 8805 }, { "epoch": 1.583655488627169, "grad_norm": 1.4536688327789307, "learning_rate": 7.995180174191532e-06, "loss": 0.77, "step": 8806 }, { "epoch": 1.5838352962330307, "grad_norm": 1.5322380065917969, "learning_rate": 7.994713794479595e-06, "loss": 0.7698, "step": 8807 }, { "epoch": 1.5840151038388925, "grad_norm": 1.482103705406189, "learning_rate": 7.994247374133318e-06, "loss": 0.8204, "step": 8808 }, { "epoch": 1.584194911444754, "grad_norm": 1.5091776847839355, "learning_rate": 7.993780913159037e-06, "loss": 0.7486, "step": 8809 }, { "epoch": 1.5843747190506159, "grad_norm": 1.5051413774490356, "learning_rate": 7.993314411563075e-06, "loss": 0.7072, "step": 8810 }, { "epoch": 1.5845545266564776, "grad_norm": 1.5607846975326538, "learning_rate": 7.992847869351765e-06, "loss": 0.7224, "step": 8811 }, { "epoch": 1.5847343342623392, "grad_norm": 1.3986866474151611, "learning_rate": 7.992381286531437e-06, "loss": 0.745, "step": 8812 }, { "epoch": 1.5849141418682011, "grad_norm": 1.6147295236587524, "learning_rate": 7.99191466310842e-06, "loss": 0.7512, "step": 8813 }, { "epoch": 1.5850939494740628, "grad_norm": 1.429916501045227, "learning_rate": 7.991447999089047e-06, "loss": 0.739, "step": 8814 }, { "epoch": 1.5852737570799245, "grad_norm": 1.469426155090332, "learning_rate": 7.990981294479652e-06, "loss": 0.6624, "step": 8815 }, { "epoch": 1.5854535646857864, "grad_norm": 1.5435676574707031, "learning_rate": 7.990514549286562e-06, "loss": 0.7388, "step": 8816 }, { "epoch": 1.5856333722916478, "grad_norm": 1.5345431566238403, "learning_rate": 7.990047763516115e-06, "loss": 0.7833, "step": 8817 }, { "epoch": 1.5858131798975097, "grad_norm": 1.4707050323486328, "learning_rate": 7.989580937174643e-06, "loss": 0.7109, "step": 8818 }, { "epoch": 1.5859929875033714, "grad_norm": 1.10646390914917, "learning_rate": 7.989114070268482e-06, "loss": 0.9267, "step": 8819 }, { "epoch": 1.586172795109233, "grad_norm": 1.4985052347183228, "learning_rate": 7.988647162803965e-06, "loss": 0.7704, "step": 8820 }, { "epoch": 1.586352602715095, "grad_norm": 1.5140259265899658, "learning_rate": 7.988180214787424e-06, "loss": 0.7226, "step": 8821 }, { "epoch": 1.5865324103209566, "grad_norm": 1.5277668237686157, "learning_rate": 7.987713226225202e-06, "loss": 0.7184, "step": 8822 }, { "epoch": 1.5867122179268183, "grad_norm": 1.4894498586654663, "learning_rate": 7.98724619712363e-06, "loss": 0.7786, "step": 8823 }, { "epoch": 1.5868920255326802, "grad_norm": 1.4990514516830444, "learning_rate": 7.986779127489049e-06, "loss": 0.7288, "step": 8824 }, { "epoch": 1.5870718331385416, "grad_norm": 1.4288569688796997, "learning_rate": 7.986312017327792e-06, "loss": 0.7325, "step": 8825 }, { "epoch": 1.5872516407444035, "grad_norm": 1.5407392978668213, "learning_rate": 7.9858448666462e-06, "loss": 0.7265, "step": 8826 }, { "epoch": 1.5874314483502652, "grad_norm": 1.530807375907898, "learning_rate": 7.98537767545061e-06, "loss": 0.7481, "step": 8827 }, { "epoch": 1.5876112559561268, "grad_norm": 1.6871848106384277, "learning_rate": 7.984910443747364e-06, "loss": 0.7646, "step": 8828 }, { "epoch": 1.5877910635619887, "grad_norm": 3.4434049129486084, "learning_rate": 7.9844431715428e-06, "loss": 0.7285, "step": 8829 }, { "epoch": 1.5879708711678504, "grad_norm": 1.7318507432937622, "learning_rate": 7.983975858843256e-06, "loss": 0.6778, "step": 8830 }, { "epoch": 1.588150678773712, "grad_norm": 1.7773617506027222, "learning_rate": 7.983508505655077e-06, "loss": 0.6948, "step": 8831 }, { "epoch": 1.588330486379574, "grad_norm": 1.4000630378723145, "learning_rate": 7.983041111984601e-06, "loss": 0.7038, "step": 8832 }, { "epoch": 1.5885102939854354, "grad_norm": 1.5652039051055908, "learning_rate": 7.982573677838172e-06, "loss": 0.691, "step": 8833 }, { "epoch": 1.5886901015912973, "grad_norm": 1.3834588527679443, "learning_rate": 7.982106203222131e-06, "loss": 0.6941, "step": 8834 }, { "epoch": 1.5888699091971592, "grad_norm": 1.4554625749588013, "learning_rate": 7.981638688142823e-06, "loss": 0.7613, "step": 8835 }, { "epoch": 1.5890497168030207, "grad_norm": 1.510589838027954, "learning_rate": 7.98117113260659e-06, "loss": 0.7341, "step": 8836 }, { "epoch": 1.5892295244088825, "grad_norm": 1.5595800876617432, "learning_rate": 7.980703536619776e-06, "loss": 0.7313, "step": 8837 }, { "epoch": 1.5894093320147442, "grad_norm": 1.4045436382293701, "learning_rate": 7.980235900188726e-06, "loss": 0.6815, "step": 8838 }, { "epoch": 1.5895891396206059, "grad_norm": 1.3786842823028564, "learning_rate": 7.979768223319786e-06, "loss": 0.7376, "step": 8839 }, { "epoch": 1.5897689472264678, "grad_norm": 1.4910467863082886, "learning_rate": 7.9793005060193e-06, "loss": 0.7573, "step": 8840 }, { "epoch": 1.5899487548323294, "grad_norm": 1.565023422241211, "learning_rate": 7.978832748293617e-06, "loss": 0.8463, "step": 8841 }, { "epoch": 1.5901285624381911, "grad_norm": 1.4986180067062378, "learning_rate": 7.97836495014908e-06, "loss": 0.7813, "step": 8842 }, { "epoch": 1.590308370044053, "grad_norm": 1.109261393547058, "learning_rate": 7.977897111592041e-06, "loss": 0.9237, "step": 8843 }, { "epoch": 1.5904881776499145, "grad_norm": 1.5318666696548462, "learning_rate": 7.977429232628844e-06, "loss": 0.7664, "step": 8844 }, { "epoch": 1.5906679852557764, "grad_norm": 1.5027052164077759, "learning_rate": 7.97696131326584e-06, "loss": 0.8143, "step": 8845 }, { "epoch": 1.590847792861638, "grad_norm": 1.8148410320281982, "learning_rate": 7.976493353509377e-06, "loss": 0.7926, "step": 8846 }, { "epoch": 1.5910276004674997, "grad_norm": 1.4722692966461182, "learning_rate": 7.976025353365804e-06, "loss": 0.7403, "step": 8847 }, { "epoch": 1.5912074080733616, "grad_norm": 1.5478289127349854, "learning_rate": 7.975557312841473e-06, "loss": 0.7241, "step": 8848 }, { "epoch": 1.5913872156792233, "grad_norm": 1.5526368618011475, "learning_rate": 7.975089231942731e-06, "loss": 0.6831, "step": 8849 }, { "epoch": 1.591567023285085, "grad_norm": 1.552110195159912, "learning_rate": 7.974621110675936e-06, "loss": 0.7319, "step": 8850 }, { "epoch": 1.5917468308909468, "grad_norm": 1.5379056930541992, "learning_rate": 7.974152949047433e-06, "loss": 0.6826, "step": 8851 }, { "epoch": 1.5919266384968083, "grad_norm": 1.4716404676437378, "learning_rate": 7.973684747063577e-06, "loss": 0.7815, "step": 8852 }, { "epoch": 1.5921064461026702, "grad_norm": 1.4628541469573975, "learning_rate": 7.973216504730722e-06, "loss": 0.6744, "step": 8853 }, { "epoch": 1.5922862537085318, "grad_norm": 1.6038498878479004, "learning_rate": 7.97274822205522e-06, "loss": 0.7201, "step": 8854 }, { "epoch": 1.5924660613143935, "grad_norm": 1.4482508897781372, "learning_rate": 7.972279899043424e-06, "loss": 0.7521, "step": 8855 }, { "epoch": 1.5926458689202554, "grad_norm": 1.2529566287994385, "learning_rate": 7.97181153570169e-06, "loss": 0.9185, "step": 8856 }, { "epoch": 1.592825676526117, "grad_norm": 1.4799591302871704, "learning_rate": 7.971343132036374e-06, "loss": 0.6517, "step": 8857 }, { "epoch": 1.5930054841319787, "grad_norm": 1.08322274684906, "learning_rate": 7.97087468805383e-06, "loss": 0.9425, "step": 8858 }, { "epoch": 1.5931852917378406, "grad_norm": 1.503521203994751, "learning_rate": 7.970406203760415e-06, "loss": 0.7176, "step": 8859 }, { "epoch": 1.593365099343702, "grad_norm": 1.58603835105896, "learning_rate": 7.969937679162485e-06, "loss": 0.7568, "step": 8860 }, { "epoch": 1.593544906949564, "grad_norm": 1.4993765354156494, "learning_rate": 7.969469114266399e-06, "loss": 0.7152, "step": 8861 }, { "epoch": 1.5937247145554259, "grad_norm": 1.5323556661605835, "learning_rate": 7.969000509078512e-06, "loss": 0.696, "step": 8862 }, { "epoch": 1.5939045221612873, "grad_norm": 1.5236122608184814, "learning_rate": 7.968531863605184e-06, "loss": 0.7525, "step": 8863 }, { "epoch": 1.5940843297671492, "grad_norm": 1.4347610473632812, "learning_rate": 7.968063177852775e-06, "loss": 0.6968, "step": 8864 }, { "epoch": 1.5942641373730109, "grad_norm": 1.5253392457962036, "learning_rate": 7.96759445182764e-06, "loss": 0.7016, "step": 8865 }, { "epoch": 1.5944439449788725, "grad_norm": 1.1614985466003418, "learning_rate": 7.967125685536145e-06, "loss": 0.9749, "step": 8866 }, { "epoch": 1.5946237525847344, "grad_norm": 1.414533019065857, "learning_rate": 7.966656878984647e-06, "loss": 0.6918, "step": 8867 }, { "epoch": 1.594803560190596, "grad_norm": 1.5323922634124756, "learning_rate": 7.966188032179507e-06, "loss": 0.7723, "step": 8868 }, { "epoch": 1.5949833677964578, "grad_norm": 1.4670579433441162, "learning_rate": 7.965719145127089e-06, "loss": 0.6393, "step": 8869 }, { "epoch": 1.5951631754023197, "grad_norm": 1.5052247047424316, "learning_rate": 7.965250217833753e-06, "loss": 0.7853, "step": 8870 }, { "epoch": 1.5953429830081811, "grad_norm": 1.4809520244598389, "learning_rate": 7.964781250305863e-06, "loss": 0.7243, "step": 8871 }, { "epoch": 1.595522790614043, "grad_norm": 1.0620635747909546, "learning_rate": 7.964312242549779e-06, "loss": 0.9399, "step": 8872 }, { "epoch": 1.5957025982199047, "grad_norm": 1.538450837135315, "learning_rate": 7.96384319457187e-06, "loss": 0.7812, "step": 8873 }, { "epoch": 1.5958824058257663, "grad_norm": 1.8770644664764404, "learning_rate": 7.963374106378496e-06, "loss": 0.7524, "step": 8874 }, { "epoch": 1.5960622134316282, "grad_norm": 1.638709306716919, "learning_rate": 7.962904977976027e-06, "loss": 0.7792, "step": 8875 }, { "epoch": 1.59624202103749, "grad_norm": 1.5659608840942383, "learning_rate": 7.962435809370823e-06, "loss": 0.7556, "step": 8876 }, { "epoch": 1.5964218286433516, "grad_norm": 1.451534628868103, "learning_rate": 7.961966600569251e-06, "loss": 0.6926, "step": 8877 }, { "epoch": 1.5966016362492135, "grad_norm": 1.5059257745742798, "learning_rate": 7.96149735157768e-06, "loss": 0.6853, "step": 8878 }, { "epoch": 1.596781443855075, "grad_norm": 1.433655023574829, "learning_rate": 7.961028062402475e-06, "loss": 0.7073, "step": 8879 }, { "epoch": 1.5969612514609368, "grad_norm": 1.5300065279006958, "learning_rate": 7.960558733050005e-06, "loss": 0.7394, "step": 8880 }, { "epoch": 1.5971410590667985, "grad_norm": 1.4686812162399292, "learning_rate": 7.96008936352664e-06, "loss": 0.7146, "step": 8881 }, { "epoch": 1.5973208666726602, "grad_norm": 1.060952067375183, "learning_rate": 7.959619953838741e-06, "loss": 0.9257, "step": 8882 }, { "epoch": 1.597500674278522, "grad_norm": 1.5511462688446045, "learning_rate": 7.959150503992688e-06, "loss": 0.7649, "step": 8883 }, { "epoch": 1.5976804818843837, "grad_norm": 1.5000544786453247, "learning_rate": 7.958681013994843e-06, "loss": 0.7119, "step": 8884 }, { "epoch": 1.5978602894902454, "grad_norm": 1.4865782260894775, "learning_rate": 7.958211483851579e-06, "loss": 0.7576, "step": 8885 }, { "epoch": 1.5980400970961073, "grad_norm": 1.4896819591522217, "learning_rate": 7.957741913569268e-06, "loss": 0.7434, "step": 8886 }, { "epoch": 1.5982199047019687, "grad_norm": 1.4558944702148438, "learning_rate": 7.957272303154277e-06, "loss": 0.6957, "step": 8887 }, { "epoch": 1.5983997123078306, "grad_norm": 1.176942229270935, "learning_rate": 7.956802652612986e-06, "loss": 0.9341, "step": 8888 }, { "epoch": 1.5985795199136925, "grad_norm": 1.5952152013778687, "learning_rate": 7.956332961951758e-06, "loss": 0.7918, "step": 8889 }, { "epoch": 1.598759327519554, "grad_norm": 1.3808664083480835, "learning_rate": 7.955863231176974e-06, "loss": 0.7477, "step": 8890 }, { "epoch": 1.5989391351254159, "grad_norm": 1.4953465461730957, "learning_rate": 7.955393460295003e-06, "loss": 0.7423, "step": 8891 }, { "epoch": 1.5991189427312775, "grad_norm": 1.261005163192749, "learning_rate": 7.95492364931222e-06, "loss": 0.9075, "step": 8892 }, { "epoch": 1.5992987503371392, "grad_norm": 1.4783462285995483, "learning_rate": 7.954453798235003e-06, "loss": 0.7335, "step": 8893 }, { "epoch": 1.599478557943001, "grad_norm": 1.428004264831543, "learning_rate": 7.953983907069722e-06, "loss": 0.7356, "step": 8894 }, { "epoch": 1.5996583655488628, "grad_norm": 1.492014765739441, "learning_rate": 7.953513975822755e-06, "loss": 0.7043, "step": 8895 }, { "epoch": 1.5998381731547244, "grad_norm": 1.4739532470703125, "learning_rate": 7.953044004500481e-06, "loss": 0.7225, "step": 8896 }, { "epoch": 1.6000179807605863, "grad_norm": 1.4353723526000977, "learning_rate": 7.952573993109273e-06, "loss": 0.7369, "step": 8897 }, { "epoch": 1.6001977883664478, "grad_norm": 1.4548612833023071, "learning_rate": 7.95210394165551e-06, "loss": 0.7091, "step": 8898 }, { "epoch": 1.6003775959723097, "grad_norm": 1.5071227550506592, "learning_rate": 7.951633850145572e-06, "loss": 0.7401, "step": 8899 }, { "epoch": 1.6005574035781713, "grad_norm": 1.6917427778244019, "learning_rate": 7.951163718585835e-06, "loss": 0.7706, "step": 8900 }, { "epoch": 1.600737211184033, "grad_norm": 1.4892525672912598, "learning_rate": 7.950693546982679e-06, "loss": 0.7121, "step": 8901 }, { "epoch": 1.600917018789895, "grad_norm": 1.428937315940857, "learning_rate": 7.950223335342482e-06, "loss": 0.7383, "step": 8902 }, { "epoch": 1.6010968263957566, "grad_norm": 1.5726686716079712, "learning_rate": 7.949753083671625e-06, "loss": 0.7345, "step": 8903 }, { "epoch": 1.6012766340016182, "grad_norm": 1.4367761611938477, "learning_rate": 7.949282791976491e-06, "loss": 0.7582, "step": 8904 }, { "epoch": 1.6014564416074801, "grad_norm": 1.6812089681625366, "learning_rate": 7.948812460263457e-06, "loss": 0.6999, "step": 8905 }, { "epoch": 1.6016362492133416, "grad_norm": 1.5477970838546753, "learning_rate": 7.94834208853891e-06, "loss": 0.7402, "step": 8906 }, { "epoch": 1.6018160568192035, "grad_norm": 1.3986001014709473, "learning_rate": 7.947871676809228e-06, "loss": 0.8051, "step": 8907 }, { "epoch": 1.6019958644250651, "grad_norm": 1.498500108718872, "learning_rate": 7.947401225080795e-06, "loss": 0.727, "step": 8908 }, { "epoch": 1.6021756720309268, "grad_norm": 1.740687608718872, "learning_rate": 7.946930733359995e-06, "loss": 0.7825, "step": 8909 }, { "epoch": 1.6023554796367887, "grad_norm": 1.6653306484222412, "learning_rate": 7.946460201653211e-06, "loss": 0.7343, "step": 8910 }, { "epoch": 1.6025352872426504, "grad_norm": 1.4377602338790894, "learning_rate": 7.94598962996683e-06, "loss": 0.7246, "step": 8911 }, { "epoch": 1.602715094848512, "grad_norm": 1.494131326675415, "learning_rate": 7.945519018307236e-06, "loss": 0.6598, "step": 8912 }, { "epoch": 1.602894902454374, "grad_norm": 1.4385946989059448, "learning_rate": 7.94504836668081e-06, "loss": 0.7843, "step": 8913 }, { "epoch": 1.6030747100602354, "grad_norm": 1.3988420963287354, "learning_rate": 7.944577675093945e-06, "loss": 0.7362, "step": 8914 }, { "epoch": 1.6032545176660973, "grad_norm": 1.4932655096054077, "learning_rate": 7.944106943553025e-06, "loss": 0.7132, "step": 8915 }, { "epoch": 1.603434325271959, "grad_norm": 1.5094119310379028, "learning_rate": 7.943636172064435e-06, "loss": 0.7406, "step": 8916 }, { "epoch": 1.6036141328778206, "grad_norm": 1.0842524766921997, "learning_rate": 7.943165360634565e-06, "loss": 1.0197, "step": 8917 }, { "epoch": 1.6037939404836825, "grad_norm": 1.7098737955093384, "learning_rate": 7.942694509269804e-06, "loss": 0.7231, "step": 8918 }, { "epoch": 1.6039737480895442, "grad_norm": 1.4828495979309082, "learning_rate": 7.942223617976537e-06, "loss": 0.6934, "step": 8919 }, { "epoch": 1.6041535556954059, "grad_norm": 1.0383068323135376, "learning_rate": 7.941752686761159e-06, "loss": 0.912, "step": 8920 }, { "epoch": 1.6043333633012677, "grad_norm": 1.3581653833389282, "learning_rate": 7.941281715630056e-06, "loss": 0.6722, "step": 8921 }, { "epoch": 1.6045131709071294, "grad_norm": 1.1362404823303223, "learning_rate": 7.94081070458962e-06, "loss": 0.9301, "step": 8922 }, { "epoch": 1.604692978512991, "grad_norm": 1.5191199779510498, "learning_rate": 7.940339653646241e-06, "loss": 0.6869, "step": 8923 }, { "epoch": 1.604872786118853, "grad_norm": 1.533315896987915, "learning_rate": 7.939868562806311e-06, "loss": 0.7703, "step": 8924 }, { "epoch": 1.6050525937247144, "grad_norm": 1.6665538549423218, "learning_rate": 7.939397432076222e-06, "loss": 0.7594, "step": 8925 }, { "epoch": 1.6052324013305763, "grad_norm": 1.5718337297439575, "learning_rate": 7.938926261462366e-06, "loss": 0.7114, "step": 8926 }, { "epoch": 1.605412208936438, "grad_norm": 1.5632007122039795, "learning_rate": 7.938455050971138e-06, "loss": 0.7731, "step": 8927 }, { "epoch": 1.6055920165422997, "grad_norm": 1.2700672149658203, "learning_rate": 7.937983800608931e-06, "loss": 0.8933, "step": 8928 }, { "epoch": 1.6057718241481616, "grad_norm": 1.5957473516464233, "learning_rate": 7.937512510382138e-06, "loss": 0.8247, "step": 8929 }, { "epoch": 1.6059516317540232, "grad_norm": 1.4872585535049438, "learning_rate": 7.937041180297156e-06, "loss": 0.7052, "step": 8930 }, { "epoch": 1.606131439359885, "grad_norm": 1.5361191034317017, "learning_rate": 7.936569810360378e-06, "loss": 0.7709, "step": 8931 }, { "epoch": 1.6063112469657468, "grad_norm": 1.4512816667556763, "learning_rate": 7.9360984005782e-06, "loss": 0.7317, "step": 8932 }, { "epoch": 1.6064910545716082, "grad_norm": 1.2612454891204834, "learning_rate": 7.935626950957019e-06, "loss": 0.95, "step": 8933 }, { "epoch": 1.6066708621774701, "grad_norm": 1.4291083812713623, "learning_rate": 7.935155461503235e-06, "loss": 0.73, "step": 8934 }, { "epoch": 1.6068506697833318, "grad_norm": 2.849071979522705, "learning_rate": 7.934683932223239e-06, "loss": 0.7445, "step": 8935 }, { "epoch": 1.6070304773891935, "grad_norm": 1.4954922199249268, "learning_rate": 7.934212363123435e-06, "loss": 0.724, "step": 8936 }, { "epoch": 1.6072102849950554, "grad_norm": 1.5417169332504272, "learning_rate": 7.933740754210218e-06, "loss": 0.7342, "step": 8937 }, { "epoch": 1.607390092600917, "grad_norm": 1.412243366241455, "learning_rate": 7.93326910548999e-06, "loss": 0.6779, "step": 8938 }, { "epoch": 1.6075699002067787, "grad_norm": 1.129539132118225, "learning_rate": 7.93279741696915e-06, "loss": 0.9678, "step": 8939 }, { "epoch": 1.6077497078126406, "grad_norm": 1.624971866607666, "learning_rate": 7.932325688654095e-06, "loss": 0.7414, "step": 8940 }, { "epoch": 1.607929515418502, "grad_norm": 1.5538995265960693, "learning_rate": 7.931853920551229e-06, "loss": 0.6667, "step": 8941 }, { "epoch": 1.608109323024364, "grad_norm": 1.4288289546966553, "learning_rate": 7.931382112666952e-06, "loss": 0.6894, "step": 8942 }, { "epoch": 1.6082891306302256, "grad_norm": 1.4282037019729614, "learning_rate": 7.930910265007666e-06, "loss": 0.7789, "step": 8943 }, { "epoch": 1.6084689382360873, "grad_norm": 1.5529837608337402, "learning_rate": 7.930438377579775e-06, "loss": 0.7344, "step": 8944 }, { "epoch": 1.6086487458419492, "grad_norm": 1.8369883298873901, "learning_rate": 7.929966450389677e-06, "loss": 0.7375, "step": 8945 }, { "epoch": 1.6088285534478108, "grad_norm": 1.1019036769866943, "learning_rate": 7.929494483443781e-06, "loss": 0.8591, "step": 8946 }, { "epoch": 1.6090083610536725, "grad_norm": 1.4406852722167969, "learning_rate": 7.92902247674849e-06, "loss": 0.7261, "step": 8947 }, { "epoch": 1.6091881686595344, "grad_norm": 1.3887829780578613, "learning_rate": 7.928550430310205e-06, "loss": 0.6786, "step": 8948 }, { "epoch": 1.609367976265396, "grad_norm": 1.6640968322753906, "learning_rate": 7.928078344135332e-06, "loss": 0.7443, "step": 8949 }, { "epoch": 1.6095477838712577, "grad_norm": 1.3648104667663574, "learning_rate": 7.927606218230282e-06, "loss": 0.658, "step": 8950 }, { "epoch": 1.6097275914771196, "grad_norm": 1.4491220712661743, "learning_rate": 7.927134052601455e-06, "loss": 0.7224, "step": 8951 }, { "epoch": 1.609907399082981, "grad_norm": 1.2070776224136353, "learning_rate": 7.92666184725526e-06, "loss": 0.8744, "step": 8952 }, { "epoch": 1.610087206688843, "grad_norm": 1.450098991394043, "learning_rate": 7.926189602198103e-06, "loss": 0.7669, "step": 8953 }, { "epoch": 1.6102670142947046, "grad_norm": 1.5735024213790894, "learning_rate": 7.925717317436394e-06, "loss": 0.7317, "step": 8954 }, { "epoch": 1.6104468219005663, "grad_norm": 1.5369714498519897, "learning_rate": 7.925244992976538e-06, "loss": 0.7532, "step": 8955 }, { "epoch": 1.6106266295064282, "grad_norm": 1.5493699312210083, "learning_rate": 7.924772628824948e-06, "loss": 0.7659, "step": 8956 }, { "epoch": 1.6108064371122899, "grad_norm": 1.557234764099121, "learning_rate": 7.92430022498803e-06, "loss": 0.6737, "step": 8957 }, { "epoch": 1.6109862447181515, "grad_norm": 1.5705879926681519, "learning_rate": 7.923827781472195e-06, "loss": 0.7676, "step": 8958 }, { "epoch": 1.6111660523240134, "grad_norm": 1.4526891708374023, "learning_rate": 7.923355298283853e-06, "loss": 0.7435, "step": 8959 }, { "epoch": 1.611345859929875, "grad_norm": 1.537680745124817, "learning_rate": 7.922882775429418e-06, "loss": 0.7275, "step": 8960 }, { "epoch": 1.6115256675357368, "grad_norm": 1.5514894723892212, "learning_rate": 7.922410212915297e-06, "loss": 0.7022, "step": 8961 }, { "epoch": 1.6117054751415985, "grad_norm": 1.4776145219802856, "learning_rate": 7.921937610747905e-06, "loss": 0.8018, "step": 8962 }, { "epoch": 1.6118852827474601, "grad_norm": 1.5603358745574951, "learning_rate": 7.921464968933652e-06, "loss": 0.7525, "step": 8963 }, { "epoch": 1.612065090353322, "grad_norm": 1.67233407497406, "learning_rate": 7.920992287478953e-06, "loss": 0.7816, "step": 8964 }, { "epoch": 1.6122448979591837, "grad_norm": 1.5673143863677979, "learning_rate": 7.920519566390222e-06, "loss": 0.8005, "step": 8965 }, { "epoch": 1.6124247055650454, "grad_norm": 1.5091547966003418, "learning_rate": 7.920046805673873e-06, "loss": 0.7744, "step": 8966 }, { "epoch": 1.6126045131709072, "grad_norm": 1.4696192741394043, "learning_rate": 7.91957400533632e-06, "loss": 0.7821, "step": 8967 }, { "epoch": 1.6127843207767687, "grad_norm": 2.2560746669769287, "learning_rate": 7.919101165383977e-06, "loss": 0.7284, "step": 8968 }, { "epoch": 1.6129641283826306, "grad_norm": 1.5402584075927734, "learning_rate": 7.918628285823263e-06, "loss": 0.6696, "step": 8969 }, { "epoch": 1.6131439359884923, "grad_norm": 1.4629042148590088, "learning_rate": 7.918155366660593e-06, "loss": 0.706, "step": 8970 }, { "epoch": 1.613323743594354, "grad_norm": 1.5444238185882568, "learning_rate": 7.917682407902383e-06, "loss": 0.7939, "step": 8971 }, { "epoch": 1.6135035512002158, "grad_norm": 1.5159794092178345, "learning_rate": 7.91720940955505e-06, "loss": 0.7401, "step": 8972 }, { "epoch": 1.6136833588060775, "grad_norm": 1.6432572603225708, "learning_rate": 7.916736371625016e-06, "loss": 0.7258, "step": 8973 }, { "epoch": 1.6138631664119392, "grad_norm": 1.5166804790496826, "learning_rate": 7.916263294118696e-06, "loss": 0.7825, "step": 8974 }, { "epoch": 1.614042974017801, "grad_norm": 1.5898147821426392, "learning_rate": 7.915790177042509e-06, "loss": 0.7695, "step": 8975 }, { "epoch": 1.6142227816236627, "grad_norm": 1.4826982021331787, "learning_rate": 7.915317020402874e-06, "loss": 0.7403, "step": 8976 }, { "epoch": 1.6144025892295244, "grad_norm": 1.4882395267486572, "learning_rate": 7.914843824206212e-06, "loss": 0.7557, "step": 8977 }, { "epoch": 1.6145823968353863, "grad_norm": 1.5139868259429932, "learning_rate": 7.914370588458947e-06, "loss": 0.7362, "step": 8978 }, { "epoch": 1.6147622044412477, "grad_norm": 1.5597659349441528, "learning_rate": 7.913897313167495e-06, "loss": 0.6636, "step": 8979 }, { "epoch": 1.6149420120471096, "grad_norm": 1.6587196588516235, "learning_rate": 7.91342399833828e-06, "loss": 0.7543, "step": 8980 }, { "epoch": 1.6151218196529713, "grad_norm": 1.432120680809021, "learning_rate": 7.912950643977725e-06, "loss": 0.7171, "step": 8981 }, { "epoch": 1.615301627258833, "grad_norm": 1.5353363752365112, "learning_rate": 7.912477250092252e-06, "loss": 0.6762, "step": 8982 }, { "epoch": 1.6154814348646949, "grad_norm": 1.553174376487732, "learning_rate": 7.912003816688283e-06, "loss": 0.7261, "step": 8983 }, { "epoch": 1.6156612424705565, "grad_norm": 1.3164751529693604, "learning_rate": 7.911530343772244e-06, "loss": 0.9052, "step": 8984 }, { "epoch": 1.6158410500764182, "grad_norm": 1.4632983207702637, "learning_rate": 7.911056831350558e-06, "loss": 0.7583, "step": 8985 }, { "epoch": 1.61602085768228, "grad_norm": 1.4146380424499512, "learning_rate": 7.91058327942965e-06, "loss": 0.7458, "step": 8986 }, { "epoch": 1.6162006652881415, "grad_norm": 1.6648757457733154, "learning_rate": 7.910109688015947e-06, "loss": 0.7676, "step": 8987 }, { "epoch": 1.6163804728940034, "grad_norm": 1.029478669166565, "learning_rate": 7.909636057115875e-06, "loss": 0.9266, "step": 8988 }, { "epoch": 1.616560280499865, "grad_norm": 1.5564018487930298, "learning_rate": 7.909162386735858e-06, "loss": 0.8166, "step": 8989 }, { "epoch": 1.6167400881057268, "grad_norm": 1.468689203262329, "learning_rate": 7.908688676882326e-06, "loss": 0.7284, "step": 8990 }, { "epoch": 1.6169198957115887, "grad_norm": 1.4646661281585693, "learning_rate": 7.908214927561704e-06, "loss": 0.7135, "step": 8991 }, { "epoch": 1.6170997033174503, "grad_norm": 1.516098976135254, "learning_rate": 7.907741138780422e-06, "loss": 0.726, "step": 8992 }, { "epoch": 1.617279510923312, "grad_norm": 1.5142838954925537, "learning_rate": 7.907267310544909e-06, "loss": 0.7777, "step": 8993 }, { "epoch": 1.617459318529174, "grad_norm": 1.4736461639404297, "learning_rate": 7.906793442861591e-06, "loss": 0.713, "step": 8994 }, { "epoch": 1.6176391261350354, "grad_norm": 1.4706615209579468, "learning_rate": 7.906319535736902e-06, "loss": 0.7133, "step": 8995 }, { "epoch": 1.6178189337408972, "grad_norm": 2.197317600250244, "learning_rate": 7.90584558917727e-06, "loss": 0.7393, "step": 8996 }, { "epoch": 1.617998741346759, "grad_norm": 1.1815438270568848, "learning_rate": 7.905371603189124e-06, "loss": 0.9702, "step": 8997 }, { "epoch": 1.6181785489526206, "grad_norm": 1.4770545959472656, "learning_rate": 7.904897577778901e-06, "loss": 0.7858, "step": 8998 }, { "epoch": 1.6183583565584825, "grad_norm": 1.408695101737976, "learning_rate": 7.904423512953027e-06, "loss": 0.642, "step": 8999 }, { "epoch": 1.6185381641643442, "grad_norm": 1.5470024347305298, "learning_rate": 7.903949408717939e-06, "loss": 0.7265, "step": 9000 }, { "epoch": 1.6185381641643442, "eval_loss": 0.7950135469436646, "eval_runtime": 148.603, "eval_samples_per_second": 96.781, "eval_steps_per_second": 1.514, "step": 9000 }, { "epoch": 1.6187179717702058, "grad_norm": 1.5490803718566895, "learning_rate": 7.903475265080067e-06, "loss": 0.7048, "step": 9001 }, { "epoch": 1.6188977793760677, "grad_norm": 1.4983030557632446, "learning_rate": 7.903001082045846e-06, "loss": 0.659, "step": 9002 }, { "epoch": 1.6190775869819294, "grad_norm": 1.5550872087478638, "learning_rate": 7.902526859621707e-06, "loss": 0.7362, "step": 9003 }, { "epoch": 1.619257394587791, "grad_norm": 1.4120204448699951, "learning_rate": 7.90205259781409e-06, "loss": 0.7189, "step": 9004 }, { "epoch": 1.619437202193653, "grad_norm": 1.4445040225982666, "learning_rate": 7.901578296629426e-06, "loss": 0.6946, "step": 9005 }, { "epoch": 1.6196170097995144, "grad_norm": 1.488895297050476, "learning_rate": 7.90110395607415e-06, "loss": 0.7296, "step": 9006 }, { "epoch": 1.6197968174053763, "grad_norm": 1.494186282157898, "learning_rate": 7.900629576154702e-06, "loss": 0.705, "step": 9007 }, { "epoch": 1.619976625011238, "grad_norm": 1.3876582384109497, "learning_rate": 7.900155156877517e-06, "loss": 0.7732, "step": 9008 }, { "epoch": 1.6201564326170996, "grad_norm": 1.364359736442566, "learning_rate": 7.89968069824903e-06, "loss": 0.7084, "step": 9009 }, { "epoch": 1.6203362402229615, "grad_norm": 1.5189272165298462, "learning_rate": 7.899206200275682e-06, "loss": 0.7879, "step": 9010 }, { "epoch": 1.6205160478288232, "grad_norm": 1.6239475011825562, "learning_rate": 7.89873166296391e-06, "loss": 0.7942, "step": 9011 }, { "epoch": 1.6206958554346849, "grad_norm": 1.6530557870864868, "learning_rate": 7.898257086320153e-06, "loss": 0.8373, "step": 9012 }, { "epoch": 1.6208756630405468, "grad_norm": 1.540385127067566, "learning_rate": 7.89778247035085e-06, "loss": 0.6961, "step": 9013 }, { "epoch": 1.6210554706464082, "grad_norm": 1.156214952468872, "learning_rate": 7.89730781506244e-06, "loss": 0.9758, "step": 9014 }, { "epoch": 1.62123527825227, "grad_norm": 1.5362296104431152, "learning_rate": 7.896833120461367e-06, "loss": 0.6987, "step": 9015 }, { "epoch": 1.6214150858581318, "grad_norm": 1.4555790424346924, "learning_rate": 7.896358386554068e-06, "loss": 0.7608, "step": 9016 }, { "epoch": 1.6215948934639934, "grad_norm": 1.4746673107147217, "learning_rate": 7.895883613346988e-06, "loss": 0.794, "step": 9017 }, { "epoch": 1.6217747010698553, "grad_norm": 1.4947013854980469, "learning_rate": 7.895408800846564e-06, "loss": 0.697, "step": 9018 }, { "epoch": 1.621954508675717, "grad_norm": 1.5011494159698486, "learning_rate": 7.894933949059245e-06, "loss": 0.7629, "step": 9019 }, { "epoch": 1.6221343162815787, "grad_norm": 1.5937068462371826, "learning_rate": 7.894459057991469e-06, "loss": 0.7371, "step": 9020 }, { "epoch": 1.6223141238874406, "grad_norm": 1.4819624423980713, "learning_rate": 7.893984127649682e-06, "loss": 0.71, "step": 9021 }, { "epoch": 1.622493931493302, "grad_norm": 1.479293704032898, "learning_rate": 7.893509158040327e-06, "loss": 0.74, "step": 9022 }, { "epoch": 1.622673739099164, "grad_norm": 1.5751004219055176, "learning_rate": 7.89303414916985e-06, "loss": 0.7617, "step": 9023 }, { "epoch": 1.6228535467050256, "grad_norm": 1.107586145401001, "learning_rate": 7.892559101044694e-06, "loss": 0.9458, "step": 9024 }, { "epoch": 1.6230333543108872, "grad_norm": 1.371267557144165, "learning_rate": 7.892084013671308e-06, "loss": 0.7085, "step": 9025 }, { "epoch": 1.6232131619167491, "grad_norm": 1.518990159034729, "learning_rate": 7.891608887056138e-06, "loss": 0.7238, "step": 9026 }, { "epoch": 1.6233929695226108, "grad_norm": 1.6616981029510498, "learning_rate": 7.891133721205629e-06, "loss": 0.7451, "step": 9027 }, { "epoch": 1.6235727771284725, "grad_norm": 1.5644443035125732, "learning_rate": 7.890658516126227e-06, "loss": 0.7614, "step": 9028 }, { "epoch": 1.6237525847343344, "grad_norm": 1.4400285482406616, "learning_rate": 7.890183271824384e-06, "loss": 0.7126, "step": 9029 }, { "epoch": 1.623932392340196, "grad_norm": 1.598997712135315, "learning_rate": 7.889707988306546e-06, "loss": 0.6484, "step": 9030 }, { "epoch": 1.6241121999460577, "grad_norm": 1.4111595153808594, "learning_rate": 7.88923266557916e-06, "loss": 0.6522, "step": 9031 }, { "epoch": 1.6242920075519196, "grad_norm": 1.4756860733032227, "learning_rate": 7.88875730364868e-06, "loss": 0.6763, "step": 9032 }, { "epoch": 1.624471815157781, "grad_norm": 1.646697759628296, "learning_rate": 7.888281902521552e-06, "loss": 0.7717, "step": 9033 }, { "epoch": 1.624651622763643, "grad_norm": 1.328004002571106, "learning_rate": 7.88780646220423e-06, "loss": 0.9349, "step": 9034 }, { "epoch": 1.6248314303695046, "grad_norm": 1.4564323425292969, "learning_rate": 7.887330982703165e-06, "loss": 0.6979, "step": 9035 }, { "epoch": 1.6250112379753663, "grad_norm": 1.6649396419525146, "learning_rate": 7.886855464024805e-06, "loss": 0.7428, "step": 9036 }, { "epoch": 1.6251910455812282, "grad_norm": 1.4666589498519897, "learning_rate": 7.886379906175605e-06, "loss": 0.7031, "step": 9037 }, { "epoch": 1.6253708531870898, "grad_norm": 1.1417138576507568, "learning_rate": 7.885904309162016e-06, "loss": 0.977, "step": 9038 }, { "epoch": 1.6255506607929515, "grad_norm": 1.538137674331665, "learning_rate": 7.885428672990495e-06, "loss": 0.827, "step": 9039 }, { "epoch": 1.6257304683988134, "grad_norm": 1.4747395515441895, "learning_rate": 7.88495299766749e-06, "loss": 0.6778, "step": 9040 }, { "epoch": 1.6259102760046749, "grad_norm": 1.4952890872955322, "learning_rate": 7.884477283199458e-06, "loss": 0.7018, "step": 9041 }, { "epoch": 1.6260900836105368, "grad_norm": 1.474760890007019, "learning_rate": 7.884001529592855e-06, "loss": 0.6858, "step": 9042 }, { "epoch": 1.6262698912163984, "grad_norm": 1.4588230848312378, "learning_rate": 7.883525736854135e-06, "loss": 0.7514, "step": 9043 }, { "epoch": 1.62644969882226, "grad_norm": 1.074989676475525, "learning_rate": 7.883049904989757e-06, "loss": 0.935, "step": 9044 }, { "epoch": 1.626629506428122, "grad_norm": 1.4121346473693848, "learning_rate": 7.882574034006173e-06, "loss": 0.7457, "step": 9045 }, { "epoch": 1.6268093140339837, "grad_norm": 1.529719591140747, "learning_rate": 7.88209812390984e-06, "loss": 0.7496, "step": 9046 }, { "epoch": 1.6269891216398453, "grad_norm": 1.5765330791473389, "learning_rate": 7.88162217470722e-06, "loss": 0.7632, "step": 9047 }, { "epoch": 1.6271689292457072, "grad_norm": 1.571122646331787, "learning_rate": 7.881146186404766e-06, "loss": 0.7918, "step": 9048 }, { "epoch": 1.6273487368515687, "grad_norm": 1.4345043897628784, "learning_rate": 7.88067015900894e-06, "loss": 0.7234, "step": 9049 }, { "epoch": 1.6275285444574306, "grad_norm": 1.068477749824524, "learning_rate": 7.8801940925262e-06, "loss": 0.9566, "step": 9050 }, { "epoch": 1.6277083520632922, "grad_norm": 1.581676959991455, "learning_rate": 7.879717986963004e-06, "loss": 0.7302, "step": 9051 }, { "epoch": 1.627888159669154, "grad_norm": 1.6313128471374512, "learning_rate": 7.879241842325814e-06, "loss": 0.7683, "step": 9052 }, { "epoch": 1.6280679672750158, "grad_norm": 1.1062872409820557, "learning_rate": 7.87876565862109e-06, "loss": 0.9515, "step": 9053 }, { "epoch": 1.6282477748808775, "grad_norm": 1.5288630723953247, "learning_rate": 7.878289435855293e-06, "loss": 0.7531, "step": 9054 }, { "epoch": 1.6284275824867391, "grad_norm": 1.4409253597259521, "learning_rate": 7.877813174034888e-06, "loss": 0.7398, "step": 9055 }, { "epoch": 1.628607390092601, "grad_norm": 1.5115182399749756, "learning_rate": 7.87733687316633e-06, "loss": 0.7109, "step": 9056 }, { "epoch": 1.6287871976984627, "grad_norm": 1.5979466438293457, "learning_rate": 7.876860533256088e-06, "loss": 0.741, "step": 9057 }, { "epoch": 1.6289670053043244, "grad_norm": 1.539665937423706, "learning_rate": 7.876384154310623e-06, "loss": 0.7776, "step": 9058 }, { "epoch": 1.6291468129101863, "grad_norm": 1.103088617324829, "learning_rate": 7.875907736336401e-06, "loss": 0.9762, "step": 9059 }, { "epoch": 1.6293266205160477, "grad_norm": 1.5217865705490112, "learning_rate": 7.875431279339884e-06, "loss": 0.6964, "step": 9060 }, { "epoch": 1.6295064281219096, "grad_norm": 1.4205961227416992, "learning_rate": 7.874954783327537e-06, "loss": 0.7098, "step": 9061 }, { "epoch": 1.6296862357277713, "grad_norm": 1.5562989711761475, "learning_rate": 7.874478248305825e-06, "loss": 0.7859, "step": 9062 }, { "epoch": 1.629866043333633, "grad_norm": 1.4730682373046875, "learning_rate": 7.874001674281217e-06, "loss": 0.7035, "step": 9063 }, { "epoch": 1.6300458509394948, "grad_norm": 1.4912748336791992, "learning_rate": 7.873525061260174e-06, "loss": 0.7099, "step": 9064 }, { "epoch": 1.6302256585453565, "grad_norm": 1.4790794849395752, "learning_rate": 7.87304840924917e-06, "loss": 0.6942, "step": 9065 }, { "epoch": 1.6304054661512182, "grad_norm": 1.5040934085845947, "learning_rate": 7.872571718254666e-06, "loss": 0.7233, "step": 9066 }, { "epoch": 1.63058527375708, "grad_norm": 1.4638092517852783, "learning_rate": 7.872094988283136e-06, "loss": 0.7618, "step": 9067 }, { "epoch": 1.6307650813629415, "grad_norm": 1.5584030151367188, "learning_rate": 7.871618219341044e-06, "loss": 0.7077, "step": 9068 }, { "epoch": 1.6309448889688034, "grad_norm": 1.5290933847427368, "learning_rate": 7.87114141143486e-06, "loss": 0.7957, "step": 9069 }, { "epoch": 1.631124696574665, "grad_norm": 1.0898394584655762, "learning_rate": 7.870664564571055e-06, "loss": 0.9438, "step": 9070 }, { "epoch": 1.6313045041805267, "grad_norm": 1.4327688217163086, "learning_rate": 7.870187678756099e-06, "loss": 0.7024, "step": 9071 }, { "epoch": 1.6314843117863886, "grad_norm": 1.16121244430542, "learning_rate": 7.869710753996462e-06, "loss": 0.9322, "step": 9072 }, { "epoch": 1.6316641193922503, "grad_norm": 1.5874732732772827, "learning_rate": 7.869233790298615e-06, "loss": 0.7582, "step": 9073 }, { "epoch": 1.631843926998112, "grad_norm": 1.4957642555236816, "learning_rate": 7.868756787669029e-06, "loss": 0.7821, "step": 9074 }, { "epoch": 1.6320237346039739, "grad_norm": 1.5010619163513184, "learning_rate": 7.86827974611418e-06, "loss": 0.7667, "step": 9075 }, { "epoch": 1.6322035422098353, "grad_norm": 1.3754690885543823, "learning_rate": 7.867802665640538e-06, "loss": 0.681, "step": 9076 }, { "epoch": 1.6323833498156972, "grad_norm": 1.5914931297302246, "learning_rate": 7.867325546254577e-06, "loss": 0.7553, "step": 9077 }, { "epoch": 1.6325631574215589, "grad_norm": 1.1595762968063354, "learning_rate": 7.86684838796277e-06, "loss": 0.9287, "step": 9078 }, { "epoch": 1.6327429650274206, "grad_norm": 1.6659601926803589, "learning_rate": 7.866371190771592e-06, "loss": 0.7212, "step": 9079 }, { "epoch": 1.6329227726332824, "grad_norm": 1.555238962173462, "learning_rate": 7.865893954687517e-06, "loss": 0.7996, "step": 9080 }, { "epoch": 1.6331025802391441, "grad_norm": 1.5602283477783203, "learning_rate": 7.865416679717025e-06, "loss": 0.7637, "step": 9081 }, { "epoch": 1.6332823878450058, "grad_norm": 1.4874550104141235, "learning_rate": 7.864939365866584e-06, "loss": 0.8025, "step": 9082 }, { "epoch": 1.6334621954508677, "grad_norm": 1.4009250402450562, "learning_rate": 7.864462013142678e-06, "loss": 0.7248, "step": 9083 }, { "epoch": 1.6336420030567294, "grad_norm": 1.4199682474136353, "learning_rate": 7.863984621551781e-06, "loss": 0.7101, "step": 9084 }, { "epoch": 1.633821810662591, "grad_norm": 1.4483155012130737, "learning_rate": 7.86350719110037e-06, "loss": 0.739, "step": 9085 }, { "epoch": 1.634001618268453, "grad_norm": 1.0307261943817139, "learning_rate": 7.863029721794923e-06, "loss": 0.9084, "step": 9086 }, { "epoch": 1.6341814258743144, "grad_norm": 1.442684292793274, "learning_rate": 7.862552213641921e-06, "loss": 0.7013, "step": 9087 }, { "epoch": 1.6343612334801763, "grad_norm": 1.5200848579406738, "learning_rate": 7.86207466664784e-06, "loss": 0.7693, "step": 9088 }, { "epoch": 1.634541041086038, "grad_norm": 1.5438542366027832, "learning_rate": 7.86159708081916e-06, "loss": 0.742, "step": 9089 }, { "epoch": 1.6347208486918996, "grad_norm": 1.5235282182693481, "learning_rate": 7.861119456162365e-06, "loss": 0.7239, "step": 9090 }, { "epoch": 1.6349006562977615, "grad_norm": 1.5022791624069214, "learning_rate": 7.860641792683931e-06, "loss": 0.7037, "step": 9091 }, { "epoch": 1.6350804639036232, "grad_norm": 1.389607310295105, "learning_rate": 7.860164090390343e-06, "loss": 0.7452, "step": 9092 }, { "epoch": 1.6352602715094848, "grad_norm": 1.101386308670044, "learning_rate": 7.859686349288083e-06, "loss": 0.9576, "step": 9093 }, { "epoch": 1.6354400791153467, "grad_norm": 1.6120308637619019, "learning_rate": 7.859208569383629e-06, "loss": 0.722, "step": 9094 }, { "epoch": 1.6356198867212082, "grad_norm": 1.013251781463623, "learning_rate": 7.858730750683465e-06, "loss": 0.9578, "step": 9095 }, { "epoch": 1.63579969432707, "grad_norm": 1.5178136825561523, "learning_rate": 7.858252893194079e-06, "loss": 0.7627, "step": 9096 }, { "epoch": 1.6359795019329317, "grad_norm": 1.4451466798782349, "learning_rate": 7.85777499692195e-06, "loss": 0.7048, "step": 9097 }, { "epoch": 1.6361593095387934, "grad_norm": 1.4358967542648315, "learning_rate": 7.857297061873563e-06, "loss": 0.6812, "step": 9098 }, { "epoch": 1.6363391171446553, "grad_norm": 1.539306402206421, "learning_rate": 7.856819088055407e-06, "loss": 0.7754, "step": 9099 }, { "epoch": 1.636518924750517, "grad_norm": 1.5065958499908447, "learning_rate": 7.856341075473963e-06, "loss": 0.7802, "step": 9100 }, { "epoch": 1.6366987323563786, "grad_norm": 1.4963099956512451, "learning_rate": 7.855863024135717e-06, "loss": 0.7091, "step": 9101 }, { "epoch": 1.6368785399622405, "grad_norm": 1.493699073791504, "learning_rate": 7.855384934047159e-06, "loss": 0.7351, "step": 9102 }, { "epoch": 1.637058347568102, "grad_norm": 1.508113980293274, "learning_rate": 7.854906805214774e-06, "loss": 0.7674, "step": 9103 }, { "epoch": 1.6372381551739639, "grad_norm": 1.482499361038208, "learning_rate": 7.854428637645048e-06, "loss": 0.7508, "step": 9104 }, { "epoch": 1.6374179627798255, "grad_norm": 1.5014090538024902, "learning_rate": 7.853950431344472e-06, "loss": 0.6469, "step": 9105 }, { "epoch": 1.6375977703856872, "grad_norm": 1.2235281467437744, "learning_rate": 7.853472186319534e-06, "loss": 0.9863, "step": 9106 }, { "epoch": 1.637777577991549, "grad_norm": 1.7411892414093018, "learning_rate": 7.852993902576723e-06, "loss": 0.7602, "step": 9107 }, { "epoch": 1.6379573855974108, "grad_norm": 1.4184305667877197, "learning_rate": 7.852515580122526e-06, "loss": 0.7197, "step": 9108 }, { "epoch": 1.6381371932032724, "grad_norm": 1.1388887166976929, "learning_rate": 7.852037218963438e-06, "loss": 0.9073, "step": 9109 }, { "epoch": 1.6383170008091343, "grad_norm": 1.3017587661743164, "learning_rate": 7.851558819105944e-06, "loss": 0.9652, "step": 9110 }, { "epoch": 1.638496808414996, "grad_norm": 1.3877849578857422, "learning_rate": 7.851080380556542e-06, "loss": 0.7504, "step": 9111 }, { "epoch": 1.6386766160208577, "grad_norm": 1.4665886163711548, "learning_rate": 7.850601903321717e-06, "loss": 0.7125, "step": 9112 }, { "epoch": 1.6388564236267196, "grad_norm": 1.1599366664886475, "learning_rate": 7.850123387407968e-06, "loss": 0.9202, "step": 9113 }, { "epoch": 1.639036231232581, "grad_norm": 1.4566344022750854, "learning_rate": 7.849644832821781e-06, "loss": 0.7457, "step": 9114 }, { "epoch": 1.639216038838443, "grad_norm": 1.6053719520568848, "learning_rate": 7.849166239569654e-06, "loss": 0.7723, "step": 9115 }, { "epoch": 1.6393958464443046, "grad_norm": 1.1329728364944458, "learning_rate": 7.848687607658081e-06, "loss": 0.9349, "step": 9116 }, { "epoch": 1.6395756540501663, "grad_norm": 1.5119707584381104, "learning_rate": 7.848208937093553e-06, "loss": 0.7729, "step": 9117 }, { "epoch": 1.6397554616560281, "grad_norm": 1.6052749156951904, "learning_rate": 7.84773022788257e-06, "loss": 0.8208, "step": 9118 }, { "epoch": 1.6399352692618898, "grad_norm": 1.512404203414917, "learning_rate": 7.847251480031621e-06, "loss": 0.7078, "step": 9119 }, { "epoch": 1.6401150768677515, "grad_norm": 1.4929962158203125, "learning_rate": 7.846772693547207e-06, "loss": 0.7436, "step": 9120 }, { "epoch": 1.6402948844736134, "grad_norm": 1.5691183805465698, "learning_rate": 7.846293868435822e-06, "loss": 0.7695, "step": 9121 }, { "epoch": 1.6404746920794748, "grad_norm": 1.6047481298446655, "learning_rate": 7.845815004703965e-06, "loss": 0.7537, "step": 9122 }, { "epoch": 1.6406544996853367, "grad_norm": 2.1642093658447266, "learning_rate": 7.845336102358132e-06, "loss": 0.7409, "step": 9123 }, { "epoch": 1.6408343072911984, "grad_norm": 1.2349272966384888, "learning_rate": 7.84485716140482e-06, "loss": 0.9477, "step": 9124 }, { "epoch": 1.64101411489706, "grad_norm": 1.5601595640182495, "learning_rate": 7.844378181850532e-06, "loss": 0.6907, "step": 9125 }, { "epoch": 1.641193922502922, "grad_norm": 1.5145255327224731, "learning_rate": 7.843899163701762e-06, "loss": 0.7874, "step": 9126 }, { "epoch": 1.6413737301087836, "grad_norm": 1.4802759885787964, "learning_rate": 7.843420106965015e-06, "loss": 0.6737, "step": 9127 }, { "epoch": 1.6415535377146453, "grad_norm": 1.466490387916565, "learning_rate": 7.842941011646786e-06, "loss": 0.7596, "step": 9128 }, { "epoch": 1.6417333453205072, "grad_norm": 1.4317413568496704, "learning_rate": 7.842461877753575e-06, "loss": 0.7326, "step": 9129 }, { "epoch": 1.6419131529263686, "grad_norm": 1.0681096315383911, "learning_rate": 7.84198270529189e-06, "loss": 0.9199, "step": 9130 }, { "epoch": 1.6420929605322305, "grad_norm": 1.4838247299194336, "learning_rate": 7.841503494268227e-06, "loss": 0.7606, "step": 9131 }, { "epoch": 1.6422727681380922, "grad_norm": 1.6217985153198242, "learning_rate": 7.841024244689093e-06, "loss": 0.7971, "step": 9132 }, { "epoch": 1.6424525757439539, "grad_norm": 1.1499178409576416, "learning_rate": 7.840544956560985e-06, "loss": 0.9253, "step": 9133 }, { "epoch": 1.6426323833498158, "grad_norm": 1.4600690603256226, "learning_rate": 7.840065629890409e-06, "loss": 0.7763, "step": 9134 }, { "epoch": 1.6428121909556774, "grad_norm": 1.4583745002746582, "learning_rate": 7.83958626468387e-06, "loss": 0.7148, "step": 9135 }, { "epoch": 1.642991998561539, "grad_norm": 1.557934045791626, "learning_rate": 7.83910686094787e-06, "loss": 0.7789, "step": 9136 }, { "epoch": 1.643171806167401, "grad_norm": 1.07636559009552, "learning_rate": 7.838627418688915e-06, "loss": 0.9138, "step": 9137 }, { "epoch": 1.6433516137732624, "grad_norm": 1.4393539428710938, "learning_rate": 7.838147937913513e-06, "loss": 0.7257, "step": 9138 }, { "epoch": 1.6435314213791243, "grad_norm": 1.4910950660705566, "learning_rate": 7.837668418628165e-06, "loss": 0.6499, "step": 9139 }, { "epoch": 1.6437112289849862, "grad_norm": 1.5583943128585815, "learning_rate": 7.837188860839382e-06, "loss": 0.7509, "step": 9140 }, { "epoch": 1.6438910365908477, "grad_norm": 1.4513269662857056, "learning_rate": 7.836709264553669e-06, "loss": 0.7417, "step": 9141 }, { "epoch": 1.6440708441967096, "grad_norm": 1.5287971496582031, "learning_rate": 7.836229629777532e-06, "loss": 0.7391, "step": 9142 }, { "epoch": 1.6442506518025712, "grad_norm": 1.2040812969207764, "learning_rate": 7.835749956517481e-06, "loss": 0.8844, "step": 9143 }, { "epoch": 1.644430459408433, "grad_norm": 1.6486051082611084, "learning_rate": 7.835270244780024e-06, "loss": 0.7044, "step": 9144 }, { "epoch": 1.6446102670142948, "grad_norm": 1.5235031843185425, "learning_rate": 7.83479049457167e-06, "loss": 0.7556, "step": 9145 }, { "epoch": 1.6447900746201565, "grad_norm": 1.442718267440796, "learning_rate": 7.834310705898928e-06, "loss": 0.7186, "step": 9146 }, { "epoch": 1.6449698822260181, "grad_norm": 1.4260085821151733, "learning_rate": 7.833830878768309e-06, "loss": 0.6946, "step": 9147 }, { "epoch": 1.64514968983188, "grad_norm": 1.5426169633865356, "learning_rate": 7.833351013186326e-06, "loss": 0.7912, "step": 9148 }, { "epoch": 1.6453294974377415, "grad_norm": 1.5468391180038452, "learning_rate": 7.832871109159484e-06, "loss": 0.8577, "step": 9149 }, { "epoch": 1.6455093050436034, "grad_norm": 1.5802699327468872, "learning_rate": 7.8323911666943e-06, "loss": 0.7806, "step": 9150 }, { "epoch": 1.645689112649465, "grad_norm": 1.5162990093231201, "learning_rate": 7.831911185797282e-06, "loss": 0.7663, "step": 9151 }, { "epoch": 1.6458689202553267, "grad_norm": 1.4143617153167725, "learning_rate": 7.831431166474948e-06, "loss": 0.7057, "step": 9152 }, { "epoch": 1.6460487278611886, "grad_norm": 1.5125948190689087, "learning_rate": 7.830951108733807e-06, "loss": 0.7812, "step": 9153 }, { "epoch": 1.6462285354670503, "grad_norm": 1.5386438369750977, "learning_rate": 7.830471012580374e-06, "loss": 0.7169, "step": 9154 }, { "epoch": 1.646408343072912, "grad_norm": 1.5350807905197144, "learning_rate": 7.829990878021164e-06, "loss": 0.7045, "step": 9155 }, { "epoch": 1.6465881506787738, "grad_norm": 1.5789250135421753, "learning_rate": 7.82951070506269e-06, "loss": 0.76, "step": 9156 }, { "epoch": 1.6467679582846353, "grad_norm": 1.601035475730896, "learning_rate": 7.829030493711467e-06, "loss": 0.6353, "step": 9157 }, { "epoch": 1.6469477658904972, "grad_norm": 1.3525978326797485, "learning_rate": 7.828550243974015e-06, "loss": 0.6895, "step": 9158 }, { "epoch": 1.6471275734963589, "grad_norm": 1.1057994365692139, "learning_rate": 7.828069955856848e-06, "loss": 0.9514, "step": 9159 }, { "epoch": 1.6473073811022205, "grad_norm": 1.4718362092971802, "learning_rate": 7.82758962936648e-06, "loss": 0.6531, "step": 9160 }, { "epoch": 1.6474871887080824, "grad_norm": 1.50388765335083, "learning_rate": 7.827109264509434e-06, "loss": 0.7172, "step": 9161 }, { "epoch": 1.647666996313944, "grad_norm": 1.066515564918518, "learning_rate": 7.826628861292222e-06, "loss": 0.927, "step": 9162 }, { "epoch": 1.6478468039198058, "grad_norm": 1.7242941856384277, "learning_rate": 7.826148419721367e-06, "loss": 0.7394, "step": 9163 }, { "epoch": 1.6480266115256677, "grad_norm": 1.4666945934295654, "learning_rate": 7.825667939803385e-06, "loss": 0.733, "step": 9164 }, { "epoch": 1.648206419131529, "grad_norm": 1.5266534090042114, "learning_rate": 7.825187421544798e-06, "loss": 0.7692, "step": 9165 }, { "epoch": 1.648386226737391, "grad_norm": 1.127700686454773, "learning_rate": 7.824706864952124e-06, "loss": 0.9366, "step": 9166 }, { "epoch": 1.6485660343432529, "grad_norm": 1.4597337245941162, "learning_rate": 7.824226270031884e-06, "loss": 0.7189, "step": 9167 }, { "epoch": 1.6487458419491143, "grad_norm": 1.706567645072937, "learning_rate": 7.8237456367906e-06, "loss": 0.7414, "step": 9168 }, { "epoch": 1.6489256495549762, "grad_norm": 1.6175693273544312, "learning_rate": 7.82326496523479e-06, "loss": 0.7414, "step": 9169 }, { "epoch": 1.649105457160838, "grad_norm": 1.5436211824417114, "learning_rate": 7.822784255370984e-06, "loss": 0.7406, "step": 9170 }, { "epoch": 1.6492852647666996, "grad_norm": 1.5079293251037598, "learning_rate": 7.822303507205697e-06, "loss": 0.7908, "step": 9171 }, { "epoch": 1.6494650723725615, "grad_norm": 1.7088580131530762, "learning_rate": 7.821822720745455e-06, "loss": 0.7557, "step": 9172 }, { "epoch": 1.6496448799784231, "grad_norm": 1.107600212097168, "learning_rate": 7.821341895996779e-06, "loss": 0.9538, "step": 9173 }, { "epoch": 1.6498246875842848, "grad_norm": 1.5232353210449219, "learning_rate": 7.820861032966199e-06, "loss": 0.707, "step": 9174 }, { "epoch": 1.6500044951901467, "grad_norm": 1.3678489923477173, "learning_rate": 7.820380131660234e-06, "loss": 0.6607, "step": 9175 }, { "epoch": 1.6501843027960081, "grad_norm": 1.200295329093933, "learning_rate": 7.819899192085412e-06, "loss": 0.9225, "step": 9176 }, { "epoch": 1.65036411040187, "grad_norm": 1.4272719621658325, "learning_rate": 7.819418214248257e-06, "loss": 0.7597, "step": 9177 }, { "epoch": 1.6505439180077317, "grad_norm": 1.3959413766860962, "learning_rate": 7.818937198155298e-06, "loss": 0.7885, "step": 9178 }, { "epoch": 1.6507237256135934, "grad_norm": 1.5267473459243774, "learning_rate": 7.81845614381306e-06, "loss": 0.6745, "step": 9179 }, { "epoch": 1.6509035332194553, "grad_norm": 1.9059991836547852, "learning_rate": 7.817975051228068e-06, "loss": 0.7688, "step": 9180 }, { "epoch": 1.651083340825317, "grad_norm": 1.5515915155410767, "learning_rate": 7.817493920406855e-06, "loss": 0.693, "step": 9181 }, { "epoch": 1.6512631484311786, "grad_norm": 1.544194221496582, "learning_rate": 7.817012751355945e-06, "loss": 0.7308, "step": 9182 }, { "epoch": 1.6514429560370405, "grad_norm": 1.5193506479263306, "learning_rate": 7.816531544081868e-06, "loss": 0.7539, "step": 9183 }, { "epoch": 1.651622763642902, "grad_norm": 1.6265249252319336, "learning_rate": 7.816050298591153e-06, "loss": 0.7783, "step": 9184 }, { "epoch": 1.6518025712487638, "grad_norm": 1.5624849796295166, "learning_rate": 7.815569014890331e-06, "loss": 0.6788, "step": 9185 }, { "epoch": 1.6519823788546255, "grad_norm": 1.5594470500946045, "learning_rate": 7.815087692985935e-06, "loss": 0.7314, "step": 9186 }, { "epoch": 1.6521621864604872, "grad_norm": 1.4771523475646973, "learning_rate": 7.81460633288449e-06, "loss": 0.6927, "step": 9187 }, { "epoch": 1.652341994066349, "grad_norm": 1.4982925653457642, "learning_rate": 7.814124934592528e-06, "loss": 0.7516, "step": 9188 }, { "epoch": 1.6525218016722107, "grad_norm": 1.166456699371338, "learning_rate": 7.813643498116587e-06, "loss": 0.9113, "step": 9189 }, { "epoch": 1.6527016092780724, "grad_norm": 1.5896340608596802, "learning_rate": 7.813162023463195e-06, "loss": 0.744, "step": 9190 }, { "epoch": 1.6528814168839343, "grad_norm": 1.579929232597351, "learning_rate": 7.812680510638883e-06, "loss": 0.7805, "step": 9191 }, { "epoch": 1.6530612244897958, "grad_norm": 1.5810565948486328, "learning_rate": 7.81219895965019e-06, "loss": 0.7464, "step": 9192 }, { "epoch": 1.6532410320956576, "grad_norm": 1.1910511255264282, "learning_rate": 7.811717370503646e-06, "loss": 0.9597, "step": 9193 }, { "epoch": 1.6534208397015195, "grad_norm": 1.44368314743042, "learning_rate": 7.811235743205786e-06, "loss": 0.7273, "step": 9194 }, { "epoch": 1.653600647307381, "grad_norm": 1.4079447984695435, "learning_rate": 7.810754077763144e-06, "loss": 0.728, "step": 9195 }, { "epoch": 1.6537804549132429, "grad_norm": 1.4532800912857056, "learning_rate": 7.810272374182262e-06, "loss": 0.6951, "step": 9196 }, { "epoch": 1.6539602625191046, "grad_norm": 1.1239678859710693, "learning_rate": 7.809790632469668e-06, "loss": 0.9441, "step": 9197 }, { "epoch": 1.6541400701249662, "grad_norm": 1.4782570600509644, "learning_rate": 7.809308852631905e-06, "loss": 0.6981, "step": 9198 }, { "epoch": 1.6543198777308281, "grad_norm": 1.0427428483963013, "learning_rate": 7.808827034675504e-06, "loss": 0.9178, "step": 9199 }, { "epoch": 1.6544996853366898, "grad_norm": 1.4312841892242432, "learning_rate": 7.808345178607006e-06, "loss": 0.6945, "step": 9200 }, { "epoch": 1.6546794929425515, "grad_norm": 1.6679767370224, "learning_rate": 7.807863284432948e-06, "loss": 0.7593, "step": 9201 }, { "epoch": 1.6548593005484133, "grad_norm": 1.473891019821167, "learning_rate": 7.807381352159872e-06, "loss": 0.7692, "step": 9202 }, { "epoch": 1.6550391081542748, "grad_norm": 1.0340259075164795, "learning_rate": 7.806899381794314e-06, "loss": 0.9556, "step": 9203 }, { "epoch": 1.6552189157601367, "grad_norm": 1.0889695882797241, "learning_rate": 7.806417373342814e-06, "loss": 0.8998, "step": 9204 }, { "epoch": 1.6553987233659984, "grad_norm": 1.5462335348129272, "learning_rate": 7.805935326811913e-06, "loss": 0.6551, "step": 9205 }, { "epoch": 1.65557853097186, "grad_norm": 1.5559308528900146, "learning_rate": 7.805453242208151e-06, "loss": 0.6586, "step": 9206 }, { "epoch": 1.655758338577722, "grad_norm": 1.210142970085144, "learning_rate": 7.80497111953807e-06, "loss": 0.9121, "step": 9207 }, { "epoch": 1.6559381461835836, "grad_norm": 1.4289159774780273, "learning_rate": 7.804488958808211e-06, "loss": 0.7044, "step": 9208 }, { "epoch": 1.6561179537894453, "grad_norm": 1.422571063041687, "learning_rate": 7.804006760025116e-06, "loss": 0.7272, "step": 9209 }, { "epoch": 1.6562977613953072, "grad_norm": 1.6096409559249878, "learning_rate": 7.80352452319533e-06, "loss": 0.7226, "step": 9210 }, { "epoch": 1.6564775690011686, "grad_norm": 1.4455978870391846, "learning_rate": 7.803042248325394e-06, "loss": 0.6658, "step": 9211 }, { "epoch": 1.6566573766070305, "grad_norm": 1.4487800598144531, "learning_rate": 7.802559935421853e-06, "loss": 0.7437, "step": 9212 }, { "epoch": 1.6568371842128922, "grad_norm": 1.4700015783309937, "learning_rate": 7.802077584491251e-06, "loss": 0.7667, "step": 9213 }, { "epoch": 1.6570169918187538, "grad_norm": 1.5188368558883667, "learning_rate": 7.801595195540132e-06, "loss": 0.717, "step": 9214 }, { "epoch": 1.6571967994246157, "grad_norm": 1.497963309288025, "learning_rate": 7.801112768575043e-06, "loss": 0.7257, "step": 9215 }, { "epoch": 1.6573766070304774, "grad_norm": 1.4649659395217896, "learning_rate": 7.800630303602529e-06, "loss": 0.7966, "step": 9216 }, { "epoch": 1.657556414636339, "grad_norm": 1.5155755281448364, "learning_rate": 7.800147800629137e-06, "loss": 0.7769, "step": 9217 }, { "epoch": 1.657736222242201, "grad_norm": 1.2371717691421509, "learning_rate": 7.799665259661414e-06, "loss": 0.9464, "step": 9218 }, { "epoch": 1.6579160298480624, "grad_norm": 1.5063246488571167, "learning_rate": 7.799182680705908e-06, "loss": 0.7419, "step": 9219 }, { "epoch": 1.6580958374539243, "grad_norm": 1.4534329175949097, "learning_rate": 7.798700063769162e-06, "loss": 0.7361, "step": 9220 }, { "epoch": 1.658275645059786, "grad_norm": 1.503507137298584, "learning_rate": 7.79821740885773e-06, "loss": 0.6892, "step": 9221 }, { "epoch": 1.6584554526656476, "grad_norm": 1.5595570802688599, "learning_rate": 7.797734715978163e-06, "loss": 0.7728, "step": 9222 }, { "epoch": 1.6586352602715095, "grad_norm": 1.49167001247406, "learning_rate": 7.797251985137002e-06, "loss": 0.7559, "step": 9223 }, { "epoch": 1.6588150678773712, "grad_norm": 1.4846243858337402, "learning_rate": 7.796769216340805e-06, "loss": 0.8331, "step": 9224 }, { "epoch": 1.6589948754832329, "grad_norm": 1.394248127937317, "learning_rate": 7.796286409596118e-06, "loss": 0.7458, "step": 9225 }, { "epoch": 1.6591746830890948, "grad_norm": 1.7443143129348755, "learning_rate": 7.795803564909494e-06, "loss": 0.7433, "step": 9226 }, { "epoch": 1.6593544906949564, "grad_norm": 1.1284306049346924, "learning_rate": 7.795320682287485e-06, "loss": 0.922, "step": 9227 }, { "epoch": 1.6595342983008181, "grad_norm": 1.5846372842788696, "learning_rate": 7.79483776173664e-06, "loss": 0.7373, "step": 9228 }, { "epoch": 1.65971410590668, "grad_norm": 1.6158196926116943, "learning_rate": 7.794354803263514e-06, "loss": 0.7373, "step": 9229 }, { "epoch": 1.6598939135125415, "grad_norm": 1.6451380252838135, "learning_rate": 7.793871806874662e-06, "loss": 0.7066, "step": 9230 }, { "epoch": 1.6600737211184033, "grad_norm": 1.5218442678451538, "learning_rate": 7.793388772576635e-06, "loss": 0.6909, "step": 9231 }, { "epoch": 1.660253528724265, "grad_norm": 1.5070246458053589, "learning_rate": 7.792905700375987e-06, "loss": 0.7441, "step": 9232 }, { "epoch": 1.6604333363301267, "grad_norm": 1.520904779434204, "learning_rate": 7.792422590279272e-06, "loss": 0.754, "step": 9233 }, { "epoch": 1.6606131439359886, "grad_norm": 1.4893778562545776, "learning_rate": 7.791939442293048e-06, "loss": 0.7337, "step": 9234 }, { "epoch": 1.6607929515418502, "grad_norm": 1.4573025703430176, "learning_rate": 7.791456256423871e-06, "loss": 0.7365, "step": 9235 }, { "epoch": 1.660972759147712, "grad_norm": 1.4029624462127686, "learning_rate": 7.790973032678292e-06, "loss": 0.683, "step": 9236 }, { "epoch": 1.6611525667535738, "grad_norm": 1.488815188407898, "learning_rate": 7.790489771062873e-06, "loss": 0.6926, "step": 9237 }, { "epoch": 1.6613323743594353, "grad_norm": 1.5729176998138428, "learning_rate": 7.790006471584168e-06, "loss": 0.7204, "step": 9238 }, { "epoch": 1.6615121819652972, "grad_norm": 1.5149319171905518, "learning_rate": 7.789523134248737e-06, "loss": 0.7673, "step": 9239 }, { "epoch": 1.6616919895711588, "grad_norm": 1.5190331935882568, "learning_rate": 7.789039759063137e-06, "loss": 0.7637, "step": 9240 }, { "epoch": 1.6618717971770205, "grad_norm": 1.5042533874511719, "learning_rate": 7.788556346033928e-06, "loss": 0.7205, "step": 9241 }, { "epoch": 1.6620516047828824, "grad_norm": 1.6653281450271606, "learning_rate": 7.788072895167667e-06, "loss": 0.7509, "step": 9242 }, { "epoch": 1.662231412388744, "grad_norm": 1.0950936079025269, "learning_rate": 7.787589406470916e-06, "loss": 0.9113, "step": 9243 }, { "epoch": 1.6624112199946057, "grad_norm": 1.5139424800872803, "learning_rate": 7.787105879950234e-06, "loss": 0.7273, "step": 9244 }, { "epoch": 1.6625910276004676, "grad_norm": 2.2440826892852783, "learning_rate": 7.786622315612182e-06, "loss": 0.671, "step": 9245 }, { "epoch": 1.662770835206329, "grad_norm": 1.4877996444702148, "learning_rate": 7.786138713463324e-06, "loss": 0.6586, "step": 9246 }, { "epoch": 1.662950642812191, "grad_norm": 1.535531997680664, "learning_rate": 7.785655073510216e-06, "loss": 0.7601, "step": 9247 }, { "epoch": 1.6631304504180526, "grad_norm": 1.529927372932434, "learning_rate": 7.785171395759426e-06, "loss": 0.7048, "step": 9248 }, { "epoch": 1.6633102580239143, "grad_norm": 1.512593150138855, "learning_rate": 7.784687680217513e-06, "loss": 0.7146, "step": 9249 }, { "epoch": 1.6634900656297762, "grad_norm": 1.4079867601394653, "learning_rate": 7.784203926891043e-06, "loss": 0.736, "step": 9250 }, { "epoch": 1.6636698732356379, "grad_norm": 1.4553786516189575, "learning_rate": 7.78372013578658e-06, "loss": 0.6983, "step": 9251 }, { "epoch": 1.6638496808414995, "grad_norm": 1.193891167640686, "learning_rate": 7.783236306910686e-06, "loss": 0.9321, "step": 9252 }, { "epoch": 1.6640294884473614, "grad_norm": 1.4894206523895264, "learning_rate": 7.782752440269928e-06, "loss": 0.7111, "step": 9253 }, { "epoch": 1.664209296053223, "grad_norm": 1.6220709085464478, "learning_rate": 7.78226853587087e-06, "loss": 0.7597, "step": 9254 }, { "epoch": 1.6643891036590848, "grad_norm": 1.5678365230560303, "learning_rate": 7.78178459372008e-06, "loss": 0.7574, "step": 9255 }, { "epoch": 1.6645689112649467, "grad_norm": 1.4249216318130493, "learning_rate": 7.78130061382412e-06, "loss": 0.7101, "step": 9256 }, { "epoch": 1.664748718870808, "grad_norm": 1.4494645595550537, "learning_rate": 7.780816596189565e-06, "loss": 0.7256, "step": 9257 }, { "epoch": 1.66492852647667, "grad_norm": 1.5510634183883667, "learning_rate": 7.780332540822974e-06, "loss": 0.7896, "step": 9258 }, { "epoch": 1.6651083340825317, "grad_norm": 1.4594521522521973, "learning_rate": 7.77984844773092e-06, "loss": 0.6911, "step": 9259 }, { "epoch": 1.6652881416883933, "grad_norm": 1.506706714630127, "learning_rate": 7.779364316919971e-06, "loss": 0.6746, "step": 9260 }, { "epoch": 1.6654679492942552, "grad_norm": 1.3446589708328247, "learning_rate": 7.778880148396692e-06, "loss": 0.7068, "step": 9261 }, { "epoch": 1.665647756900117, "grad_norm": 1.5157462358474731, "learning_rate": 7.778395942167657e-06, "loss": 0.7221, "step": 9262 }, { "epoch": 1.6658275645059786, "grad_norm": 1.5442363023757935, "learning_rate": 7.777911698239437e-06, "loss": 0.7474, "step": 9263 }, { "epoch": 1.6660073721118405, "grad_norm": 1.548003077507019, "learning_rate": 7.777427416618596e-06, "loss": 0.7943, "step": 9264 }, { "epoch": 1.666187179717702, "grad_norm": 1.4786590337753296, "learning_rate": 7.776943097311713e-06, "loss": 0.6981, "step": 9265 }, { "epoch": 1.6663669873235638, "grad_norm": 1.2101479768753052, "learning_rate": 7.776458740325354e-06, "loss": 0.8982, "step": 9266 }, { "epoch": 1.6665467949294255, "grad_norm": 1.6320750713348389, "learning_rate": 7.775974345666096e-06, "loss": 0.7728, "step": 9267 }, { "epoch": 1.6667266025352871, "grad_norm": 1.4196670055389404, "learning_rate": 7.775489913340504e-06, "loss": 0.6709, "step": 9268 }, { "epoch": 1.666906410141149, "grad_norm": 1.4167232513427734, "learning_rate": 7.775005443355159e-06, "loss": 0.717, "step": 9269 }, { "epoch": 1.6670862177470107, "grad_norm": 1.1643016338348389, "learning_rate": 7.77452093571663e-06, "loss": 0.9619, "step": 9270 }, { "epoch": 1.6672660253528724, "grad_norm": 1.5517516136169434, "learning_rate": 7.774036390431493e-06, "loss": 0.7436, "step": 9271 }, { "epoch": 1.6674458329587343, "grad_norm": 1.5645393133163452, "learning_rate": 7.773551807506321e-06, "loss": 0.7218, "step": 9272 }, { "epoch": 1.6676256405645957, "grad_norm": 1.426802158355713, "learning_rate": 7.773067186947693e-06, "loss": 0.7033, "step": 9273 }, { "epoch": 1.6678054481704576, "grad_norm": 1.471656084060669, "learning_rate": 7.772582528762179e-06, "loss": 0.7146, "step": 9274 }, { "epoch": 1.6679852557763193, "grad_norm": 1.130440354347229, "learning_rate": 7.77209783295636e-06, "loss": 0.954, "step": 9275 }, { "epoch": 1.668165063382181, "grad_norm": 1.4958679676055908, "learning_rate": 7.77161309953681e-06, "loss": 0.7655, "step": 9276 }, { "epoch": 1.6683448709880428, "grad_norm": 1.4058609008789062, "learning_rate": 7.771128328510106e-06, "loss": 0.7608, "step": 9277 }, { "epoch": 1.6685246785939045, "grad_norm": 1.4522837400436401, "learning_rate": 7.770643519882828e-06, "loss": 0.6736, "step": 9278 }, { "epoch": 1.6687044861997662, "grad_norm": 1.482911467552185, "learning_rate": 7.770158673661551e-06, "loss": 0.7548, "step": 9279 }, { "epoch": 1.668884293805628, "grad_norm": 1.4666765928268433, "learning_rate": 7.769673789852859e-06, "loss": 0.6829, "step": 9280 }, { "epoch": 1.6690641014114898, "grad_norm": 1.4129472970962524, "learning_rate": 7.769188868463324e-06, "loss": 0.7357, "step": 9281 }, { "epoch": 1.6692439090173514, "grad_norm": 1.7108172178268433, "learning_rate": 7.768703909499532e-06, "loss": 0.7264, "step": 9282 }, { "epoch": 1.6694237166232133, "grad_norm": 1.563642144203186, "learning_rate": 7.76821891296806e-06, "loss": 0.8057, "step": 9283 }, { "epoch": 1.6696035242290748, "grad_norm": 1.609076738357544, "learning_rate": 7.76773387887549e-06, "loss": 0.6953, "step": 9284 }, { "epoch": 1.6697833318349367, "grad_norm": 1.5003845691680908, "learning_rate": 7.767248807228405e-06, "loss": 0.7599, "step": 9285 }, { "epoch": 1.6699631394407983, "grad_norm": 1.5055044889450073, "learning_rate": 7.766763698033381e-06, "loss": 0.7341, "step": 9286 }, { "epoch": 1.67014294704666, "grad_norm": 1.4544334411621094, "learning_rate": 7.766278551297006e-06, "loss": 0.7849, "step": 9287 }, { "epoch": 1.670322754652522, "grad_norm": 1.488691806793213, "learning_rate": 7.76579336702586e-06, "loss": 0.7337, "step": 9288 }, { "epoch": 1.6705025622583836, "grad_norm": 1.5003759860992432, "learning_rate": 7.765308145226528e-06, "loss": 0.7767, "step": 9289 }, { "epoch": 1.6706823698642452, "grad_norm": 1.4437637329101562, "learning_rate": 7.764822885905592e-06, "loss": 0.7043, "step": 9290 }, { "epoch": 1.6708621774701071, "grad_norm": 1.7761969566345215, "learning_rate": 7.764337589069638e-06, "loss": 0.7483, "step": 9291 }, { "epoch": 1.6710419850759686, "grad_norm": 1.7633514404296875, "learning_rate": 7.763852254725251e-06, "loss": 0.7742, "step": 9292 }, { "epoch": 1.6712217926818305, "grad_norm": 1.0566657781600952, "learning_rate": 7.763366882879014e-06, "loss": 0.9367, "step": 9293 }, { "epoch": 1.6714016002876921, "grad_norm": 1.5279933214187622, "learning_rate": 7.762881473537514e-06, "loss": 0.7366, "step": 9294 }, { "epoch": 1.6715814078935538, "grad_norm": 1.4603952169418335, "learning_rate": 7.762396026707338e-06, "loss": 0.7346, "step": 9295 }, { "epoch": 1.6717612154994157, "grad_norm": 1.4435582160949707, "learning_rate": 7.761910542395073e-06, "loss": 0.8217, "step": 9296 }, { "epoch": 1.6719410231052774, "grad_norm": 1.561822533607483, "learning_rate": 7.761425020607305e-06, "loss": 0.7698, "step": 9297 }, { "epoch": 1.672120830711139, "grad_norm": 1.44832444190979, "learning_rate": 7.760939461350622e-06, "loss": 0.7109, "step": 9298 }, { "epoch": 1.672300638317001, "grad_norm": 1.2042163610458374, "learning_rate": 7.760453864631616e-06, "loss": 0.8992, "step": 9299 }, { "epoch": 1.6724804459228624, "grad_norm": 1.4968628883361816, "learning_rate": 7.759968230456873e-06, "loss": 0.701, "step": 9300 }, { "epoch": 1.6726602535287243, "grad_norm": 1.0920939445495605, "learning_rate": 7.759482558832982e-06, "loss": 0.9602, "step": 9301 }, { "epoch": 1.672840061134586, "grad_norm": 1.5836526155471802, "learning_rate": 7.758996849766533e-06, "loss": 0.7869, "step": 9302 }, { "epoch": 1.6730198687404476, "grad_norm": 1.55878484249115, "learning_rate": 7.758511103264116e-06, "loss": 0.7571, "step": 9303 }, { "epoch": 1.6731996763463095, "grad_norm": 3.228954315185547, "learning_rate": 7.758025319332323e-06, "loss": 0.7856, "step": 9304 }, { "epoch": 1.6733794839521712, "grad_norm": 1.5167129039764404, "learning_rate": 7.757539497977747e-06, "loss": 0.6839, "step": 9305 }, { "epoch": 1.6735592915580328, "grad_norm": 1.0276949405670166, "learning_rate": 7.757053639206977e-06, "loss": 0.91, "step": 9306 }, { "epoch": 1.6737390991638947, "grad_norm": 1.4367343187332153, "learning_rate": 7.756567743026608e-06, "loss": 0.7249, "step": 9307 }, { "epoch": 1.6739189067697564, "grad_norm": 1.4595921039581299, "learning_rate": 7.75608180944323e-06, "loss": 0.7517, "step": 9308 }, { "epoch": 1.674098714375618, "grad_norm": 1.4850189685821533, "learning_rate": 7.75559583846344e-06, "loss": 0.7221, "step": 9309 }, { "epoch": 1.67427852198148, "grad_norm": 1.4109632968902588, "learning_rate": 7.75510983009383e-06, "loss": 0.6997, "step": 9310 }, { "epoch": 1.6744583295873414, "grad_norm": 1.4989420175552368, "learning_rate": 7.754623784340993e-06, "loss": 0.7076, "step": 9311 }, { "epoch": 1.6746381371932033, "grad_norm": 1.4728553295135498, "learning_rate": 7.754137701211526e-06, "loss": 0.7471, "step": 9312 }, { "epoch": 1.674817944799065, "grad_norm": 1.4874070882797241, "learning_rate": 7.753651580712025e-06, "loss": 0.6838, "step": 9313 }, { "epoch": 1.6749977524049267, "grad_norm": 1.1170365810394287, "learning_rate": 7.753165422849086e-06, "loss": 0.9352, "step": 9314 }, { "epoch": 1.6751775600107885, "grad_norm": 1.4511278867721558, "learning_rate": 7.752679227629304e-06, "loss": 0.7623, "step": 9315 }, { "epoch": 1.6753573676166502, "grad_norm": 1.5036450624465942, "learning_rate": 7.752192995059276e-06, "loss": 0.7169, "step": 9316 }, { "epoch": 1.6755371752225119, "grad_norm": 1.6318954229354858, "learning_rate": 7.751706725145601e-06, "loss": 0.7291, "step": 9317 }, { "epoch": 1.6757169828283738, "grad_norm": 1.9917601346969604, "learning_rate": 7.751220417894876e-06, "loss": 0.7842, "step": 9318 }, { "epoch": 1.6758967904342352, "grad_norm": 1.4290826320648193, "learning_rate": 7.7507340733137e-06, "loss": 0.7658, "step": 9319 }, { "epoch": 1.6760765980400971, "grad_norm": 1.4590109586715698, "learning_rate": 7.750247691408672e-06, "loss": 0.7558, "step": 9320 }, { "epoch": 1.6762564056459588, "grad_norm": 1.1023468971252441, "learning_rate": 7.749761272186392e-06, "loss": 0.9434, "step": 9321 }, { "epoch": 1.6764362132518205, "grad_norm": 1.0491228103637695, "learning_rate": 7.74927481565346e-06, "loss": 0.9017, "step": 9322 }, { "epoch": 1.6766160208576824, "grad_norm": 1.1035314798355103, "learning_rate": 7.748788321816477e-06, "loss": 0.9772, "step": 9323 }, { "epoch": 1.676795828463544, "grad_norm": 1.5533138513565063, "learning_rate": 7.74830179068204e-06, "loss": 0.7316, "step": 9324 }, { "epoch": 1.6769756360694057, "grad_norm": 1.4797219038009644, "learning_rate": 7.747815222256756e-06, "loss": 0.8425, "step": 9325 }, { "epoch": 1.6771554436752676, "grad_norm": 1.5495007038116455, "learning_rate": 7.747328616547223e-06, "loss": 0.8079, "step": 9326 }, { "epoch": 1.677335251281129, "grad_norm": 1.5445448160171509, "learning_rate": 7.746841973560048e-06, "loss": 0.7469, "step": 9327 }, { "epoch": 1.677515058886991, "grad_norm": 1.4815174341201782, "learning_rate": 7.74635529330183e-06, "loss": 0.7502, "step": 9328 }, { "epoch": 1.6776948664928526, "grad_norm": 1.4613192081451416, "learning_rate": 7.745868575779176e-06, "loss": 0.7408, "step": 9329 }, { "epoch": 1.6778746740987143, "grad_norm": 1.3835968971252441, "learning_rate": 7.745381820998687e-06, "loss": 0.7047, "step": 9330 }, { "epoch": 1.6780544817045762, "grad_norm": 1.5629788637161255, "learning_rate": 7.74489502896697e-06, "loss": 0.8028, "step": 9331 }, { "epoch": 1.6782342893104378, "grad_norm": 1.550609827041626, "learning_rate": 7.744408199690628e-06, "loss": 0.7456, "step": 9332 }, { "epoch": 1.6784140969162995, "grad_norm": 1.4027644395828247, "learning_rate": 7.743921333176269e-06, "loss": 0.7383, "step": 9333 }, { "epoch": 1.6785939045221614, "grad_norm": 1.6320791244506836, "learning_rate": 7.743434429430496e-06, "loss": 0.7277, "step": 9334 }, { "epoch": 1.678773712128023, "grad_norm": 1.525590419769287, "learning_rate": 7.742947488459918e-06, "loss": 0.7023, "step": 9335 }, { "epoch": 1.6789535197338847, "grad_norm": 1.4640415906906128, "learning_rate": 7.742460510271143e-06, "loss": 0.6946, "step": 9336 }, { "epoch": 1.6791333273397466, "grad_norm": 1.5500715970993042, "learning_rate": 7.741973494870777e-06, "loss": 0.7111, "step": 9337 }, { "epoch": 1.679313134945608, "grad_norm": 1.604873776435852, "learning_rate": 7.741486442265428e-06, "loss": 0.7225, "step": 9338 }, { "epoch": 1.67949294255147, "grad_norm": 1.4705619812011719, "learning_rate": 7.740999352461707e-06, "loss": 0.922, "step": 9339 }, { "epoch": 1.6796727501573316, "grad_norm": 1.5549602508544922, "learning_rate": 7.74051222546622e-06, "loss": 0.9041, "step": 9340 }, { "epoch": 1.6798525577631933, "grad_norm": 1.2449430227279663, "learning_rate": 7.740025061285577e-06, "loss": 0.9459, "step": 9341 }, { "epoch": 1.6800323653690552, "grad_norm": 1.456619381904602, "learning_rate": 7.739537859926388e-06, "loss": 0.7332, "step": 9342 }, { "epoch": 1.6802121729749169, "grad_norm": 1.768462896347046, "learning_rate": 7.73905062139527e-06, "loss": 0.7346, "step": 9343 }, { "epoch": 1.6803919805807785, "grad_norm": 1.5188982486724854, "learning_rate": 7.738563345698824e-06, "loss": 0.7596, "step": 9344 }, { "epoch": 1.6805717881866404, "grad_norm": 1.5486036539077759, "learning_rate": 7.73807603284367e-06, "loss": 0.7325, "step": 9345 }, { "epoch": 1.6807515957925019, "grad_norm": 1.5540506839752197, "learning_rate": 7.737588682836414e-06, "loss": 0.7687, "step": 9346 }, { "epoch": 1.6809314033983638, "grad_norm": 1.1967494487762451, "learning_rate": 7.737101295683674e-06, "loss": 0.9633, "step": 9347 }, { "epoch": 1.6811112110042254, "grad_norm": 1.5581809282302856, "learning_rate": 7.73661387139206e-06, "loss": 0.7713, "step": 9348 }, { "epoch": 1.6812910186100871, "grad_norm": 1.4266620874404907, "learning_rate": 7.736126409968188e-06, "loss": 0.7011, "step": 9349 }, { "epoch": 1.681470826215949, "grad_norm": 1.3202705383300781, "learning_rate": 7.73563891141867e-06, "loss": 0.6983, "step": 9350 }, { "epoch": 1.6816506338218107, "grad_norm": 1.7271236181259155, "learning_rate": 7.73515137575012e-06, "loss": 0.7969, "step": 9351 }, { "epoch": 1.6818304414276724, "grad_norm": 1.3640847206115723, "learning_rate": 7.734663802969156e-06, "loss": 0.7254, "step": 9352 }, { "epoch": 1.6820102490335342, "grad_norm": 1.3815484046936035, "learning_rate": 7.734176193082393e-06, "loss": 0.7047, "step": 9353 }, { "epoch": 1.6821900566393957, "grad_norm": 1.5031561851501465, "learning_rate": 7.733688546096445e-06, "loss": 0.6931, "step": 9354 }, { "epoch": 1.6823698642452576, "grad_norm": 1.4330763816833496, "learning_rate": 7.733200862017932e-06, "loss": 0.6608, "step": 9355 }, { "epoch": 1.6825496718511193, "grad_norm": 1.4751592874526978, "learning_rate": 7.73271314085347e-06, "loss": 0.7677, "step": 9356 }, { "epoch": 1.682729479456981, "grad_norm": 1.5398366451263428, "learning_rate": 7.732225382609675e-06, "loss": 0.7275, "step": 9357 }, { "epoch": 1.6829092870628428, "grad_norm": 1.100224494934082, "learning_rate": 7.731737587293166e-06, "loss": 0.9212, "step": 9358 }, { "epoch": 1.6830890946687045, "grad_norm": 1.4846066236495972, "learning_rate": 7.731249754910564e-06, "loss": 0.7652, "step": 9359 }, { "epoch": 1.6832689022745662, "grad_norm": 1.060160517692566, "learning_rate": 7.730761885468486e-06, "loss": 0.8839, "step": 9360 }, { "epoch": 1.683448709880428, "grad_norm": 1.5642485618591309, "learning_rate": 7.730273978973552e-06, "loss": 0.7267, "step": 9361 }, { "epoch": 1.6836285174862897, "grad_norm": 1.4059737920761108, "learning_rate": 7.729786035432383e-06, "loss": 0.668, "step": 9362 }, { "epoch": 1.6838083250921514, "grad_norm": 1.5548326969146729, "learning_rate": 7.729298054851599e-06, "loss": 0.7102, "step": 9363 }, { "epoch": 1.6839881326980133, "grad_norm": 1.5408999919891357, "learning_rate": 7.728810037237822e-06, "loss": 0.7234, "step": 9364 }, { "epoch": 1.6841679403038747, "grad_norm": 1.522443413734436, "learning_rate": 7.728321982597673e-06, "loss": 0.7599, "step": 9365 }, { "epoch": 1.6843477479097366, "grad_norm": 1.4361367225646973, "learning_rate": 7.727833890937775e-06, "loss": 0.708, "step": 9366 }, { "epoch": 1.6845275555155983, "grad_norm": 1.4408161640167236, "learning_rate": 7.72734576226475e-06, "loss": 0.6752, "step": 9367 }, { "epoch": 1.68470736312146, "grad_norm": 1.5941522121429443, "learning_rate": 7.726857596585221e-06, "loss": 0.7713, "step": 9368 }, { "epoch": 1.6848871707273219, "grad_norm": 1.1790521144866943, "learning_rate": 7.726369393905814e-06, "loss": 0.8816, "step": 9369 }, { "epoch": 1.6850669783331835, "grad_norm": 1.5367072820663452, "learning_rate": 7.725881154233151e-06, "loss": 0.7315, "step": 9370 }, { "epoch": 1.6852467859390452, "grad_norm": 1.4821146726608276, "learning_rate": 7.725392877573859e-06, "loss": 0.7421, "step": 9371 }, { "epoch": 1.685426593544907, "grad_norm": 1.5274231433868408, "learning_rate": 7.724904563934559e-06, "loss": 0.6528, "step": 9372 }, { "epoch": 1.6856064011507685, "grad_norm": 1.104501724243164, "learning_rate": 7.724416213321882e-06, "loss": 0.9203, "step": 9373 }, { "epoch": 1.6857862087566304, "grad_norm": 1.6544491052627563, "learning_rate": 7.72392782574245e-06, "loss": 0.806, "step": 9374 }, { "epoch": 1.685966016362492, "grad_norm": 1.5382747650146484, "learning_rate": 7.72343940120289e-06, "loss": 0.7775, "step": 9375 }, { "epoch": 1.6861458239683538, "grad_norm": 1.4540715217590332, "learning_rate": 7.722950939709834e-06, "loss": 0.7109, "step": 9376 }, { "epoch": 1.6863256315742157, "grad_norm": 1.4671941995620728, "learning_rate": 7.722462441269905e-06, "loss": 0.7288, "step": 9377 }, { "epoch": 1.6865054391800773, "grad_norm": 1.5153722763061523, "learning_rate": 7.721973905889734e-06, "loss": 0.7589, "step": 9378 }, { "epoch": 1.686685246785939, "grad_norm": 1.4860568046569824, "learning_rate": 7.721485333575948e-06, "loss": 0.6942, "step": 9379 }, { "epoch": 1.686865054391801, "grad_norm": 1.5339937210083008, "learning_rate": 7.720996724335178e-06, "loss": 0.7279, "step": 9380 }, { "epoch": 1.6870448619976623, "grad_norm": 1.4422738552093506, "learning_rate": 7.720508078174052e-06, "loss": 0.7086, "step": 9381 }, { "epoch": 1.6872246696035242, "grad_norm": 1.4763693809509277, "learning_rate": 7.7200193950992e-06, "loss": 0.713, "step": 9382 }, { "epoch": 1.687404477209386, "grad_norm": 1.420395016670227, "learning_rate": 7.719530675117255e-06, "loss": 0.7205, "step": 9383 }, { "epoch": 1.6875842848152476, "grad_norm": 1.4617496728897095, "learning_rate": 7.719041918234849e-06, "loss": 0.705, "step": 9384 }, { "epoch": 1.6877640924211095, "grad_norm": 1.5066617727279663, "learning_rate": 7.718553124458609e-06, "loss": 0.6892, "step": 9385 }, { "epoch": 1.6879439000269711, "grad_norm": 1.6612558364868164, "learning_rate": 7.718064293795171e-06, "loss": 0.7475, "step": 9386 }, { "epoch": 1.6881237076328328, "grad_norm": 1.1702771186828613, "learning_rate": 7.717575426251167e-06, "loss": 0.9526, "step": 9387 }, { "epoch": 1.6883035152386947, "grad_norm": 1.6375652551651, "learning_rate": 7.71708652183323e-06, "loss": 0.6948, "step": 9388 }, { "epoch": 1.6884833228445564, "grad_norm": 1.466111421585083, "learning_rate": 7.716597580547995e-06, "loss": 0.6964, "step": 9389 }, { "epoch": 1.688663130450418, "grad_norm": 1.2722169160842896, "learning_rate": 7.716108602402094e-06, "loss": 0.9629, "step": 9390 }, { "epoch": 1.68884293805628, "grad_norm": 1.5283726453781128, "learning_rate": 7.715619587402165e-06, "loss": 0.7719, "step": 9391 }, { "epoch": 1.6890227456621414, "grad_norm": 1.4871834516525269, "learning_rate": 7.71513053555484e-06, "loss": 0.7512, "step": 9392 }, { "epoch": 1.6892025532680033, "grad_norm": 1.5391395092010498, "learning_rate": 7.714641446866757e-06, "loss": 0.6815, "step": 9393 }, { "epoch": 1.689382360873865, "grad_norm": 1.4523464441299438, "learning_rate": 7.714152321344553e-06, "loss": 0.6811, "step": 9394 }, { "epoch": 1.6895621684797266, "grad_norm": 1.1829112768173218, "learning_rate": 7.71366315899486e-06, "loss": 0.9111, "step": 9395 }, { "epoch": 1.6897419760855885, "grad_norm": 1.4969490766525269, "learning_rate": 7.71317395982432e-06, "loss": 0.7002, "step": 9396 }, { "epoch": 1.6899217836914502, "grad_norm": 1.4458650350570679, "learning_rate": 7.71268472383957e-06, "loss": 0.7498, "step": 9397 }, { "epoch": 1.6901015912973119, "grad_norm": 1.6146836280822754, "learning_rate": 7.712195451047247e-06, "loss": 0.7677, "step": 9398 }, { "epoch": 1.6902813989031737, "grad_norm": 1.4975076913833618, "learning_rate": 7.711706141453991e-06, "loss": 0.7751, "step": 9399 }, { "epoch": 1.6904612065090352, "grad_norm": 1.4670195579528809, "learning_rate": 7.711216795066441e-06, "loss": 0.7222, "step": 9400 }, { "epoch": 1.690641014114897, "grad_norm": 1.5463414192199707, "learning_rate": 7.710727411891237e-06, "loss": 0.7629, "step": 9401 }, { "epoch": 1.6908208217207588, "grad_norm": 1.0461225509643555, "learning_rate": 7.710237991935017e-06, "loss": 0.9026, "step": 9402 }, { "epoch": 1.6910006293266204, "grad_norm": 1.3320001363754272, "learning_rate": 7.709748535204425e-06, "loss": 0.7221, "step": 9403 }, { "epoch": 1.6911804369324823, "grad_norm": 1.4394831657409668, "learning_rate": 7.7092590417061e-06, "loss": 0.7594, "step": 9404 }, { "epoch": 1.691360244538344, "grad_norm": 1.4970650672912598, "learning_rate": 7.708769511446686e-06, "loss": 0.7504, "step": 9405 }, { "epoch": 1.6915400521442057, "grad_norm": 1.4976272583007812, "learning_rate": 7.708279944432823e-06, "loss": 0.7022, "step": 9406 }, { "epoch": 1.6917198597500676, "grad_norm": 1.098783016204834, "learning_rate": 7.707790340671156e-06, "loss": 0.9451, "step": 9407 }, { "epoch": 1.691899667355929, "grad_norm": 1.4572415351867676, "learning_rate": 7.707300700168327e-06, "loss": 0.7499, "step": 9408 }, { "epoch": 1.692079474961791, "grad_norm": 1.4954699277877808, "learning_rate": 7.706811022930978e-06, "loss": 0.6621, "step": 9409 }, { "epoch": 1.6922592825676526, "grad_norm": 1.087489366531372, "learning_rate": 7.706321308965757e-06, "loss": 0.8962, "step": 9410 }, { "epoch": 1.6924390901735142, "grad_norm": 1.587023377418518, "learning_rate": 7.705831558279307e-06, "loss": 0.7562, "step": 9411 }, { "epoch": 1.6926188977793761, "grad_norm": 1.4785014390945435, "learning_rate": 7.705341770878273e-06, "loss": 0.6864, "step": 9412 }, { "epoch": 1.6927987053852378, "grad_norm": 1.4492201805114746, "learning_rate": 7.704851946769299e-06, "loss": 0.7239, "step": 9413 }, { "epoch": 1.6929785129910995, "grad_norm": 1.5970710515975952, "learning_rate": 7.704362085959034e-06, "loss": 0.7219, "step": 9414 }, { "epoch": 1.6931583205969614, "grad_norm": 1.437921404838562, "learning_rate": 7.703872188454125e-06, "loss": 0.9077, "step": 9415 }, { "epoch": 1.693338128202823, "grad_norm": 1.1369229555130005, "learning_rate": 7.703382254261217e-06, "loss": 0.9479, "step": 9416 }, { "epoch": 1.6935179358086847, "grad_norm": 1.501562237739563, "learning_rate": 7.70289228338696e-06, "loss": 0.7703, "step": 9417 }, { "epoch": 1.6936977434145466, "grad_norm": 1.5532636642456055, "learning_rate": 7.702402275838002e-06, "loss": 0.7545, "step": 9418 }, { "epoch": 1.693877551020408, "grad_norm": 1.5072014331817627, "learning_rate": 7.70191223162099e-06, "loss": 0.7506, "step": 9419 }, { "epoch": 1.69405735862627, "grad_norm": 1.0439255237579346, "learning_rate": 7.701422150742575e-06, "loss": 0.932, "step": 9420 }, { "epoch": 1.6942371662321316, "grad_norm": 1.0574676990509033, "learning_rate": 7.700932033209406e-06, "loss": 0.9403, "step": 9421 }, { "epoch": 1.6944169738379933, "grad_norm": 1.0793170928955078, "learning_rate": 7.700441879028132e-06, "loss": 0.9055, "step": 9422 }, { "epoch": 1.6945967814438552, "grad_norm": 1.5101567506790161, "learning_rate": 7.699951688205405e-06, "loss": 0.7353, "step": 9423 }, { "epoch": 1.6947765890497168, "grad_norm": 1.3856881856918335, "learning_rate": 7.699461460747878e-06, "loss": 0.7271, "step": 9424 }, { "epoch": 1.6949563966555785, "grad_norm": 1.4078853130340576, "learning_rate": 7.6989711966622e-06, "loss": 0.9083, "step": 9425 }, { "epoch": 1.6951362042614404, "grad_norm": 1.570081353187561, "learning_rate": 7.698480895955024e-06, "loss": 0.77, "step": 9426 }, { "epoch": 1.6953160118673019, "grad_norm": 1.5555118322372437, "learning_rate": 7.697990558633003e-06, "loss": 0.7711, "step": 9427 }, { "epoch": 1.6954958194731637, "grad_norm": 1.5843963623046875, "learning_rate": 7.69750018470279e-06, "loss": 0.7504, "step": 9428 }, { "epoch": 1.6956756270790254, "grad_norm": 1.47349214553833, "learning_rate": 7.69700977417104e-06, "loss": 0.6701, "step": 9429 }, { "epoch": 1.695855434684887, "grad_norm": 1.1833124160766602, "learning_rate": 7.696519327044407e-06, "loss": 0.8597, "step": 9430 }, { "epoch": 1.696035242290749, "grad_norm": 1.4838664531707764, "learning_rate": 7.696028843329543e-06, "loss": 0.7392, "step": 9431 }, { "epoch": 1.6962150498966106, "grad_norm": 1.4721556901931763, "learning_rate": 7.695538323033108e-06, "loss": 0.6852, "step": 9432 }, { "epoch": 1.6963948575024723, "grad_norm": 1.4604403972625732, "learning_rate": 7.695047766161752e-06, "loss": 0.7865, "step": 9433 }, { "epoch": 1.6965746651083342, "grad_norm": 1.63369619846344, "learning_rate": 7.694557172722135e-06, "loss": 0.7614, "step": 9434 }, { "epoch": 1.6967544727141957, "grad_norm": 1.4160354137420654, "learning_rate": 7.694066542720911e-06, "loss": 0.6095, "step": 9435 }, { "epoch": 1.6969342803200576, "grad_norm": 1.5253781080245972, "learning_rate": 7.693575876164743e-06, "loss": 0.746, "step": 9436 }, { "epoch": 1.6971140879259192, "grad_norm": 1.5774011611938477, "learning_rate": 7.693085173060281e-06, "loss": 0.7403, "step": 9437 }, { "epoch": 1.697293895531781, "grad_norm": 1.512210726737976, "learning_rate": 7.69259443341419e-06, "loss": 0.6732, "step": 9438 }, { "epoch": 1.6974737031376428, "grad_norm": 1.1754412651062012, "learning_rate": 7.692103657233122e-06, "loss": 0.9854, "step": 9439 }, { "epoch": 1.6976535107435045, "grad_norm": 1.458345651626587, "learning_rate": 7.691612844523741e-06, "loss": 0.7467, "step": 9440 }, { "epoch": 1.6978333183493661, "grad_norm": 1.5207655429840088, "learning_rate": 7.691121995292708e-06, "loss": 0.7076, "step": 9441 }, { "epoch": 1.698013125955228, "grad_norm": 1.4328478574752808, "learning_rate": 7.690631109546678e-06, "loss": 0.6834, "step": 9442 }, { "epoch": 1.6981929335610895, "grad_norm": 1.571574091911316, "learning_rate": 7.690140187292314e-06, "loss": 0.7031, "step": 9443 }, { "epoch": 1.6983727411669514, "grad_norm": 1.512743353843689, "learning_rate": 7.68964922853628e-06, "loss": 0.6929, "step": 9444 }, { "epoch": 1.6985525487728133, "grad_norm": 1.4272329807281494, "learning_rate": 7.689158233285233e-06, "loss": 0.7014, "step": 9445 }, { "epoch": 1.6987323563786747, "grad_norm": 1.4970004558563232, "learning_rate": 7.688667201545838e-06, "loss": 0.8044, "step": 9446 }, { "epoch": 1.6989121639845366, "grad_norm": 1.4790462255477905, "learning_rate": 7.688176133324758e-06, "loss": 0.7412, "step": 9447 }, { "epoch": 1.6990919715903983, "grad_norm": 1.540419578552246, "learning_rate": 7.687685028628653e-06, "loss": 0.7416, "step": 9448 }, { "epoch": 1.69927177919626, "grad_norm": 1.4655736684799194, "learning_rate": 7.68719388746419e-06, "loss": 0.709, "step": 9449 }, { "epoch": 1.6994515868021218, "grad_norm": 1.599494218826294, "learning_rate": 7.686702709838032e-06, "loss": 0.7168, "step": 9450 }, { "epoch": 1.6996313944079835, "grad_norm": 1.5516672134399414, "learning_rate": 7.686211495756843e-06, "loss": 0.7441, "step": 9451 }, { "epoch": 1.6998112020138452, "grad_norm": 1.6111207008361816, "learning_rate": 7.68572024522729e-06, "loss": 0.7235, "step": 9452 }, { "epoch": 1.699991009619707, "grad_norm": 1.5953763723373413, "learning_rate": 7.685228958256036e-06, "loss": 0.6904, "step": 9453 }, { "epoch": 1.7001708172255685, "grad_norm": 1.1975663900375366, "learning_rate": 7.68473763484975e-06, "loss": 0.983, "step": 9454 }, { "epoch": 1.7003506248314304, "grad_norm": 1.2006800174713135, "learning_rate": 7.684246275015095e-06, "loss": 0.9461, "step": 9455 }, { "epoch": 1.700530432437292, "grad_norm": 1.4565794467926025, "learning_rate": 7.68375487875874e-06, "loss": 0.714, "step": 9456 }, { "epoch": 1.7007102400431537, "grad_norm": 1.3948854207992554, "learning_rate": 7.683263446087354e-06, "loss": 0.7182, "step": 9457 }, { "epoch": 1.7008900476490156, "grad_norm": 1.3373639583587646, "learning_rate": 7.682771977007604e-06, "loss": 0.6735, "step": 9458 }, { "epoch": 1.7010698552548773, "grad_norm": 1.4120622873306274, "learning_rate": 7.682280471526158e-06, "loss": 0.6938, "step": 9459 }, { "epoch": 1.701249662860739, "grad_norm": 1.4374724626541138, "learning_rate": 7.681788929649685e-06, "loss": 0.725, "step": 9460 }, { "epoch": 1.7014294704666009, "grad_norm": 1.4871665239334106, "learning_rate": 7.681297351384856e-06, "loss": 0.8051, "step": 9461 }, { "epoch": 1.7016092780724623, "grad_norm": 1.4450044631958008, "learning_rate": 7.68080573673834e-06, "loss": 0.6685, "step": 9462 }, { "epoch": 1.7017890856783242, "grad_norm": 1.5861928462982178, "learning_rate": 7.680314085716807e-06, "loss": 0.7329, "step": 9463 }, { "epoch": 1.7019688932841859, "grad_norm": 1.4533216953277588, "learning_rate": 7.679822398326931e-06, "loss": 0.6877, "step": 9464 }, { "epoch": 1.7021487008900476, "grad_norm": 1.4902347326278687, "learning_rate": 7.679330674575379e-06, "loss": 0.7289, "step": 9465 }, { "epoch": 1.7023285084959094, "grad_norm": 1.5586457252502441, "learning_rate": 7.678838914468827e-06, "loss": 0.711, "step": 9466 }, { "epoch": 1.7025083161017711, "grad_norm": 1.4219398498535156, "learning_rate": 7.678347118013944e-06, "loss": 0.6901, "step": 9467 }, { "epoch": 1.7026881237076328, "grad_norm": 1.4431406259536743, "learning_rate": 7.677855285217406e-06, "loss": 0.7249, "step": 9468 }, { "epoch": 1.7028679313134947, "grad_norm": 1.3836312294006348, "learning_rate": 7.677363416085886e-06, "loss": 0.764, "step": 9469 }, { "epoch": 1.7030477389193561, "grad_norm": 1.4709112644195557, "learning_rate": 7.676871510626057e-06, "loss": 0.7547, "step": 9470 }, { "epoch": 1.703227546525218, "grad_norm": 1.6776924133300781, "learning_rate": 7.676379568844592e-06, "loss": 0.7047, "step": 9471 }, { "epoch": 1.70340735413108, "grad_norm": 1.4684944152832031, "learning_rate": 7.67588759074817e-06, "loss": 0.7267, "step": 9472 }, { "epoch": 1.7035871617369414, "grad_norm": 1.6064951419830322, "learning_rate": 7.675395576343465e-06, "loss": 0.7615, "step": 9473 }, { "epoch": 1.7037669693428033, "grad_norm": 1.526611328125, "learning_rate": 7.674903525637153e-06, "loss": 0.7265, "step": 9474 }, { "epoch": 1.703946776948665, "grad_norm": 1.5422614812850952, "learning_rate": 7.674411438635909e-06, "loss": 0.7225, "step": 9475 }, { "epoch": 1.7041265845545266, "grad_norm": 1.570933222770691, "learning_rate": 7.673919315346412e-06, "loss": 0.7751, "step": 9476 }, { "epoch": 1.7043063921603885, "grad_norm": 1.5897895097732544, "learning_rate": 7.673427155775336e-06, "loss": 0.7341, "step": 9477 }, { "epoch": 1.7044861997662502, "grad_norm": 2.1557586193084717, "learning_rate": 7.672934959929363e-06, "loss": 0.6971, "step": 9478 }, { "epoch": 1.7046660073721118, "grad_norm": 1.4810645580291748, "learning_rate": 7.67244272781517e-06, "loss": 0.7183, "step": 9479 }, { "epoch": 1.7048458149779737, "grad_norm": 1.521802544593811, "learning_rate": 7.671950459439434e-06, "loss": 0.7822, "step": 9480 }, { "epoch": 1.7050256225838352, "grad_norm": 1.6208854913711548, "learning_rate": 7.671458154808838e-06, "loss": 0.725, "step": 9481 }, { "epoch": 1.705205430189697, "grad_norm": 1.5400257110595703, "learning_rate": 7.67096581393006e-06, "loss": 0.7161, "step": 9482 }, { "epoch": 1.7053852377955587, "grad_norm": 1.4784908294677734, "learning_rate": 7.670473436809782e-06, "loss": 0.7102, "step": 9483 }, { "epoch": 1.7055650454014204, "grad_norm": 1.114553451538086, "learning_rate": 7.669981023454682e-06, "loss": 0.9571, "step": 9484 }, { "epoch": 1.7057448530072823, "grad_norm": 1.4286490678787231, "learning_rate": 7.669488573871443e-06, "loss": 0.6826, "step": 9485 }, { "epoch": 1.705924660613144, "grad_norm": 1.7199796438217163, "learning_rate": 7.668996088066747e-06, "loss": 0.7707, "step": 9486 }, { "epoch": 1.7061044682190056, "grad_norm": 1.0796369314193726, "learning_rate": 7.668503566047275e-06, "loss": 0.9604, "step": 9487 }, { "epoch": 1.7062842758248675, "grad_norm": 1.0699880123138428, "learning_rate": 7.668011007819712e-06, "loss": 0.9297, "step": 9488 }, { "epoch": 1.706464083430729, "grad_norm": 1.5158183574676514, "learning_rate": 7.66751841339074e-06, "loss": 0.7622, "step": 9489 }, { "epoch": 1.7066438910365909, "grad_norm": 1.054179072380066, "learning_rate": 7.667025782767044e-06, "loss": 0.8928, "step": 9490 }, { "epoch": 1.7068236986424525, "grad_norm": 1.513206958770752, "learning_rate": 7.666533115955308e-06, "loss": 0.7921, "step": 9491 }, { "epoch": 1.7070035062483142, "grad_norm": 1.136136531829834, "learning_rate": 7.666040412962215e-06, "loss": 0.9011, "step": 9492 }, { "epoch": 1.707183313854176, "grad_norm": 1.4645413160324097, "learning_rate": 7.665547673794452e-06, "loss": 0.7072, "step": 9493 }, { "epoch": 1.7073631214600378, "grad_norm": 1.45308256149292, "learning_rate": 7.665054898458704e-06, "loss": 0.7375, "step": 9494 }, { "epoch": 1.7075429290658994, "grad_norm": 1.30874502658844, "learning_rate": 7.66456208696166e-06, "loss": 0.927, "step": 9495 }, { "epoch": 1.7077227366717613, "grad_norm": 1.427588701248169, "learning_rate": 7.664069239310003e-06, "loss": 0.7179, "step": 9496 }, { "epoch": 1.7079025442776228, "grad_norm": 1.776084065437317, "learning_rate": 7.663576355510423e-06, "loss": 0.7132, "step": 9497 }, { "epoch": 1.7080823518834847, "grad_norm": 1.4265034198760986, "learning_rate": 7.663083435569606e-06, "loss": 0.7681, "step": 9498 }, { "epoch": 1.7082621594893466, "grad_norm": 1.5346689224243164, "learning_rate": 7.662590479494243e-06, "loss": 0.7091, "step": 9499 }, { "epoch": 1.708441967095208, "grad_norm": 1.4180866479873657, "learning_rate": 7.66209748729102e-06, "loss": 0.6778, "step": 9500 }, { "epoch": 1.708441967095208, "eval_loss": 0.7913333177566528, "eval_runtime": 152.6355, "eval_samples_per_second": 94.224, "eval_steps_per_second": 1.474, "step": 9500 }, { "epoch": 1.70862177470107, "grad_norm": 1.1466652154922485, "learning_rate": 7.661604458966628e-06, "loss": 0.9567, "step": 9501 }, { "epoch": 1.7088015823069316, "grad_norm": 1.627600908279419, "learning_rate": 7.661111394527752e-06, "loss": 0.7336, "step": 9502 }, { "epoch": 1.7089813899127932, "grad_norm": 1.180016040802002, "learning_rate": 7.660618293981089e-06, "loss": 0.9094, "step": 9503 }, { "epoch": 1.7091611975186551, "grad_norm": 1.5346282720565796, "learning_rate": 7.660125157333327e-06, "loss": 0.7393, "step": 9504 }, { "epoch": 1.7093410051245168, "grad_norm": 1.3636409044265747, "learning_rate": 7.659631984591156e-06, "loss": 0.7234, "step": 9505 }, { "epoch": 1.7095208127303785, "grad_norm": 1.1380043029785156, "learning_rate": 7.65913877576127e-06, "loss": 0.9192, "step": 9506 }, { "epoch": 1.7097006203362404, "grad_norm": 1.4846729040145874, "learning_rate": 7.658645530850359e-06, "loss": 0.7042, "step": 9507 }, { "epoch": 1.7098804279421018, "grad_norm": 1.5277702808380127, "learning_rate": 7.658152249865117e-06, "loss": 0.7094, "step": 9508 }, { "epoch": 1.7100602355479637, "grad_norm": 1.4604370594024658, "learning_rate": 7.657658932812238e-06, "loss": 0.8133, "step": 9509 }, { "epoch": 1.7102400431538254, "grad_norm": 1.5258249044418335, "learning_rate": 7.657165579698413e-06, "loss": 0.7269, "step": 9510 }, { "epoch": 1.710419850759687, "grad_norm": 1.454419493675232, "learning_rate": 7.656672190530338e-06, "loss": 0.6979, "step": 9511 }, { "epoch": 1.710599658365549, "grad_norm": 1.4932500123977661, "learning_rate": 7.656178765314708e-06, "loss": 0.7379, "step": 9512 }, { "epoch": 1.7107794659714106, "grad_norm": 1.657121181488037, "learning_rate": 7.655685304058217e-06, "loss": 0.7774, "step": 9513 }, { "epoch": 1.7109592735772723, "grad_norm": 1.7041985988616943, "learning_rate": 7.65519180676756e-06, "loss": 0.7434, "step": 9514 }, { "epoch": 1.7111390811831342, "grad_norm": 1.527085542678833, "learning_rate": 7.654698273449435e-06, "loss": 0.7863, "step": 9515 }, { "epoch": 1.7113188887889956, "grad_norm": 1.4506587982177734, "learning_rate": 7.654204704110537e-06, "loss": 0.7188, "step": 9516 }, { "epoch": 1.7114986963948575, "grad_norm": 1.581845998764038, "learning_rate": 7.653711098757566e-06, "loss": 0.7276, "step": 9517 }, { "epoch": 1.7116785040007192, "grad_norm": 1.5441913604736328, "learning_rate": 7.653217457397215e-06, "loss": 0.7896, "step": 9518 }, { "epoch": 1.7118583116065809, "grad_norm": 1.528225064277649, "learning_rate": 7.652723780036187e-06, "loss": 0.8304, "step": 9519 }, { "epoch": 1.7120381192124428, "grad_norm": 1.532849669456482, "learning_rate": 7.652230066681174e-06, "loss": 0.7565, "step": 9520 }, { "epoch": 1.7122179268183044, "grad_norm": 1.4638336896896362, "learning_rate": 7.651736317338883e-06, "loss": 0.6682, "step": 9521 }, { "epoch": 1.712397734424166, "grad_norm": 1.494817852973938, "learning_rate": 7.651242532016007e-06, "loss": 0.7179, "step": 9522 }, { "epoch": 1.712577542030028, "grad_norm": 1.446022391319275, "learning_rate": 7.650748710719251e-06, "loss": 0.6926, "step": 9523 }, { "epoch": 1.7127573496358894, "grad_norm": 1.5970680713653564, "learning_rate": 7.650254853455313e-06, "loss": 0.6887, "step": 9524 }, { "epoch": 1.7129371572417513, "grad_norm": 1.525483250617981, "learning_rate": 7.649760960230893e-06, "loss": 0.6825, "step": 9525 }, { "epoch": 1.713116964847613, "grad_norm": 1.469682216644287, "learning_rate": 7.649267031052692e-06, "loss": 0.723, "step": 9526 }, { "epoch": 1.7132967724534747, "grad_norm": 1.1172302961349487, "learning_rate": 7.648773065927415e-06, "loss": 0.929, "step": 9527 }, { "epoch": 1.7134765800593366, "grad_norm": 1.5646966695785522, "learning_rate": 7.648279064861763e-06, "loss": 0.7687, "step": 9528 }, { "epoch": 1.7136563876651982, "grad_norm": 1.443150520324707, "learning_rate": 7.64778502786244e-06, "loss": 0.7726, "step": 9529 }, { "epoch": 1.71383619527106, "grad_norm": 1.6254230737686157, "learning_rate": 7.647290954936149e-06, "loss": 0.7204, "step": 9530 }, { "epoch": 1.7140160028769218, "grad_norm": 1.5408929586410522, "learning_rate": 7.646796846089593e-06, "loss": 0.685, "step": 9531 }, { "epoch": 1.7141958104827835, "grad_norm": 1.4817724227905273, "learning_rate": 7.646302701329474e-06, "loss": 0.7692, "step": 9532 }, { "epoch": 1.7143756180886451, "grad_norm": 3.04731822013855, "learning_rate": 7.645808520662504e-06, "loss": 0.7314, "step": 9533 }, { "epoch": 1.714555425694507, "grad_norm": 1.487980842590332, "learning_rate": 7.64531430409538e-06, "loss": 0.7425, "step": 9534 }, { "epoch": 1.7147352333003685, "grad_norm": 1.47427499294281, "learning_rate": 7.644820051634813e-06, "loss": 0.7522, "step": 9535 }, { "epoch": 1.7149150409062304, "grad_norm": 1.682055115699768, "learning_rate": 7.644325763287509e-06, "loss": 0.7866, "step": 9536 }, { "epoch": 1.715094848512092, "grad_norm": 1.5979613065719604, "learning_rate": 7.643831439060175e-06, "loss": 0.7236, "step": 9537 }, { "epoch": 1.7152746561179537, "grad_norm": 1.5265613794326782, "learning_rate": 7.643337078959515e-06, "loss": 0.6663, "step": 9538 }, { "epoch": 1.7154544637238156, "grad_norm": 1.5527195930480957, "learning_rate": 7.64284268299224e-06, "loss": 0.7188, "step": 9539 }, { "epoch": 1.7156342713296773, "grad_norm": 1.5229251384735107, "learning_rate": 7.642348251165058e-06, "loss": 0.7312, "step": 9540 }, { "epoch": 1.715814078935539, "grad_norm": 1.562731146812439, "learning_rate": 7.641853783484678e-06, "loss": 0.762, "step": 9541 }, { "epoch": 1.7159938865414008, "grad_norm": 1.5099304914474487, "learning_rate": 7.641359279957807e-06, "loss": 0.7571, "step": 9542 }, { "epoch": 1.7161736941472623, "grad_norm": 1.5118463039398193, "learning_rate": 7.640864740591158e-06, "loss": 0.6393, "step": 9543 }, { "epoch": 1.7163535017531242, "grad_norm": 1.4687100648880005, "learning_rate": 7.64037016539144e-06, "loss": 0.7536, "step": 9544 }, { "epoch": 1.7165333093589858, "grad_norm": 1.493607521057129, "learning_rate": 7.639875554365364e-06, "loss": 0.7262, "step": 9545 }, { "epoch": 1.7167131169648475, "grad_norm": 1.4613462686538696, "learning_rate": 7.639380907519638e-06, "loss": 0.7298, "step": 9546 }, { "epoch": 1.7168929245707094, "grad_norm": 1.1218647956848145, "learning_rate": 7.638886224860977e-06, "loss": 0.9424, "step": 9547 }, { "epoch": 1.717072732176571, "grad_norm": 1.5346825122833252, "learning_rate": 7.638391506396093e-06, "loss": 0.7368, "step": 9548 }, { "epoch": 1.7172525397824328, "grad_norm": 1.1718555688858032, "learning_rate": 7.637896752131699e-06, "loss": 0.8887, "step": 9549 }, { "epoch": 1.7174323473882946, "grad_norm": 1.4708560705184937, "learning_rate": 7.637401962074506e-06, "loss": 0.7608, "step": 9550 }, { "epoch": 1.717612154994156, "grad_norm": 1.5054810047149658, "learning_rate": 7.636907136231228e-06, "loss": 0.7291, "step": 9551 }, { "epoch": 1.717791962600018, "grad_norm": 1.4507927894592285, "learning_rate": 7.636412274608583e-06, "loss": 0.717, "step": 9552 }, { "epoch": 1.7179717702058797, "grad_norm": 1.592686653137207, "learning_rate": 7.635917377213283e-06, "loss": 0.7756, "step": 9553 }, { "epoch": 1.7181515778117413, "grad_norm": 1.0824263095855713, "learning_rate": 7.635422444052039e-06, "loss": 0.9722, "step": 9554 }, { "epoch": 1.7183313854176032, "grad_norm": 1.5113598108291626, "learning_rate": 7.634927475131574e-06, "loss": 0.724, "step": 9555 }, { "epoch": 1.718511193023465, "grad_norm": 1.5451525449752808, "learning_rate": 7.6344324704586e-06, "loss": 0.75, "step": 9556 }, { "epoch": 1.7186910006293266, "grad_norm": 1.4527812004089355, "learning_rate": 7.633937430039831e-06, "loss": 0.7813, "step": 9557 }, { "epoch": 1.7188708082351885, "grad_norm": 1.503665566444397, "learning_rate": 7.63344235388199e-06, "loss": 0.7019, "step": 9558 }, { "epoch": 1.7190506158410501, "grad_norm": 1.4270033836364746, "learning_rate": 7.632947241991792e-06, "loss": 0.7485, "step": 9559 }, { "epoch": 1.7192304234469118, "grad_norm": 1.0952821969985962, "learning_rate": 7.632452094375952e-06, "loss": 0.8967, "step": 9560 }, { "epoch": 1.7194102310527737, "grad_norm": 1.5762746334075928, "learning_rate": 7.631956911041195e-06, "loss": 0.7121, "step": 9561 }, { "epoch": 1.7195900386586351, "grad_norm": 1.052423119544983, "learning_rate": 7.631461691994233e-06, "loss": 0.9267, "step": 9562 }, { "epoch": 1.719769846264497, "grad_norm": 1.4929510354995728, "learning_rate": 7.630966437241791e-06, "loss": 0.8203, "step": 9563 }, { "epoch": 1.7199496538703587, "grad_norm": 1.0347630977630615, "learning_rate": 7.630471146790586e-06, "loss": 0.8973, "step": 9564 }, { "epoch": 1.7201294614762204, "grad_norm": 1.5406240224838257, "learning_rate": 7.629975820647339e-06, "loss": 0.6745, "step": 9565 }, { "epoch": 1.7203092690820823, "grad_norm": 1.4618513584136963, "learning_rate": 7.629480458818771e-06, "loss": 0.6866, "step": 9566 }, { "epoch": 1.720489076687944, "grad_norm": 1.0290231704711914, "learning_rate": 7.628985061311603e-06, "loss": 0.9076, "step": 9567 }, { "epoch": 1.7206688842938056, "grad_norm": 1.4546289443969727, "learning_rate": 7.628489628132558e-06, "loss": 0.6965, "step": 9568 }, { "epoch": 1.7208486918996675, "grad_norm": 1.4594758749008179, "learning_rate": 7.6279941592883564e-06, "loss": 0.7171, "step": 9569 }, { "epoch": 1.721028499505529, "grad_norm": 1.4224016666412354, "learning_rate": 7.627498654785724e-06, "loss": 0.7294, "step": 9570 }, { "epoch": 1.7212083071113908, "grad_norm": 1.5169926881790161, "learning_rate": 7.627003114631382e-06, "loss": 0.7615, "step": 9571 }, { "epoch": 1.7213881147172525, "grad_norm": 1.4770019054412842, "learning_rate": 7.626507538832053e-06, "loss": 0.7043, "step": 9572 }, { "epoch": 1.7215679223231142, "grad_norm": 1.481811285018921, "learning_rate": 7.626011927394466e-06, "loss": 0.7366, "step": 9573 }, { "epoch": 1.721747729928976, "grad_norm": 1.4855772256851196, "learning_rate": 7.62551628032534e-06, "loss": 0.7108, "step": 9574 }, { "epoch": 1.7219275375348377, "grad_norm": 1.4790689945220947, "learning_rate": 7.625020597631405e-06, "loss": 0.7194, "step": 9575 }, { "epoch": 1.7221073451406994, "grad_norm": 1.089300274848938, "learning_rate": 7.624524879319384e-06, "loss": 0.9037, "step": 9576 }, { "epoch": 1.7222871527465613, "grad_norm": 1.1520472764968872, "learning_rate": 7.624029125396004e-06, "loss": 0.8904, "step": 9577 }, { "epoch": 1.7224669603524227, "grad_norm": 1.1424901485443115, "learning_rate": 7.623533335867992e-06, "loss": 0.9319, "step": 9578 }, { "epoch": 1.7226467679582846, "grad_norm": 1.5159213542938232, "learning_rate": 7.623037510742075e-06, "loss": 0.7889, "step": 9579 }, { "epoch": 1.7228265755641463, "grad_norm": 1.5551340579986572, "learning_rate": 7.62254165002498e-06, "loss": 0.7405, "step": 9580 }, { "epoch": 1.723006383170008, "grad_norm": 1.4645164012908936, "learning_rate": 7.6220457537234384e-06, "loss": 0.7531, "step": 9581 }, { "epoch": 1.7231861907758699, "grad_norm": 1.882377028465271, "learning_rate": 7.621549821844174e-06, "loss": 0.7046, "step": 9582 }, { "epoch": 1.7233659983817315, "grad_norm": 1.538437843322754, "learning_rate": 7.621053854393921e-06, "loss": 0.7044, "step": 9583 }, { "epoch": 1.7235458059875932, "grad_norm": 1.5290454626083374, "learning_rate": 7.620557851379403e-06, "loss": 0.7554, "step": 9584 }, { "epoch": 1.723725613593455, "grad_norm": 1.2799632549285889, "learning_rate": 7.6200618128073555e-06, "loss": 0.9532, "step": 9585 }, { "epoch": 1.7239054211993168, "grad_norm": 1.5383533239364624, "learning_rate": 7.619565738684507e-06, "loss": 0.7778, "step": 9586 }, { "epoch": 1.7240852288051784, "grad_norm": 1.452824592590332, "learning_rate": 7.619069629017589e-06, "loss": 0.7657, "step": 9587 }, { "epoch": 1.7242650364110403, "grad_norm": 1.4763877391815186, "learning_rate": 7.618573483813332e-06, "loss": 0.7283, "step": 9588 }, { "epoch": 1.7244448440169018, "grad_norm": 1.542962908744812, "learning_rate": 7.618077303078469e-06, "loss": 0.7175, "step": 9589 }, { "epoch": 1.7246246516227637, "grad_norm": 1.5775245428085327, "learning_rate": 7.617581086819732e-06, "loss": 0.7141, "step": 9590 }, { "epoch": 1.7248044592286254, "grad_norm": 1.4879223108291626, "learning_rate": 7.617084835043853e-06, "loss": 0.7458, "step": 9591 }, { "epoch": 1.724984266834487, "grad_norm": 1.4676401615142822, "learning_rate": 7.616588547757569e-06, "loss": 0.745, "step": 9592 }, { "epoch": 1.725164074440349, "grad_norm": 1.2641085386276245, "learning_rate": 7.61609222496761e-06, "loss": 0.9164, "step": 9593 }, { "epoch": 1.7253438820462106, "grad_norm": 1.438889741897583, "learning_rate": 7.615595866680714e-06, "loss": 0.7274, "step": 9594 }, { "epoch": 1.7255236896520723, "grad_norm": 1.4821715354919434, "learning_rate": 7.615099472903613e-06, "loss": 0.6951, "step": 9595 }, { "epoch": 1.7257034972579341, "grad_norm": 1.4758223295211792, "learning_rate": 7.614603043643044e-06, "loss": 0.762, "step": 9596 }, { "epoch": 1.7258833048637956, "grad_norm": 1.09391450881958, "learning_rate": 7.614106578905742e-06, "loss": 0.9314, "step": 9597 }, { "epoch": 1.7260631124696575, "grad_norm": 1.1363087892532349, "learning_rate": 7.613610078698444e-06, "loss": 0.9016, "step": 9598 }, { "epoch": 1.7262429200755192, "grad_norm": 1.023240327835083, "learning_rate": 7.613113543027888e-06, "loss": 0.948, "step": 9599 }, { "epoch": 1.7264227276813808, "grad_norm": 1.3981502056121826, "learning_rate": 7.612616971900808e-06, "loss": 0.7596, "step": 9600 }, { "epoch": 1.7266025352872427, "grad_norm": 1.478158712387085, "learning_rate": 7.612120365323943e-06, "loss": 0.6893, "step": 9601 }, { "epoch": 1.7267823428931044, "grad_norm": 1.4844560623168945, "learning_rate": 7.611623723304034e-06, "loss": 0.709, "step": 9602 }, { "epoch": 1.726962150498966, "grad_norm": 1.5543699264526367, "learning_rate": 7.611127045847817e-06, "loss": 0.6976, "step": 9603 }, { "epoch": 1.727141958104828, "grad_norm": 1.5297818183898926, "learning_rate": 7.610630332962032e-06, "loss": 0.753, "step": 9604 }, { "epoch": 1.7273217657106894, "grad_norm": 1.4918149709701538, "learning_rate": 7.610133584653421e-06, "loss": 0.7636, "step": 9605 }, { "epoch": 1.7275015733165513, "grad_norm": 1.563568353652954, "learning_rate": 7.609636800928719e-06, "loss": 0.7984, "step": 9606 }, { "epoch": 1.727681380922413, "grad_norm": 1.158875584602356, "learning_rate": 7.609139981794672e-06, "loss": 0.9456, "step": 9607 }, { "epoch": 1.7278611885282746, "grad_norm": 1.2617828845977783, "learning_rate": 7.608643127258018e-06, "loss": 0.9142, "step": 9608 }, { "epoch": 1.7280409961341365, "grad_norm": 1.5750901699066162, "learning_rate": 7.6081462373255e-06, "loss": 0.7794, "step": 9609 }, { "epoch": 1.7282208037399982, "grad_norm": 1.4220490455627441, "learning_rate": 7.60764931200386e-06, "loss": 0.7013, "step": 9610 }, { "epoch": 1.7284006113458599, "grad_norm": 1.5080052614212036, "learning_rate": 7.607152351299841e-06, "loss": 0.6793, "step": 9611 }, { "epoch": 1.7285804189517218, "grad_norm": 1.5701146125793457, "learning_rate": 7.606655355220183e-06, "loss": 0.7565, "step": 9612 }, { "epoch": 1.7287602265575834, "grad_norm": 1.607469916343689, "learning_rate": 7.606158323771634e-06, "loss": 0.7702, "step": 9613 }, { "epoch": 1.728940034163445, "grad_norm": 1.4986008405685425, "learning_rate": 7.605661256960936e-06, "loss": 0.7343, "step": 9614 }, { "epoch": 1.729119841769307, "grad_norm": 1.6289793252944946, "learning_rate": 7.605164154794834e-06, "loss": 0.7291, "step": 9615 }, { "epoch": 1.7292996493751684, "grad_norm": 1.1764695644378662, "learning_rate": 7.604667017280072e-06, "loss": 0.9086, "step": 9616 }, { "epoch": 1.7294794569810303, "grad_norm": 1.5264861583709717, "learning_rate": 7.604169844423397e-06, "loss": 0.6961, "step": 9617 }, { "epoch": 1.729659264586892, "grad_norm": 1.4780386686325073, "learning_rate": 7.603672636231554e-06, "loss": 0.7651, "step": 9618 }, { "epoch": 1.7298390721927537, "grad_norm": 1.3792699575424194, "learning_rate": 7.603175392711289e-06, "loss": 0.6926, "step": 9619 }, { "epoch": 1.7300188797986156, "grad_norm": 1.1807711124420166, "learning_rate": 7.60267811386935e-06, "loss": 0.9032, "step": 9620 }, { "epoch": 1.7301986874044772, "grad_norm": 1.2751671075820923, "learning_rate": 7.602180799712485e-06, "loss": 0.9216, "step": 9621 }, { "epoch": 1.730378495010339, "grad_norm": 1.491129994392395, "learning_rate": 7.6016834502474415e-06, "loss": 0.7257, "step": 9622 }, { "epoch": 1.7305583026162008, "grad_norm": 1.1194509267807007, "learning_rate": 7.6011860654809655e-06, "loss": 0.8841, "step": 9623 }, { "epoch": 1.7307381102220623, "grad_norm": 1.481885552406311, "learning_rate": 7.600688645419807e-06, "loss": 0.7121, "step": 9624 }, { "epoch": 1.7309179178279241, "grad_norm": 1.535595178604126, "learning_rate": 7.600191190070718e-06, "loss": 0.7589, "step": 9625 }, { "epoch": 1.7310977254337858, "grad_norm": 1.591878890991211, "learning_rate": 7.5996936994404465e-06, "loss": 0.7014, "step": 9626 }, { "epoch": 1.7312775330396475, "grad_norm": 1.49924635887146, "learning_rate": 7.599196173535741e-06, "loss": 0.722, "step": 9627 }, { "epoch": 1.7314573406455094, "grad_norm": 1.4629435539245605, "learning_rate": 7.598698612363355e-06, "loss": 0.7073, "step": 9628 }, { "epoch": 1.731637148251371, "grad_norm": 1.5802209377288818, "learning_rate": 7.598201015930038e-06, "loss": 0.7339, "step": 9629 }, { "epoch": 1.7318169558572327, "grad_norm": 1.581957459449768, "learning_rate": 7.597703384242544e-06, "loss": 0.8089, "step": 9630 }, { "epoch": 1.7319967634630946, "grad_norm": 1.681734561920166, "learning_rate": 7.597205717307623e-06, "loss": 0.8048, "step": 9631 }, { "epoch": 1.732176571068956, "grad_norm": 1.2217530012130737, "learning_rate": 7.596708015132028e-06, "loss": 0.9261, "step": 9632 }, { "epoch": 1.732356378674818, "grad_norm": 1.4553371667861938, "learning_rate": 7.596210277722511e-06, "loss": 0.6979, "step": 9633 }, { "epoch": 1.7325361862806796, "grad_norm": 1.380274772644043, "learning_rate": 7.595712505085828e-06, "loss": 0.7399, "step": 9634 }, { "epoch": 1.7327159938865413, "grad_norm": 1.1453702449798584, "learning_rate": 7.595214697228732e-06, "loss": 0.8928, "step": 9635 }, { "epoch": 1.7328958014924032, "grad_norm": 1.4020919799804688, "learning_rate": 7.59471685415798e-06, "loss": 0.6876, "step": 9636 }, { "epoch": 1.7330756090982649, "grad_norm": 1.5215498208999634, "learning_rate": 7.594218975880323e-06, "loss": 0.7784, "step": 9637 }, { "epoch": 1.7332554167041265, "grad_norm": 1.4456101655960083, "learning_rate": 7.5937210624025196e-06, "loss": 0.6986, "step": 9638 }, { "epoch": 1.7334352243099884, "grad_norm": 1.6950796842575073, "learning_rate": 7.593223113731323e-06, "loss": 0.7801, "step": 9639 }, { "epoch": 1.73361503191585, "grad_norm": 1.7980250120162964, "learning_rate": 7.592725129873493e-06, "loss": 0.7324, "step": 9640 }, { "epoch": 1.7337948395217118, "grad_norm": 1.4732208251953125, "learning_rate": 7.592227110835784e-06, "loss": 0.6481, "step": 9641 }, { "epoch": 1.7339746471275737, "grad_norm": 1.1067544221878052, "learning_rate": 7.591729056624955e-06, "loss": 0.9471, "step": 9642 }, { "epoch": 1.734154454733435, "grad_norm": 1.3599936962127686, "learning_rate": 7.5912309672477635e-06, "loss": 0.6636, "step": 9643 }, { "epoch": 1.734334262339297, "grad_norm": 1.1661601066589355, "learning_rate": 7.5907328427109685e-06, "loss": 0.8879, "step": 9644 }, { "epoch": 1.7345140699451587, "grad_norm": 1.4984009265899658, "learning_rate": 7.590234683021327e-06, "loss": 0.7051, "step": 9645 }, { "epoch": 1.7346938775510203, "grad_norm": 1.5929569005966187, "learning_rate": 7.5897364881856e-06, "loss": 0.7289, "step": 9646 }, { "epoch": 1.7348736851568822, "grad_norm": 1.5089844465255737, "learning_rate": 7.589238258210545e-06, "loss": 0.7068, "step": 9647 }, { "epoch": 1.735053492762744, "grad_norm": 1.5461664199829102, "learning_rate": 7.588739993102927e-06, "loss": 0.7208, "step": 9648 }, { "epoch": 1.7352333003686056, "grad_norm": 1.026774525642395, "learning_rate": 7.5882416928695035e-06, "loss": 0.9338, "step": 9649 }, { "epoch": 1.7354131079744675, "grad_norm": 1.4395300149917603, "learning_rate": 7.587743357517036e-06, "loss": 0.7864, "step": 9650 }, { "epoch": 1.735592915580329, "grad_norm": 2.0055558681488037, "learning_rate": 7.587244987052287e-06, "loss": 0.7046, "step": 9651 }, { "epoch": 1.7357727231861908, "grad_norm": 1.290809154510498, "learning_rate": 7.5867465814820185e-06, "loss": 0.9258, "step": 9652 }, { "epoch": 1.7359525307920525, "grad_norm": 1.5064153671264648, "learning_rate": 7.5862481408129916e-06, "loss": 0.7113, "step": 9653 }, { "epoch": 1.7361323383979141, "grad_norm": 1.48502516746521, "learning_rate": 7.585749665051972e-06, "loss": 0.7034, "step": 9654 }, { "epoch": 1.736312146003776, "grad_norm": 1.5649218559265137, "learning_rate": 7.585251154205722e-06, "loss": 0.7561, "step": 9655 }, { "epoch": 1.7364919536096377, "grad_norm": 1.3410766124725342, "learning_rate": 7.584752608281006e-06, "loss": 0.6767, "step": 9656 }, { "epoch": 1.7366717612154994, "grad_norm": 1.4867678880691528, "learning_rate": 7.584254027284588e-06, "loss": 0.7644, "step": 9657 }, { "epoch": 1.7368515688213613, "grad_norm": 1.50770103931427, "learning_rate": 7.583755411223236e-06, "loss": 0.7107, "step": 9658 }, { "epoch": 1.7370313764272227, "grad_norm": 1.5942158699035645, "learning_rate": 7.583256760103712e-06, "loss": 0.8135, "step": 9659 }, { "epoch": 1.7372111840330846, "grad_norm": 1.601393699645996, "learning_rate": 7.5827580739327835e-06, "loss": 0.7005, "step": 9660 }, { "epoch": 1.7373909916389463, "grad_norm": 1.47701895236969, "learning_rate": 7.582259352717216e-06, "loss": 0.7203, "step": 9661 }, { "epoch": 1.737570799244808, "grad_norm": 1.5142738819122314, "learning_rate": 7.581760596463778e-06, "loss": 0.7489, "step": 9662 }, { "epoch": 1.7377506068506698, "grad_norm": 1.5300887823104858, "learning_rate": 7.581261805179236e-06, "loss": 0.7392, "step": 9663 }, { "epoch": 1.7379304144565315, "grad_norm": 1.5360053777694702, "learning_rate": 7.58076297887036e-06, "loss": 0.6669, "step": 9664 }, { "epoch": 1.7381102220623932, "grad_norm": 1.5151009559631348, "learning_rate": 7.580264117543914e-06, "loss": 0.7516, "step": 9665 }, { "epoch": 1.738290029668255, "grad_norm": 1.5523549318313599, "learning_rate": 7.579765221206672e-06, "loss": 0.768, "step": 9666 }, { "epoch": 1.7384698372741167, "grad_norm": 1.4690735340118408, "learning_rate": 7.579266289865399e-06, "loss": 0.7538, "step": 9667 }, { "epoch": 1.7386496448799784, "grad_norm": 1.4635401964187622, "learning_rate": 7.5787673235268675e-06, "loss": 0.7562, "step": 9668 }, { "epoch": 1.7388294524858403, "grad_norm": 1.6463611125946045, "learning_rate": 7.578268322197847e-06, "loss": 0.687, "step": 9669 }, { "epoch": 1.7390092600917018, "grad_norm": 1.5110501050949097, "learning_rate": 7.57776928588511e-06, "loss": 0.76, "step": 9670 }, { "epoch": 1.7391890676975637, "grad_norm": 1.4862900972366333, "learning_rate": 7.577270214595424e-06, "loss": 0.74, "step": 9671 }, { "epoch": 1.7393688753034253, "grad_norm": 1.4964402914047241, "learning_rate": 7.576771108335565e-06, "loss": 0.7306, "step": 9672 }, { "epoch": 1.739548682909287, "grad_norm": 1.475831151008606, "learning_rate": 7.576271967112301e-06, "loss": 0.82, "step": 9673 }, { "epoch": 1.7397284905151489, "grad_norm": 1.4576349258422852, "learning_rate": 7.575772790932407e-06, "loss": 0.7281, "step": 9674 }, { "epoch": 1.7399082981210106, "grad_norm": 1.652455449104309, "learning_rate": 7.575273579802658e-06, "loss": 0.771, "step": 9675 }, { "epoch": 1.7400881057268722, "grad_norm": 1.5246609449386597, "learning_rate": 7.574774333729824e-06, "loss": 0.7237, "step": 9676 }, { "epoch": 1.7402679133327341, "grad_norm": 1.5502080917358398, "learning_rate": 7.574275052720681e-06, "loss": 0.7865, "step": 9677 }, { "epoch": 1.7404477209385956, "grad_norm": 1.4389753341674805, "learning_rate": 7.573775736782003e-06, "loss": 0.7197, "step": 9678 }, { "epoch": 1.7406275285444575, "grad_norm": 1.424812912940979, "learning_rate": 7.573276385920565e-06, "loss": 0.7267, "step": 9679 }, { "epoch": 1.7408073361503191, "grad_norm": 1.56007719039917, "learning_rate": 7.572777000143145e-06, "loss": 0.7129, "step": 9680 }, { "epoch": 1.7409871437561808, "grad_norm": 1.482908010482788, "learning_rate": 7.572277579456515e-06, "loss": 0.7135, "step": 9681 }, { "epoch": 1.7411669513620427, "grad_norm": 1.2745171785354614, "learning_rate": 7.5717781238674545e-06, "loss": 0.9233, "step": 9682 }, { "epoch": 1.7413467589679044, "grad_norm": 1.2986273765563965, "learning_rate": 7.571278633382739e-06, "loss": 0.9022, "step": 9683 }, { "epoch": 1.741526566573766, "grad_norm": 1.5570189952850342, "learning_rate": 7.5707791080091476e-06, "loss": 0.7655, "step": 9684 }, { "epoch": 1.741706374179628, "grad_norm": 1.435353398323059, "learning_rate": 7.570279547753454e-06, "loss": 0.7039, "step": 9685 }, { "epoch": 1.7418861817854894, "grad_norm": 1.5303641557693481, "learning_rate": 7.569779952622442e-06, "loss": 0.7391, "step": 9686 }, { "epoch": 1.7420659893913513, "grad_norm": 1.4077316522598267, "learning_rate": 7.569280322622887e-06, "loss": 0.7392, "step": 9687 }, { "epoch": 1.742245796997213, "grad_norm": 1.4348986148834229, "learning_rate": 7.568780657761569e-06, "loss": 0.7356, "step": 9688 }, { "epoch": 1.7424256046030746, "grad_norm": 1.4058345556259155, "learning_rate": 7.568280958045268e-06, "loss": 0.7387, "step": 9689 }, { "epoch": 1.7426054122089365, "grad_norm": 1.4674246311187744, "learning_rate": 7.567781223480766e-06, "loss": 0.7092, "step": 9690 }, { "epoch": 1.7427852198147982, "grad_norm": 1.5437595844268799, "learning_rate": 7.56728145407484e-06, "loss": 0.6969, "step": 9691 }, { "epoch": 1.7429650274206598, "grad_norm": 1.1493338346481323, "learning_rate": 7.566781649834274e-06, "loss": 0.9324, "step": 9692 }, { "epoch": 1.7431448350265217, "grad_norm": 1.4416537284851074, "learning_rate": 7.566281810765849e-06, "loss": 0.7392, "step": 9693 }, { "epoch": 1.7433246426323834, "grad_norm": 1.4529070854187012, "learning_rate": 7.565781936876349e-06, "loss": 0.7087, "step": 9694 }, { "epoch": 1.743504450238245, "grad_norm": 1.5324445962905884, "learning_rate": 7.5652820281725515e-06, "loss": 0.7775, "step": 9695 }, { "epoch": 1.743684257844107, "grad_norm": 1.5578705072402954, "learning_rate": 7.564782084661244e-06, "loss": 0.6824, "step": 9696 }, { "epoch": 1.7438640654499684, "grad_norm": 1.4447386264801025, "learning_rate": 7.56428210634921e-06, "loss": 0.6447, "step": 9697 }, { "epoch": 1.7440438730558303, "grad_norm": 1.6584094762802124, "learning_rate": 7.563782093243233e-06, "loss": 0.8167, "step": 9698 }, { "epoch": 1.744223680661692, "grad_norm": 1.5156899690628052, "learning_rate": 7.563282045350094e-06, "loss": 0.7595, "step": 9699 }, { "epoch": 1.7444034882675536, "grad_norm": 1.6269516944885254, "learning_rate": 7.562781962676583e-06, "loss": 0.728, "step": 9700 }, { "epoch": 1.7445832958734155, "grad_norm": 1.443882942199707, "learning_rate": 7.562281845229483e-06, "loss": 0.74, "step": 9701 }, { "epoch": 1.7447631034792772, "grad_norm": 1.4499872922897339, "learning_rate": 7.561781693015582e-06, "loss": 0.7068, "step": 9702 }, { "epoch": 1.7449429110851389, "grad_norm": 1.5730198621749878, "learning_rate": 7.5612815060416626e-06, "loss": 0.7559, "step": 9703 }, { "epoch": 1.7451227186910008, "grad_norm": 1.6719884872436523, "learning_rate": 7.560781284314516e-06, "loss": 0.6948, "step": 9704 }, { "epoch": 1.7453025262968622, "grad_norm": 1.4754184484481812, "learning_rate": 7.560281027840925e-06, "loss": 0.7105, "step": 9705 }, { "epoch": 1.7454823339027241, "grad_norm": 1.444893479347229, "learning_rate": 7.559780736627682e-06, "loss": 0.7599, "step": 9706 }, { "epoch": 1.7456621415085858, "grad_norm": 1.3883171081542969, "learning_rate": 7.559280410681573e-06, "loss": 0.766, "step": 9707 }, { "epoch": 1.7458419491144475, "grad_norm": 1.1663174629211426, "learning_rate": 7.558780050009387e-06, "loss": 0.9271, "step": 9708 }, { "epoch": 1.7460217567203093, "grad_norm": 2.3412232398986816, "learning_rate": 7.5582796546179125e-06, "loss": 0.8631, "step": 9709 }, { "epoch": 1.746201564326171, "grad_norm": 1.2528791427612305, "learning_rate": 7.557779224513939e-06, "loss": 0.9129, "step": 9710 }, { "epoch": 1.7463813719320327, "grad_norm": 1.1310533285140991, "learning_rate": 7.557278759704258e-06, "loss": 0.9423, "step": 9711 }, { "epoch": 1.7465611795378946, "grad_norm": 1.4089168310165405, "learning_rate": 7.556778260195661e-06, "loss": 0.7369, "step": 9712 }, { "epoch": 1.746740987143756, "grad_norm": 1.6945550441741943, "learning_rate": 7.556277725994937e-06, "loss": 0.7752, "step": 9713 }, { "epoch": 1.746920794749618, "grad_norm": 1.5182055234909058, "learning_rate": 7.555777157108879e-06, "loss": 0.7008, "step": 9714 }, { "epoch": 1.7471006023554796, "grad_norm": 1.5909367799758911, "learning_rate": 7.555276553544277e-06, "loss": 0.8042, "step": 9715 }, { "epoch": 1.7472804099613413, "grad_norm": 1.4105902910232544, "learning_rate": 7.554775915307928e-06, "loss": 0.7245, "step": 9716 }, { "epoch": 1.7474602175672032, "grad_norm": 1.6199764013290405, "learning_rate": 7.5542752424066194e-06, "loss": 0.711, "step": 9717 }, { "epoch": 1.7476400251730648, "grad_norm": 1.5318269729614258, "learning_rate": 7.5537745348471496e-06, "loss": 0.734, "step": 9718 }, { "epoch": 1.7478198327789265, "grad_norm": 1.5167254209518433, "learning_rate": 7.553273792636307e-06, "loss": 0.7291, "step": 9719 }, { "epoch": 1.7479996403847884, "grad_norm": 1.359133005142212, "learning_rate": 7.552773015780892e-06, "loss": 0.7605, "step": 9720 }, { "epoch": 1.74817944799065, "grad_norm": 1.3925347328186035, "learning_rate": 7.5522722042876965e-06, "loss": 0.6919, "step": 9721 }, { "epoch": 1.7483592555965117, "grad_norm": 1.4024029970169067, "learning_rate": 7.5517713581635145e-06, "loss": 0.9543, "step": 9722 }, { "epoch": 1.7485390632023736, "grad_norm": 1.5197736024856567, "learning_rate": 7.551270477415145e-06, "loss": 0.7756, "step": 9723 }, { "epoch": 1.748718870808235, "grad_norm": 1.4655321836471558, "learning_rate": 7.550769562049381e-06, "loss": 0.7896, "step": 9724 }, { "epoch": 1.748898678414097, "grad_norm": 1.3104063272476196, "learning_rate": 7.5502686120730215e-06, "loss": 0.9241, "step": 9725 }, { "epoch": 1.7490784860199586, "grad_norm": 1.3699969053268433, "learning_rate": 7.549767627492865e-06, "loss": 0.6922, "step": 9726 }, { "epoch": 1.7492582936258203, "grad_norm": 1.47098970413208, "learning_rate": 7.549266608315706e-06, "loss": 0.7448, "step": 9727 }, { "epoch": 1.7494381012316822, "grad_norm": 1.5548152923583984, "learning_rate": 7.548765554548345e-06, "loss": 0.7584, "step": 9728 }, { "epoch": 1.7496179088375439, "grad_norm": 1.555484414100647, "learning_rate": 7.548264466197579e-06, "loss": 0.7074, "step": 9729 }, { "epoch": 1.7497977164434055, "grad_norm": 0.9463289380073547, "learning_rate": 7.547763343270209e-06, "loss": 0.9042, "step": 9730 }, { "epoch": 1.7499775240492674, "grad_norm": 1.487107515335083, "learning_rate": 7.547262185773032e-06, "loss": 0.6948, "step": 9731 }, { "epoch": 1.7501573316551289, "grad_norm": 1.5172115564346313, "learning_rate": 7.546760993712849e-06, "loss": 0.7423, "step": 9732 }, { "epoch": 1.7503371392609908, "grad_norm": 1.535721778869629, "learning_rate": 7.546259767096462e-06, "loss": 0.7498, "step": 9733 }, { "epoch": 1.7505169468668524, "grad_norm": 1.2321054935455322, "learning_rate": 7.545758505930672e-06, "loss": 0.921, "step": 9734 }, { "epoch": 1.7506967544727141, "grad_norm": 1.4280418157577515, "learning_rate": 7.5452572102222775e-06, "loss": 0.6897, "step": 9735 }, { "epoch": 1.750876562078576, "grad_norm": 1.5035654306411743, "learning_rate": 7.544755879978084e-06, "loss": 0.7037, "step": 9736 }, { "epoch": 1.7510563696844377, "grad_norm": 1.4945391416549683, "learning_rate": 7.5442545152048915e-06, "loss": 0.7763, "step": 9737 }, { "epoch": 1.7512361772902993, "grad_norm": 1.5583027601242065, "learning_rate": 7.543753115909504e-06, "loss": 0.7041, "step": 9738 }, { "epoch": 1.7514159848961612, "grad_norm": 1.6199119091033936, "learning_rate": 7.543251682098724e-06, "loss": 0.6861, "step": 9739 }, { "epoch": 1.7515957925020227, "grad_norm": 1.44894278049469, "learning_rate": 7.542750213779357e-06, "loss": 0.6844, "step": 9740 }, { "epoch": 1.7517756001078846, "grad_norm": 1.517850399017334, "learning_rate": 7.542248710958205e-06, "loss": 0.7172, "step": 9741 }, { "epoch": 1.7519554077137462, "grad_norm": 1.4800819158554077, "learning_rate": 7.541747173642073e-06, "loss": 0.7689, "step": 9742 }, { "epoch": 1.752135215319608, "grad_norm": 1.2121683359146118, "learning_rate": 7.541245601837768e-06, "loss": 0.91, "step": 9743 }, { "epoch": 1.7523150229254698, "grad_norm": 1.4848450422286987, "learning_rate": 7.540743995552094e-06, "loss": 0.6819, "step": 9744 }, { "epoch": 1.7524948305313315, "grad_norm": 1.6344102621078491, "learning_rate": 7.540242354791858e-06, "loss": 0.75, "step": 9745 }, { "epoch": 1.7526746381371932, "grad_norm": 1.6581298112869263, "learning_rate": 7.539740679563866e-06, "loss": 0.7623, "step": 9746 }, { "epoch": 1.752854445743055, "grad_norm": 1.5191705226898193, "learning_rate": 7.5392389698749266e-06, "loss": 0.7787, "step": 9747 }, { "epoch": 1.7530342533489165, "grad_norm": 1.503421425819397, "learning_rate": 7.538737225731845e-06, "loss": 0.7561, "step": 9748 }, { "epoch": 1.7532140609547784, "grad_norm": 1.4547467231750488, "learning_rate": 7.538235447141432e-06, "loss": 0.6865, "step": 9749 }, { "epoch": 1.7533938685606403, "grad_norm": 1.5332658290863037, "learning_rate": 7.537733634110493e-06, "loss": 0.7315, "step": 9750 }, { "epoch": 1.7535736761665017, "grad_norm": 1.5518590211868286, "learning_rate": 7.53723178664584e-06, "loss": 0.7324, "step": 9751 }, { "epoch": 1.7537534837723636, "grad_norm": 1.4907314777374268, "learning_rate": 7.536729904754279e-06, "loss": 0.7468, "step": 9752 }, { "epoch": 1.7539332913782253, "grad_norm": 1.6100667715072632, "learning_rate": 7.536227988442624e-06, "loss": 0.7454, "step": 9753 }, { "epoch": 1.754113098984087, "grad_norm": 1.5118616819381714, "learning_rate": 7.535726037717681e-06, "loss": 0.7363, "step": 9754 }, { "epoch": 1.7542929065899489, "grad_norm": 1.4551371335983276, "learning_rate": 7.535224052586263e-06, "loss": 0.6835, "step": 9755 }, { "epoch": 1.7544727141958105, "grad_norm": 1.4430558681488037, "learning_rate": 7.5347220330551815e-06, "loss": 0.6847, "step": 9756 }, { "epoch": 1.7546525218016722, "grad_norm": 1.4198521375656128, "learning_rate": 7.534219979131247e-06, "loss": 0.6935, "step": 9757 }, { "epoch": 1.754832329407534, "grad_norm": 1.5523138046264648, "learning_rate": 7.5337178908212745e-06, "loss": 0.7747, "step": 9758 }, { "epoch": 1.7550121370133955, "grad_norm": 1.564029574394226, "learning_rate": 7.533215768132075e-06, "loss": 0.6799, "step": 9759 }, { "epoch": 1.7551919446192574, "grad_norm": 1.6543124914169312, "learning_rate": 7.532713611070459e-06, "loss": 0.7359, "step": 9760 }, { "epoch": 1.755371752225119, "grad_norm": 1.4913930892944336, "learning_rate": 7.532211419643245e-06, "loss": 0.7352, "step": 9761 }, { "epoch": 1.7555515598309808, "grad_norm": 1.5415321588516235, "learning_rate": 7.531709193857244e-06, "loss": 0.7482, "step": 9762 }, { "epoch": 1.7557313674368427, "grad_norm": 1.4936507940292358, "learning_rate": 7.53120693371927e-06, "loss": 0.745, "step": 9763 }, { "epoch": 1.7559111750427043, "grad_norm": 1.5300947427749634, "learning_rate": 7.530704639236142e-06, "loss": 0.7578, "step": 9764 }, { "epoch": 1.756090982648566, "grad_norm": 1.3701677322387695, "learning_rate": 7.53020231041467e-06, "loss": 0.6864, "step": 9765 }, { "epoch": 1.756270790254428, "grad_norm": 1.5247920751571655, "learning_rate": 7.529699947261673e-06, "loss": 0.7582, "step": 9766 }, { "epoch": 1.7564505978602893, "grad_norm": 1.5817959308624268, "learning_rate": 7.529197549783967e-06, "loss": 0.7391, "step": 9767 }, { "epoch": 1.7566304054661512, "grad_norm": 1.4787193536758423, "learning_rate": 7.528695117988369e-06, "loss": 0.7076, "step": 9768 }, { "epoch": 1.756810213072013, "grad_norm": 1.1125035285949707, "learning_rate": 7.5281926518816985e-06, "loss": 0.9153, "step": 9769 }, { "epoch": 1.7569900206778746, "grad_norm": 1.6154487133026123, "learning_rate": 7.527690151470768e-06, "loss": 0.7018, "step": 9770 }, { "epoch": 1.7571698282837365, "grad_norm": 1.4910039901733398, "learning_rate": 7.5271876167624005e-06, "loss": 0.7752, "step": 9771 }, { "epoch": 1.7573496358895981, "grad_norm": 1.331161618232727, "learning_rate": 7.526685047763411e-06, "loss": 0.7673, "step": 9772 }, { "epoch": 1.7575294434954598, "grad_norm": 0.9966378211975098, "learning_rate": 7.526182444480623e-06, "loss": 0.9212, "step": 9773 }, { "epoch": 1.7577092511013217, "grad_norm": 1.4306564331054688, "learning_rate": 7.525679806920854e-06, "loss": 0.7721, "step": 9774 }, { "epoch": 1.7578890587071831, "grad_norm": 1.0122617483139038, "learning_rate": 7.525177135090923e-06, "loss": 0.9456, "step": 9775 }, { "epoch": 1.758068866313045, "grad_norm": 1.4807977676391602, "learning_rate": 7.524674428997652e-06, "loss": 0.7407, "step": 9776 }, { "epoch": 1.758248673918907, "grad_norm": 1.7746820449829102, "learning_rate": 7.524171688647861e-06, "loss": 0.706, "step": 9777 }, { "epoch": 1.7584284815247684, "grad_norm": 1.4594520330429077, "learning_rate": 7.523668914048372e-06, "loss": 0.7085, "step": 9778 }, { "epoch": 1.7586082891306303, "grad_norm": 1.617226481437683, "learning_rate": 7.523166105206009e-06, "loss": 0.722, "step": 9779 }, { "epoch": 1.758788096736492, "grad_norm": 1.1968153715133667, "learning_rate": 7.522663262127592e-06, "loss": 0.9447, "step": 9780 }, { "epoch": 1.7589679043423536, "grad_norm": 1.4384772777557373, "learning_rate": 7.522160384819944e-06, "loss": 0.7021, "step": 9781 }, { "epoch": 1.7591477119482155, "grad_norm": 1.3968400955200195, "learning_rate": 7.521657473289889e-06, "loss": 0.7119, "step": 9782 }, { "epoch": 1.7593275195540772, "grad_norm": 1.3956809043884277, "learning_rate": 7.5211545275442525e-06, "loss": 0.722, "step": 9783 }, { "epoch": 1.7595073271599388, "grad_norm": 1.4836453199386597, "learning_rate": 7.520651547589855e-06, "loss": 0.7583, "step": 9784 }, { "epoch": 1.7596871347658007, "grad_norm": 1.4744502305984497, "learning_rate": 7.520148533433524e-06, "loss": 0.6706, "step": 9785 }, { "epoch": 1.7598669423716622, "grad_norm": 1.436359167098999, "learning_rate": 7.519645485082086e-06, "loss": 0.7353, "step": 9786 }, { "epoch": 1.760046749977524, "grad_norm": 1.5438785552978516, "learning_rate": 7.519142402542362e-06, "loss": 0.792, "step": 9787 }, { "epoch": 1.7602265575833858, "grad_norm": 1.5091536045074463, "learning_rate": 7.518639285821182e-06, "loss": 0.6722, "step": 9788 }, { "epoch": 1.7604063651892474, "grad_norm": 1.6232922077178955, "learning_rate": 7.518136134925373e-06, "loss": 0.777, "step": 9789 }, { "epoch": 1.7605861727951093, "grad_norm": 1.1505569219589233, "learning_rate": 7.517632949861759e-06, "loss": 0.943, "step": 9790 }, { "epoch": 1.760765980400971, "grad_norm": 1.5184855461120605, "learning_rate": 7.517129730637172e-06, "loss": 0.7258, "step": 9791 }, { "epoch": 1.7609457880068327, "grad_norm": 1.445935606956482, "learning_rate": 7.516626477258435e-06, "loss": 0.773, "step": 9792 }, { "epoch": 1.7611255956126945, "grad_norm": 1.648044228553772, "learning_rate": 7.51612318973238e-06, "loss": 0.7358, "step": 9793 }, { "epoch": 1.761305403218556, "grad_norm": 1.4336096048355103, "learning_rate": 7.515619868065833e-06, "loss": 0.6894, "step": 9794 }, { "epoch": 1.761485210824418, "grad_norm": 1.4965258836746216, "learning_rate": 7.515116512265628e-06, "loss": 0.7806, "step": 9795 }, { "epoch": 1.7616650184302796, "grad_norm": 1.096378207206726, "learning_rate": 7.5146131223385895e-06, "loss": 0.9568, "step": 9796 }, { "epoch": 1.7618448260361412, "grad_norm": 1.3588805198669434, "learning_rate": 7.514109698291553e-06, "loss": 0.6902, "step": 9797 }, { "epoch": 1.7620246336420031, "grad_norm": 1.3881107568740845, "learning_rate": 7.5136062401313444e-06, "loss": 0.7222, "step": 9798 }, { "epoch": 1.7622044412478648, "grad_norm": 1.4460835456848145, "learning_rate": 7.513102747864798e-06, "loss": 0.7663, "step": 9799 }, { "epoch": 1.7623842488537265, "grad_norm": 1.509711742401123, "learning_rate": 7.512599221498744e-06, "loss": 0.7391, "step": 9800 }, { "epoch": 1.7625640564595884, "grad_norm": 1.1247423887252808, "learning_rate": 7.512095661040018e-06, "loss": 0.9758, "step": 9801 }, { "epoch": 1.7627438640654498, "grad_norm": 1.4610121250152588, "learning_rate": 7.511592066495448e-06, "loss": 0.7225, "step": 9802 }, { "epoch": 1.7629236716713117, "grad_norm": 1.4367042779922485, "learning_rate": 7.511088437871871e-06, "loss": 0.7292, "step": 9803 }, { "epoch": 1.7631034792771736, "grad_norm": 1.4754122495651245, "learning_rate": 7.510584775176118e-06, "loss": 0.6797, "step": 9804 }, { "epoch": 1.763283286883035, "grad_norm": 1.4931252002716064, "learning_rate": 7.510081078415024e-06, "loss": 0.7773, "step": 9805 }, { "epoch": 1.763463094488897, "grad_norm": 1.0772457122802734, "learning_rate": 7.509577347595421e-06, "loss": 0.9387, "step": 9806 }, { "epoch": 1.7636429020947586, "grad_norm": 1.3837950229644775, "learning_rate": 7.509073582724149e-06, "loss": 0.7612, "step": 9807 }, { "epoch": 1.7638227097006203, "grad_norm": 1.4679960012435913, "learning_rate": 7.5085697838080395e-06, "loss": 0.7951, "step": 9808 }, { "epoch": 1.7640025173064822, "grad_norm": 1.47291100025177, "learning_rate": 7.508065950853929e-06, "loss": 0.7499, "step": 9809 }, { "epoch": 1.7641823249123438, "grad_norm": 1.058293342590332, "learning_rate": 7.507562083868656e-06, "loss": 0.9489, "step": 9810 }, { "epoch": 1.7643621325182055, "grad_norm": 1.5081299543380737, "learning_rate": 7.507058182859055e-06, "loss": 0.7272, "step": 9811 }, { "epoch": 1.7645419401240674, "grad_norm": 1.54210364818573, "learning_rate": 7.506554247831964e-06, "loss": 0.739, "step": 9812 }, { "epoch": 1.7647217477299288, "grad_norm": 1.5645220279693604, "learning_rate": 7.5060502787942216e-06, "loss": 0.7412, "step": 9813 }, { "epoch": 1.7649015553357907, "grad_norm": 2.92865252494812, "learning_rate": 7.505546275752664e-06, "loss": 0.7774, "step": 9814 }, { "epoch": 1.7650813629416524, "grad_norm": 1.4847877025604248, "learning_rate": 7.505042238714133e-06, "loss": 0.7616, "step": 9815 }, { "epoch": 1.765261170547514, "grad_norm": 1.5092376470565796, "learning_rate": 7.504538167685465e-06, "loss": 0.7456, "step": 9816 }, { "epoch": 1.765440978153376, "grad_norm": 1.5891034603118896, "learning_rate": 7.504034062673499e-06, "loss": 0.7681, "step": 9817 }, { "epoch": 1.7656207857592376, "grad_norm": 1.5020064115524292, "learning_rate": 7.503529923685078e-06, "loss": 0.7589, "step": 9818 }, { "epoch": 1.7658005933650993, "grad_norm": 1.4425709247589111, "learning_rate": 7.50302575072704e-06, "loss": 0.7101, "step": 9819 }, { "epoch": 1.7659804009709612, "grad_norm": 1.216159462928772, "learning_rate": 7.502521543806226e-06, "loss": 0.9639, "step": 9820 }, { "epoch": 1.7661602085768227, "grad_norm": 1.1820259094238281, "learning_rate": 7.5020173029294795e-06, "loss": 0.9708, "step": 9821 }, { "epoch": 1.7663400161826845, "grad_norm": 1.1730387210845947, "learning_rate": 7.501513028103641e-06, "loss": 0.9244, "step": 9822 }, { "epoch": 1.7665198237885462, "grad_norm": 1.4010149240493774, "learning_rate": 7.5010087193355545e-06, "loss": 0.6923, "step": 9823 }, { "epoch": 1.7666996313944079, "grad_norm": 1.1159080266952515, "learning_rate": 7.500504376632059e-06, "loss": 0.8968, "step": 9824 }, { "epoch": 1.7668794390002698, "grad_norm": 1.502500057220459, "learning_rate": 7.500000000000001e-06, "loss": 0.7696, "step": 9825 }, { "epoch": 1.7670592466061315, "grad_norm": 1.4506134986877441, "learning_rate": 7.499495589446223e-06, "loss": 0.717, "step": 9826 }, { "epoch": 1.7672390542119931, "grad_norm": 1.1825125217437744, "learning_rate": 7.498991144977571e-06, "loss": 0.944, "step": 9827 }, { "epoch": 1.767418861817855, "grad_norm": 1.4514198303222656, "learning_rate": 7.498486666600886e-06, "loss": 0.7026, "step": 9828 }, { "epoch": 1.7675986694237165, "grad_norm": 1.424949288368225, "learning_rate": 7.497982154323017e-06, "loss": 0.6926, "step": 9829 }, { "epoch": 1.7677784770295784, "grad_norm": 1.4580893516540527, "learning_rate": 7.497477608150807e-06, "loss": 0.7722, "step": 9830 }, { "epoch": 1.76795828463544, "grad_norm": 1.1146186590194702, "learning_rate": 7.496973028091102e-06, "loss": 0.9475, "step": 9831 }, { "epoch": 1.7681380922413017, "grad_norm": 1.0999712944030762, "learning_rate": 7.496468414150751e-06, "loss": 0.9343, "step": 9832 }, { "epoch": 1.7683178998471636, "grad_norm": 1.019752025604248, "learning_rate": 7.495963766336599e-06, "loss": 0.9129, "step": 9833 }, { "epoch": 1.7684977074530253, "grad_norm": 1.4946165084838867, "learning_rate": 7.495459084655493e-06, "loss": 0.684, "step": 9834 }, { "epoch": 1.768677515058887, "grad_norm": 1.6033213138580322, "learning_rate": 7.494954369114284e-06, "loss": 0.7915, "step": 9835 }, { "epoch": 1.7688573226647488, "grad_norm": 1.4367432594299316, "learning_rate": 7.494449619719815e-06, "loss": 0.767, "step": 9836 }, { "epoch": 1.7690371302706105, "grad_norm": 1.3637561798095703, "learning_rate": 7.4939448364789395e-06, "loss": 0.7459, "step": 9837 }, { "epoch": 1.7692169378764722, "grad_norm": 1.4979411363601685, "learning_rate": 7.493440019398503e-06, "loss": 0.735, "step": 9838 }, { "epoch": 1.769396745482334, "grad_norm": 1.3543158769607544, "learning_rate": 7.4929351684853604e-06, "loss": 0.6621, "step": 9839 }, { "epoch": 1.7695765530881955, "grad_norm": 1.4842102527618408, "learning_rate": 7.492430283746356e-06, "loss": 0.7284, "step": 9840 }, { "epoch": 1.7697563606940574, "grad_norm": 1.4059169292449951, "learning_rate": 7.491925365188343e-06, "loss": 0.7519, "step": 9841 }, { "epoch": 1.769936168299919, "grad_norm": 1.4296900033950806, "learning_rate": 7.491420412818174e-06, "loss": 0.7384, "step": 9842 }, { "epoch": 1.7701159759057807, "grad_norm": 1.4891396760940552, "learning_rate": 7.490915426642698e-06, "loss": 0.7238, "step": 9843 }, { "epoch": 1.7702957835116426, "grad_norm": 1.3308120965957642, "learning_rate": 7.490410406668767e-06, "loss": 0.9597, "step": 9844 }, { "epoch": 1.7704755911175043, "grad_norm": 1.4970840215682983, "learning_rate": 7.489905352903237e-06, "loss": 0.7189, "step": 9845 }, { "epoch": 1.770655398723366, "grad_norm": 1.5595906972885132, "learning_rate": 7.489400265352957e-06, "loss": 0.7362, "step": 9846 }, { "epoch": 1.7708352063292279, "grad_norm": 1.5322518348693848, "learning_rate": 7.488895144024784e-06, "loss": 0.7481, "step": 9847 }, { "epoch": 1.7710150139350893, "grad_norm": 1.6138336658477783, "learning_rate": 7.488389988925567e-06, "loss": 0.7412, "step": 9848 }, { "epoch": 1.7711948215409512, "grad_norm": 1.483580231666565, "learning_rate": 7.487884800062164e-06, "loss": 0.707, "step": 9849 }, { "epoch": 1.7713746291468129, "grad_norm": 1.4645003080368042, "learning_rate": 7.487379577441429e-06, "loss": 0.7489, "step": 9850 }, { "epoch": 1.7715544367526745, "grad_norm": 1.1340922117233276, "learning_rate": 7.486874321070216e-06, "loss": 0.9563, "step": 9851 }, { "epoch": 1.7717342443585364, "grad_norm": 1.438195824623108, "learning_rate": 7.4863690309553826e-06, "loss": 0.7163, "step": 9852 }, { "epoch": 1.771914051964398, "grad_norm": 1.4809935092926025, "learning_rate": 7.485863707103783e-06, "loss": 0.6978, "step": 9853 }, { "epoch": 1.7720938595702598, "grad_norm": 1.0197296142578125, "learning_rate": 7.4853583495222745e-06, "loss": 0.9031, "step": 9854 }, { "epoch": 1.7722736671761217, "grad_norm": 1.6020479202270508, "learning_rate": 7.484852958217715e-06, "loss": 0.6922, "step": 9855 }, { "epoch": 1.7724534747819831, "grad_norm": 1.5367858409881592, "learning_rate": 7.4843475331969614e-06, "loss": 0.7129, "step": 9856 }, { "epoch": 1.772633282387845, "grad_norm": 1.4839938879013062, "learning_rate": 7.483842074466871e-06, "loss": 0.7945, "step": 9857 }, { "epoch": 1.7728130899937067, "grad_norm": 1.1461336612701416, "learning_rate": 7.483336582034304e-06, "loss": 0.9495, "step": 9858 }, { "epoch": 1.7729928975995684, "grad_norm": 1.5674221515655518, "learning_rate": 7.482831055906118e-06, "loss": 0.7745, "step": 9859 }, { "epoch": 1.7731727052054302, "grad_norm": 1.5081968307495117, "learning_rate": 7.482325496089171e-06, "loss": 0.7617, "step": 9860 }, { "epoch": 1.773352512811292, "grad_norm": 1.1753240823745728, "learning_rate": 7.481819902590326e-06, "loss": 0.9225, "step": 9861 }, { "epoch": 1.7735323204171536, "grad_norm": 1.037613034248352, "learning_rate": 7.48131427541644e-06, "loss": 0.9255, "step": 9862 }, { "epoch": 1.7737121280230155, "grad_norm": 1.4428726434707642, "learning_rate": 7.4808086145743744e-06, "loss": 0.7313, "step": 9863 }, { "epoch": 1.7738919356288771, "grad_norm": 1.3862603902816772, "learning_rate": 7.480302920070992e-06, "loss": 0.7352, "step": 9864 }, { "epoch": 1.7740717432347388, "grad_norm": 1.4487954378128052, "learning_rate": 7.479797191913154e-06, "loss": 0.7084, "step": 9865 }, { "epoch": 1.7742515508406007, "grad_norm": 1.4590102434158325, "learning_rate": 7.47929143010772e-06, "loss": 0.701, "step": 9866 }, { "epoch": 1.7744313584464622, "grad_norm": 1.6212661266326904, "learning_rate": 7.478785634661556e-06, "loss": 0.7287, "step": 9867 }, { "epoch": 1.774611166052324, "grad_norm": 1.3941030502319336, "learning_rate": 7.478279805581524e-06, "loss": 0.734, "step": 9868 }, { "epoch": 1.7747909736581857, "grad_norm": 1.4649230241775513, "learning_rate": 7.477773942874486e-06, "loss": 0.7574, "step": 9869 }, { "epoch": 1.7749707812640474, "grad_norm": 1.5722692012786865, "learning_rate": 7.477268046547307e-06, "loss": 0.7598, "step": 9870 }, { "epoch": 1.7751505888699093, "grad_norm": 1.578902244567871, "learning_rate": 7.47676211660685e-06, "loss": 0.7338, "step": 9871 }, { "epoch": 1.775330396475771, "grad_norm": 1.4641929864883423, "learning_rate": 7.476256153059984e-06, "loss": 0.7324, "step": 9872 }, { "epoch": 1.7755102040816326, "grad_norm": 1.6180452108383179, "learning_rate": 7.4757501559135684e-06, "loss": 0.6751, "step": 9873 }, { "epoch": 1.7756900116874945, "grad_norm": 1.434963583946228, "learning_rate": 7.4752441251744734e-06, "loss": 0.6816, "step": 9874 }, { "epoch": 1.775869819293356, "grad_norm": 1.8265998363494873, "learning_rate": 7.474738060849562e-06, "loss": 0.7606, "step": 9875 }, { "epoch": 1.7760496268992179, "grad_norm": 2.0550272464752197, "learning_rate": 7.474231962945703e-06, "loss": 0.7524, "step": 9876 }, { "epoch": 1.7762294345050795, "grad_norm": 1.1037063598632812, "learning_rate": 7.473725831469761e-06, "loss": 0.9444, "step": 9877 }, { "epoch": 1.7764092421109412, "grad_norm": 1.452285647392273, "learning_rate": 7.473219666428609e-06, "loss": 0.6774, "step": 9878 }, { "epoch": 1.776589049716803, "grad_norm": 1.4668182134628296, "learning_rate": 7.472713467829108e-06, "loss": 0.7299, "step": 9879 }, { "epoch": 1.7767688573226648, "grad_norm": 1.4773629903793335, "learning_rate": 7.4722072356781315e-06, "loss": 0.7958, "step": 9880 }, { "epoch": 1.7769486649285264, "grad_norm": 1.4807716608047485, "learning_rate": 7.471700969982547e-06, "loss": 0.7541, "step": 9881 }, { "epoch": 1.7771284725343883, "grad_norm": 1.451330542564392, "learning_rate": 7.471194670749222e-06, "loss": 0.7089, "step": 9882 }, { "epoch": 1.7773082801402498, "grad_norm": 1.5266631841659546, "learning_rate": 7.470688337985029e-06, "loss": 0.6742, "step": 9883 }, { "epoch": 1.7774880877461117, "grad_norm": 1.4904170036315918, "learning_rate": 7.470181971696837e-06, "loss": 0.7671, "step": 9884 }, { "epoch": 1.7776678953519733, "grad_norm": 1.7228883504867554, "learning_rate": 7.469675571891517e-06, "loss": 0.8044, "step": 9885 }, { "epoch": 1.777847702957835, "grad_norm": 1.5008641481399536, "learning_rate": 7.469169138575939e-06, "loss": 0.7541, "step": 9886 }, { "epoch": 1.778027510563697, "grad_norm": 1.562893271446228, "learning_rate": 7.468662671756976e-06, "loss": 0.7458, "step": 9887 }, { "epoch": 1.7782073181695586, "grad_norm": 1.505632758140564, "learning_rate": 7.468156171441501e-06, "loss": 0.6998, "step": 9888 }, { "epoch": 1.7783871257754202, "grad_norm": 1.5691335201263428, "learning_rate": 7.467649637636385e-06, "loss": 0.7571, "step": 9889 }, { "epoch": 1.7785669333812821, "grad_norm": 1.573852777481079, "learning_rate": 7.4671430703485005e-06, "loss": 0.7109, "step": 9890 }, { "epoch": 1.7787467409871438, "grad_norm": 1.60556960105896, "learning_rate": 7.466636469584723e-06, "loss": 0.7944, "step": 9891 }, { "epoch": 1.7789265485930055, "grad_norm": 1.549225926399231, "learning_rate": 7.466129835351924e-06, "loss": 0.6953, "step": 9892 }, { "epoch": 1.7791063561988674, "grad_norm": 1.4722098112106323, "learning_rate": 7.465623167656979e-06, "loss": 0.6975, "step": 9893 }, { "epoch": 1.7792861638047288, "grad_norm": 1.4894362688064575, "learning_rate": 7.465116466506763e-06, "loss": 0.7354, "step": 9894 }, { "epoch": 1.7794659714105907, "grad_norm": 1.551640510559082, "learning_rate": 7.464609731908151e-06, "loss": 0.7544, "step": 9895 }, { "epoch": 1.7796457790164524, "grad_norm": 1.4747017621994019, "learning_rate": 7.464102963868018e-06, "loss": 0.7803, "step": 9896 }, { "epoch": 1.779825586622314, "grad_norm": 1.5677450895309448, "learning_rate": 7.463596162393243e-06, "loss": 0.7073, "step": 9897 }, { "epoch": 1.780005394228176, "grad_norm": 1.4354534149169922, "learning_rate": 7.4630893274907e-06, "loss": 0.7373, "step": 9898 }, { "epoch": 1.7801852018340376, "grad_norm": 1.5390281677246094, "learning_rate": 7.4625824591672665e-06, "loss": 0.7436, "step": 9899 }, { "epoch": 1.7803650094398993, "grad_norm": 1.335435390472412, "learning_rate": 7.462075557429821e-06, "loss": 0.6769, "step": 9900 }, { "epoch": 1.7805448170457612, "grad_norm": 1.460780382156372, "learning_rate": 7.461568622285239e-06, "loss": 0.721, "step": 9901 }, { "epoch": 1.7807246246516226, "grad_norm": 1.4245991706848145, "learning_rate": 7.461061653740403e-06, "loss": 0.6898, "step": 9902 }, { "epoch": 1.7809044322574845, "grad_norm": 1.4201825857162476, "learning_rate": 7.460554651802188e-06, "loss": 0.7557, "step": 9903 }, { "epoch": 1.7810842398633462, "grad_norm": 1.5300794839859009, "learning_rate": 7.460047616477476e-06, "loss": 0.7527, "step": 9904 }, { "epoch": 1.7812640474692079, "grad_norm": 1.6814608573913574, "learning_rate": 7.459540547773144e-06, "loss": 0.7293, "step": 9905 }, { "epoch": 1.7814438550750697, "grad_norm": 1.1092419624328613, "learning_rate": 7.459033445696076e-06, "loss": 0.9511, "step": 9906 }, { "epoch": 1.7816236626809314, "grad_norm": 1.3853895664215088, "learning_rate": 7.458526310253149e-06, "loss": 0.9276, "step": 9907 }, { "epoch": 1.781803470286793, "grad_norm": 1.4606473445892334, "learning_rate": 7.458019141451247e-06, "loss": 0.7553, "step": 9908 }, { "epoch": 1.781983277892655, "grad_norm": 1.5059643983840942, "learning_rate": 7.45751193929725e-06, "loss": 0.7815, "step": 9909 }, { "epoch": 1.7821630854985164, "grad_norm": 1.4277108907699585, "learning_rate": 7.457004703798041e-06, "loss": 0.7592, "step": 9910 }, { "epoch": 1.7823428931043783, "grad_norm": 1.47733736038208, "learning_rate": 7.456497434960501e-06, "loss": 0.753, "step": 9911 }, { "epoch": 1.78252270071024, "grad_norm": 1.5456030368804932, "learning_rate": 7.455990132791516e-06, "loss": 0.7585, "step": 9912 }, { "epoch": 1.7827025083161017, "grad_norm": 1.503458857536316, "learning_rate": 7.455482797297966e-06, "loss": 0.7418, "step": 9913 }, { "epoch": 1.7828823159219636, "grad_norm": 1.529945731163025, "learning_rate": 7.454975428486737e-06, "loss": 0.8136, "step": 9914 }, { "epoch": 1.7830621235278252, "grad_norm": 1.4969090223312378, "learning_rate": 7.454468026364713e-06, "loss": 0.7835, "step": 9915 }, { "epoch": 1.783241931133687, "grad_norm": 1.4950532913208008, "learning_rate": 7.453960590938778e-06, "loss": 0.7444, "step": 9916 }, { "epoch": 1.7834217387395488, "grad_norm": 1.47063148021698, "learning_rate": 7.453453122215818e-06, "loss": 0.6793, "step": 9917 }, { "epoch": 1.7836015463454105, "grad_norm": 1.4515591859817505, "learning_rate": 7.452945620202717e-06, "loss": 0.7902, "step": 9918 }, { "epoch": 1.7837813539512721, "grad_norm": 1.473689079284668, "learning_rate": 7.452438084906364e-06, "loss": 0.6945, "step": 9919 }, { "epoch": 1.783961161557134, "grad_norm": 1.4754985570907593, "learning_rate": 7.4519305163336445e-06, "loss": 0.7191, "step": 9920 }, { "epoch": 1.7841409691629955, "grad_norm": 1.4615296125411987, "learning_rate": 7.451422914491444e-06, "loss": 0.7697, "step": 9921 }, { "epoch": 1.7843207767688574, "grad_norm": 1.5532022714614868, "learning_rate": 7.450915279386652e-06, "loss": 0.7477, "step": 9922 }, { "epoch": 1.784500584374719, "grad_norm": 1.551584005355835, "learning_rate": 7.450407611026155e-06, "loss": 0.7393, "step": 9923 }, { "epoch": 1.7846803919805807, "grad_norm": 1.2174009084701538, "learning_rate": 7.449899909416842e-06, "loss": 0.9014, "step": 9924 }, { "epoch": 1.7848601995864426, "grad_norm": 1.0997066497802734, "learning_rate": 7.449392174565602e-06, "loss": 0.9291, "step": 9925 }, { "epoch": 1.7850400071923043, "grad_norm": 1.6234487295150757, "learning_rate": 7.4488844064793244e-06, "loss": 0.6711, "step": 9926 }, { "epoch": 1.785219814798166, "grad_norm": 1.5367801189422607, "learning_rate": 7.448376605164899e-06, "loss": 0.7325, "step": 9927 }, { "epoch": 1.7853996224040278, "grad_norm": 1.6956526041030884, "learning_rate": 7.447868770629215e-06, "loss": 0.7929, "step": 9928 }, { "epoch": 1.7855794300098893, "grad_norm": 1.53714120388031, "learning_rate": 7.447360902879164e-06, "loss": 0.6991, "step": 9929 }, { "epoch": 1.7857592376157512, "grad_norm": 1.4825613498687744, "learning_rate": 7.446853001921635e-06, "loss": 0.7265, "step": 9930 }, { "epoch": 1.7859390452216128, "grad_norm": 1.2154136896133423, "learning_rate": 7.4463450677635226e-06, "loss": 0.8755, "step": 9931 }, { "epoch": 1.7861188528274745, "grad_norm": 1.1708862781524658, "learning_rate": 7.445837100411719e-06, "loss": 0.9057, "step": 9932 }, { "epoch": 1.7862986604333364, "grad_norm": 1.6785476207733154, "learning_rate": 7.445329099873114e-06, "loss": 0.6754, "step": 9933 }, { "epoch": 1.786478468039198, "grad_norm": 1.5560989379882812, "learning_rate": 7.444821066154602e-06, "loss": 0.733, "step": 9934 }, { "epoch": 1.7866582756450597, "grad_norm": 1.502388596534729, "learning_rate": 7.444312999263077e-06, "loss": 0.6885, "step": 9935 }, { "epoch": 1.7868380832509216, "grad_norm": 1.4564098119735718, "learning_rate": 7.443804899205432e-06, "loss": 0.7068, "step": 9936 }, { "epoch": 1.787017890856783, "grad_norm": 1.452533483505249, "learning_rate": 7.443296765988558e-06, "loss": 0.7534, "step": 9937 }, { "epoch": 1.787197698462645, "grad_norm": 1.5187536478042603, "learning_rate": 7.442788599619356e-06, "loss": 0.7045, "step": 9938 }, { "epoch": 1.7873775060685066, "grad_norm": 1.4492179155349731, "learning_rate": 7.442280400104715e-06, "loss": 0.7094, "step": 9939 }, { "epoch": 1.7875573136743683, "grad_norm": 1.6511540412902832, "learning_rate": 7.441772167451536e-06, "loss": 0.7398, "step": 9940 }, { "epoch": 1.7877371212802302, "grad_norm": 1.551408290863037, "learning_rate": 7.441263901666711e-06, "loss": 0.772, "step": 9941 }, { "epoch": 1.7879169288860919, "grad_norm": 1.4912302494049072, "learning_rate": 7.44075560275714e-06, "loss": 0.7882, "step": 9942 }, { "epoch": 1.7880967364919536, "grad_norm": 1.4931787252426147, "learning_rate": 7.440247270729717e-06, "loss": 0.7841, "step": 9943 }, { "epoch": 1.7882765440978154, "grad_norm": 1.3246897459030151, "learning_rate": 7.439738905591342e-06, "loss": 0.9262, "step": 9944 }, { "epoch": 1.7884563517036771, "grad_norm": 1.5355052947998047, "learning_rate": 7.4392305073489095e-06, "loss": 0.7021, "step": 9945 }, { "epoch": 1.7886361593095388, "grad_norm": 1.4325307607650757, "learning_rate": 7.43872207600932e-06, "loss": 0.767, "step": 9946 }, { "epoch": 1.7888159669154007, "grad_norm": 1.5415663719177246, "learning_rate": 7.438213611579472e-06, "loss": 0.7253, "step": 9947 }, { "epoch": 1.7889957745212621, "grad_norm": 1.4917364120483398, "learning_rate": 7.437705114066265e-06, "loss": 0.7422, "step": 9948 }, { "epoch": 1.789175582127124, "grad_norm": 1.5472307205200195, "learning_rate": 7.437196583476597e-06, "loss": 0.7043, "step": 9949 }, { "epoch": 1.7893553897329857, "grad_norm": 1.2547369003295898, "learning_rate": 7.43668801981737e-06, "loss": 0.9327, "step": 9950 }, { "epoch": 1.7895351973388474, "grad_norm": 1.5304908752441406, "learning_rate": 7.436179423095484e-06, "loss": 0.7657, "step": 9951 }, { "epoch": 1.7897150049447093, "grad_norm": 1.5325130224227905, "learning_rate": 7.43567079331784e-06, "loss": 0.7594, "step": 9952 }, { "epoch": 1.789894812550571, "grad_norm": 1.4610236883163452, "learning_rate": 7.435162130491338e-06, "loss": 0.6755, "step": 9953 }, { "epoch": 1.7900746201564326, "grad_norm": 1.4251643419265747, "learning_rate": 7.434653434622883e-06, "loss": 0.737, "step": 9954 }, { "epoch": 1.7902544277622945, "grad_norm": 1.4411803483963013, "learning_rate": 7.434144705719374e-06, "loss": 0.7692, "step": 9955 }, { "epoch": 1.790434235368156, "grad_norm": 1.2027087211608887, "learning_rate": 7.433635943787716e-06, "loss": 0.9268, "step": 9956 }, { "epoch": 1.7906140429740178, "grad_norm": 1.5390928983688354, "learning_rate": 7.433127148834811e-06, "loss": 0.7156, "step": 9957 }, { "epoch": 1.7907938505798795, "grad_norm": 1.536441445350647, "learning_rate": 7.432618320867564e-06, "loss": 0.7306, "step": 9958 }, { "epoch": 1.7909736581857412, "grad_norm": 1.4035996198654175, "learning_rate": 7.432109459892878e-06, "loss": 0.713, "step": 9959 }, { "epoch": 1.791153465791603, "grad_norm": 1.0937329530715942, "learning_rate": 7.431600565917658e-06, "loss": 0.9293, "step": 9960 }, { "epoch": 1.7913332733974647, "grad_norm": 1.4709153175354004, "learning_rate": 7.4310916389488084e-06, "loss": 0.7168, "step": 9961 }, { "epoch": 1.7915130810033264, "grad_norm": 1.6159083843231201, "learning_rate": 7.430582678993236e-06, "loss": 0.7358, "step": 9962 }, { "epoch": 1.7916928886091883, "grad_norm": 1.3603936433792114, "learning_rate": 7.430073686057844e-06, "loss": 0.635, "step": 9963 }, { "epoch": 1.7918726962150497, "grad_norm": 1.1228314638137817, "learning_rate": 7.429564660149543e-06, "loss": 0.9409, "step": 9964 }, { "epoch": 1.7920525038209116, "grad_norm": 1.4751509428024292, "learning_rate": 7.429055601275236e-06, "loss": 0.6969, "step": 9965 }, { "epoch": 1.7922323114267733, "grad_norm": 1.6256211996078491, "learning_rate": 7.428546509441833e-06, "loss": 0.7552, "step": 9966 }, { "epoch": 1.792412119032635, "grad_norm": 1.4208229780197144, "learning_rate": 7.4280373846562396e-06, "loss": 0.7503, "step": 9967 }, { "epoch": 1.7925919266384969, "grad_norm": 1.437863826751709, "learning_rate": 7.427528226925364e-06, "loss": 0.6878, "step": 9968 }, { "epoch": 1.7927717342443585, "grad_norm": 1.5599925518035889, "learning_rate": 7.427019036256118e-06, "loss": 0.7412, "step": 9969 }, { "epoch": 1.7929515418502202, "grad_norm": 1.0656081438064575, "learning_rate": 7.4265098126554065e-06, "loss": 0.9723, "step": 9970 }, { "epoch": 1.793131349456082, "grad_norm": 1.519660472869873, "learning_rate": 7.42600055613014e-06, "loss": 0.7483, "step": 9971 }, { "epoch": 1.7933111570619438, "grad_norm": 1.6336216926574707, "learning_rate": 7.425491266687231e-06, "loss": 0.7595, "step": 9972 }, { "epoch": 1.7934909646678054, "grad_norm": 1.4918954372406006, "learning_rate": 7.424981944333587e-06, "loss": 0.7175, "step": 9973 }, { "epoch": 1.7936707722736673, "grad_norm": 1.5486587285995483, "learning_rate": 7.4244725890761205e-06, "loss": 0.733, "step": 9974 }, { "epoch": 1.7938505798795288, "grad_norm": 1.4850459098815918, "learning_rate": 7.423963200921741e-06, "loss": 0.7622, "step": 9975 }, { "epoch": 1.7940303874853907, "grad_norm": 1.2361986637115479, "learning_rate": 7.423453779877363e-06, "loss": 0.9387, "step": 9976 }, { "epoch": 1.7942101950912523, "grad_norm": 1.099563479423523, "learning_rate": 7.422944325949897e-06, "loss": 0.9552, "step": 9977 }, { "epoch": 1.794390002697114, "grad_norm": 1.514748454093933, "learning_rate": 7.422434839146256e-06, "loss": 0.7578, "step": 9978 }, { "epoch": 1.794569810302976, "grad_norm": 1.6884454488754272, "learning_rate": 7.421925319473351e-06, "loss": 0.7974, "step": 9979 }, { "epoch": 1.7947496179088376, "grad_norm": 1.1367710828781128, "learning_rate": 7.421415766938098e-06, "loss": 0.906, "step": 9980 }, { "epoch": 1.7949294255146993, "grad_norm": 1.439007043838501, "learning_rate": 7.420906181547412e-06, "loss": 0.6987, "step": 9981 }, { "epoch": 1.7951092331205611, "grad_norm": 2.173187017440796, "learning_rate": 7.4203965633082044e-06, "loss": 0.7661, "step": 9982 }, { "epoch": 1.7952890407264226, "grad_norm": 1.4051182270050049, "learning_rate": 7.41988691222739e-06, "loss": 0.696, "step": 9983 }, { "epoch": 1.7954688483322845, "grad_norm": 1.455989956855774, "learning_rate": 7.419377228311886e-06, "loss": 0.7274, "step": 9984 }, { "epoch": 1.7956486559381462, "grad_norm": 1.4632683992385864, "learning_rate": 7.418867511568608e-06, "loss": 0.7419, "step": 9985 }, { "epoch": 1.7958284635440078, "grad_norm": 1.5495424270629883, "learning_rate": 7.418357762004473e-06, "loss": 0.7066, "step": 9986 }, { "epoch": 1.7960082711498697, "grad_norm": 1.057944655418396, "learning_rate": 7.4178479796263944e-06, "loss": 0.9642, "step": 9987 }, { "epoch": 1.7961880787557314, "grad_norm": 1.4521217346191406, "learning_rate": 7.417338164441293e-06, "loss": 0.7874, "step": 9988 }, { "epoch": 1.796367886361593, "grad_norm": 1.0868220329284668, "learning_rate": 7.416828316456084e-06, "loss": 0.9531, "step": 9989 }, { "epoch": 1.796547693967455, "grad_norm": 1.4571176767349243, "learning_rate": 7.416318435677685e-06, "loss": 0.7311, "step": 9990 }, { "epoch": 1.7967275015733164, "grad_norm": 1.4462146759033203, "learning_rate": 7.4158085221130175e-06, "loss": 0.7214, "step": 9991 }, { "epoch": 1.7969073091791783, "grad_norm": 1.5422143936157227, "learning_rate": 7.415298575768995e-06, "loss": 0.715, "step": 9992 }, { "epoch": 1.79708711678504, "grad_norm": 1.4668174982070923, "learning_rate": 7.414788596652543e-06, "loss": 0.7151, "step": 9993 }, { "epoch": 1.7972669243909016, "grad_norm": 1.3616820573806763, "learning_rate": 7.414278584770577e-06, "loss": 0.7213, "step": 9994 }, { "epoch": 1.7974467319967635, "grad_norm": 1.0791616439819336, "learning_rate": 7.413768540130018e-06, "loss": 0.9618, "step": 9995 }, { "epoch": 1.7976265396026252, "grad_norm": 1.4384407997131348, "learning_rate": 7.413258462737787e-06, "loss": 0.7522, "step": 9996 }, { "epoch": 1.7978063472084869, "grad_norm": 1.6918296813964844, "learning_rate": 7.412748352600807e-06, "loss": 0.7195, "step": 9997 }, { "epoch": 1.7979861548143488, "grad_norm": 1.53178071975708, "learning_rate": 7.412238209725996e-06, "loss": 0.6478, "step": 9998 }, { "epoch": 1.7981659624202104, "grad_norm": 1.1788604259490967, "learning_rate": 7.411728034120279e-06, "loss": 0.9137, "step": 9999 }, { "epoch": 1.798345770026072, "grad_norm": 1.5867319107055664, "learning_rate": 7.411217825790576e-06, "loss": 0.6907, "step": 10000 }, { "epoch": 1.798345770026072, "eval_loss": 0.7888384461402893, "eval_runtime": 148.5639, "eval_samples_per_second": 96.807, "eval_steps_per_second": 1.515, "step": 10000 }, { "epoch": 1.798525577631934, "grad_norm": 1.1473429203033447, "learning_rate": 7.410707584743811e-06, "loss": 0.9295, "step": 10001 }, { "epoch": 1.7987053852377954, "grad_norm": 1.423117756843567, "learning_rate": 7.410197310986908e-06, "loss": 0.738, "step": 10002 }, { "epoch": 1.7988851928436573, "grad_norm": 1.6053658723831177, "learning_rate": 7.4096870045267895e-06, "loss": 0.7195, "step": 10003 }, { "epoch": 1.799065000449519, "grad_norm": 1.4651738405227661, "learning_rate": 7.409176665370381e-06, "loss": 0.7632, "step": 10004 }, { "epoch": 1.7992448080553807, "grad_norm": 1.4785490036010742, "learning_rate": 7.408666293524606e-06, "loss": 0.6981, "step": 10005 }, { "epoch": 1.7994246156612426, "grad_norm": 1.5415799617767334, "learning_rate": 7.408155888996389e-06, "loss": 0.7195, "step": 10006 }, { "epoch": 1.7996044232671042, "grad_norm": 1.4976682662963867, "learning_rate": 7.407645451792657e-06, "loss": 0.7143, "step": 10007 }, { "epoch": 1.799784230872966, "grad_norm": 1.6509935855865479, "learning_rate": 7.407134981920334e-06, "loss": 0.7237, "step": 10008 }, { "epoch": 1.7999640384788278, "grad_norm": 1.5418081283569336, "learning_rate": 7.4066244793863494e-06, "loss": 0.7628, "step": 10009 }, { "epoch": 1.8001438460846892, "grad_norm": 1.494583249092102, "learning_rate": 7.406113944197628e-06, "loss": 0.7834, "step": 10010 }, { "epoch": 1.8003236536905511, "grad_norm": 1.5007047653198242, "learning_rate": 7.405603376361098e-06, "loss": 0.7347, "step": 10011 }, { "epoch": 1.8005034612964128, "grad_norm": 1.6393799781799316, "learning_rate": 7.405092775883687e-06, "loss": 0.7358, "step": 10012 }, { "epoch": 1.8006832689022745, "grad_norm": 1.251140832901001, "learning_rate": 7.404582142772322e-06, "loss": 0.9675, "step": 10013 }, { "epoch": 1.8008630765081364, "grad_norm": 1.404118537902832, "learning_rate": 7.404071477033932e-06, "loss": 0.741, "step": 10014 }, { "epoch": 1.801042884113998, "grad_norm": 1.0617538690567017, "learning_rate": 7.403560778675448e-06, "loss": 0.8958, "step": 10015 }, { "epoch": 1.8012226917198597, "grad_norm": 1.113785743713379, "learning_rate": 7.403050047703797e-06, "loss": 0.8816, "step": 10016 }, { "epoch": 1.8014024993257216, "grad_norm": 1.582520604133606, "learning_rate": 7.402539284125909e-06, "loss": 0.7805, "step": 10017 }, { "epoch": 1.801582306931583, "grad_norm": 1.5593969821929932, "learning_rate": 7.402028487948716e-06, "loss": 0.7368, "step": 10018 }, { "epoch": 1.801762114537445, "grad_norm": 1.5713757276535034, "learning_rate": 7.401517659179149e-06, "loss": 0.7285, "step": 10019 }, { "epoch": 1.8019419221433066, "grad_norm": 1.5552929639816284, "learning_rate": 7.4010067978241384e-06, "loss": 0.7438, "step": 10020 }, { "epoch": 1.8021217297491683, "grad_norm": 1.1602691411972046, "learning_rate": 7.400495903890617e-06, "loss": 0.9454, "step": 10021 }, { "epoch": 1.8023015373550302, "grad_norm": 1.5802370309829712, "learning_rate": 7.399984977385514e-06, "loss": 0.7458, "step": 10022 }, { "epoch": 1.8024813449608919, "grad_norm": 1.4853929281234741, "learning_rate": 7.399474018315765e-06, "loss": 0.8004, "step": 10023 }, { "epoch": 1.8026611525667535, "grad_norm": 1.6100187301635742, "learning_rate": 7.398963026688302e-06, "loss": 0.7758, "step": 10024 }, { "epoch": 1.8028409601726154, "grad_norm": 1.3373874425888062, "learning_rate": 7.398452002510058e-06, "loss": 0.6471, "step": 10025 }, { "epoch": 1.803020767778477, "grad_norm": 1.4844434261322021, "learning_rate": 7.397940945787968e-06, "loss": 0.7446, "step": 10026 }, { "epoch": 1.8032005753843388, "grad_norm": 1.5193185806274414, "learning_rate": 7.397429856528965e-06, "loss": 0.7833, "step": 10027 }, { "epoch": 1.8033803829902006, "grad_norm": 1.3963830471038818, "learning_rate": 7.396918734739985e-06, "loss": 0.7313, "step": 10028 }, { "epoch": 1.803560190596062, "grad_norm": 1.125872254371643, "learning_rate": 7.3964075804279625e-06, "loss": 0.9161, "step": 10029 }, { "epoch": 1.803739998201924, "grad_norm": 1.7848246097564697, "learning_rate": 7.395896393599834e-06, "loss": 0.7419, "step": 10030 }, { "epoch": 1.8039198058077857, "grad_norm": 1.4567688703536987, "learning_rate": 7.395385174262536e-06, "loss": 0.8156, "step": 10031 }, { "epoch": 1.8040996134136473, "grad_norm": 1.4552628993988037, "learning_rate": 7.3948739224230025e-06, "loss": 0.7077, "step": 10032 }, { "epoch": 1.8042794210195092, "grad_norm": 1.597786545753479, "learning_rate": 7.394362638088174e-06, "loss": 0.7385, "step": 10033 }, { "epoch": 1.804459228625371, "grad_norm": 1.180655598640442, "learning_rate": 7.3938513212649845e-06, "loss": 0.9343, "step": 10034 }, { "epoch": 1.8046390362312326, "grad_norm": 1.4686325788497925, "learning_rate": 7.393339971960376e-06, "loss": 0.7461, "step": 10035 }, { "epoch": 1.8048188438370945, "grad_norm": 1.4303135871887207, "learning_rate": 7.392828590181282e-06, "loss": 0.7192, "step": 10036 }, { "epoch": 1.804998651442956, "grad_norm": 1.4525790214538574, "learning_rate": 7.3923171759346455e-06, "loss": 0.704, "step": 10037 }, { "epoch": 1.8051784590488178, "grad_norm": 1.4948967695236206, "learning_rate": 7.391805729227403e-06, "loss": 0.7385, "step": 10038 }, { "epoch": 1.8053582666546795, "grad_norm": 1.4283970594406128, "learning_rate": 7.391294250066494e-06, "loss": 0.806, "step": 10039 }, { "epoch": 1.8055380742605411, "grad_norm": 1.0752403736114502, "learning_rate": 7.390782738458862e-06, "loss": 0.9371, "step": 10040 }, { "epoch": 1.805717881866403, "grad_norm": 1.4847220182418823, "learning_rate": 7.390271194411445e-06, "loss": 0.6697, "step": 10041 }, { "epoch": 1.8058976894722647, "grad_norm": 1.4605711698532104, "learning_rate": 7.389759617931183e-06, "loss": 0.6605, "step": 10042 }, { "epoch": 1.8060774970781264, "grad_norm": 1.4758702516555786, "learning_rate": 7.38924800902502e-06, "loss": 0.7113, "step": 10043 }, { "epoch": 1.8062573046839883, "grad_norm": 1.4255610704421997, "learning_rate": 7.388736367699894e-06, "loss": 0.7664, "step": 10044 }, { "epoch": 1.8064371122898497, "grad_norm": 1.5832359790802002, "learning_rate": 7.388224693962753e-06, "loss": 0.8133, "step": 10045 }, { "epoch": 1.8066169198957116, "grad_norm": 1.497421145439148, "learning_rate": 7.387712987820535e-06, "loss": 0.6821, "step": 10046 }, { "epoch": 1.8067967275015733, "grad_norm": 1.4112645387649536, "learning_rate": 7.387201249280186e-06, "loss": 0.7029, "step": 10047 }, { "epoch": 1.806976535107435, "grad_norm": 1.8705792427062988, "learning_rate": 7.3866894783486465e-06, "loss": 0.7183, "step": 10048 }, { "epoch": 1.8071563427132968, "grad_norm": 1.5417609214782715, "learning_rate": 7.3861776750328625e-06, "loss": 0.7735, "step": 10049 }, { "epoch": 1.8073361503191585, "grad_norm": 1.5077041387557983, "learning_rate": 7.385665839339779e-06, "loss": 0.7101, "step": 10050 }, { "epoch": 1.8075159579250202, "grad_norm": 1.5516672134399414, "learning_rate": 7.385153971276342e-06, "loss": 0.7301, "step": 10051 }, { "epoch": 1.807695765530882, "grad_norm": 1.4721423387527466, "learning_rate": 7.384642070849493e-06, "loss": 0.7424, "step": 10052 }, { "epoch": 1.8078755731367435, "grad_norm": 1.1242620944976807, "learning_rate": 7.384130138066181e-06, "loss": 0.9433, "step": 10053 }, { "epoch": 1.8080553807426054, "grad_norm": 1.588387370109558, "learning_rate": 7.383618172933351e-06, "loss": 0.7199, "step": 10054 }, { "epoch": 1.8082351883484673, "grad_norm": 1.6323058605194092, "learning_rate": 7.3831061754579515e-06, "loss": 0.7601, "step": 10055 }, { "epoch": 1.8084149959543288, "grad_norm": 1.0563417673110962, "learning_rate": 7.382594145646926e-06, "loss": 0.9165, "step": 10056 }, { "epoch": 1.8085948035601906, "grad_norm": 1.479970932006836, "learning_rate": 7.382082083507226e-06, "loss": 0.7276, "step": 10057 }, { "epoch": 1.8087746111660523, "grad_norm": 1.5717123746871948, "learning_rate": 7.3815699890457974e-06, "loss": 0.7597, "step": 10058 }, { "epoch": 1.808954418771914, "grad_norm": 1.4221614599227905, "learning_rate": 7.381057862269588e-06, "loss": 0.7471, "step": 10059 }, { "epoch": 1.8091342263777759, "grad_norm": 1.5970691442489624, "learning_rate": 7.380545703185549e-06, "loss": 0.7424, "step": 10060 }, { "epoch": 1.8093140339836375, "grad_norm": 1.4282946586608887, "learning_rate": 7.380033511800626e-06, "loss": 0.7355, "step": 10061 }, { "epoch": 1.8094938415894992, "grad_norm": 1.4869356155395508, "learning_rate": 7.379521288121774e-06, "loss": 0.7565, "step": 10062 }, { "epoch": 1.8096736491953611, "grad_norm": 1.6029503345489502, "learning_rate": 7.379009032155939e-06, "loss": 0.7615, "step": 10063 }, { "epoch": 1.8098534568012226, "grad_norm": 1.4227839708328247, "learning_rate": 7.378496743910073e-06, "loss": 0.7116, "step": 10064 }, { "epoch": 1.8100332644070845, "grad_norm": 1.4800447225570679, "learning_rate": 7.377984423391128e-06, "loss": 0.7227, "step": 10065 }, { "epoch": 1.8102130720129461, "grad_norm": 1.6714415550231934, "learning_rate": 7.3774720706060536e-06, "loss": 0.7128, "step": 10066 }, { "epoch": 1.8103928796188078, "grad_norm": 1.165722131729126, "learning_rate": 7.376959685561803e-06, "loss": 0.9126, "step": 10067 }, { "epoch": 1.8105726872246697, "grad_norm": 1.448097586631775, "learning_rate": 7.376447268265329e-06, "loss": 0.7602, "step": 10068 }, { "epoch": 1.8107524948305314, "grad_norm": 1.223430871963501, "learning_rate": 7.375934818723584e-06, "loss": 0.9145, "step": 10069 }, { "epoch": 1.810932302436393, "grad_norm": 1.4648966789245605, "learning_rate": 7.375422336943519e-06, "loss": 0.6926, "step": 10070 }, { "epoch": 1.811112110042255, "grad_norm": 1.471686601638794, "learning_rate": 7.37490982293209e-06, "loss": 0.713, "step": 10071 }, { "epoch": 1.8112919176481164, "grad_norm": 1.4223252534866333, "learning_rate": 7.3743972766962525e-06, "loss": 0.706, "step": 10072 }, { "epoch": 1.8114717252539783, "grad_norm": 1.1554056406021118, "learning_rate": 7.373884698242959e-06, "loss": 0.9258, "step": 10073 }, { "epoch": 1.81165153285984, "grad_norm": 1.557162880897522, "learning_rate": 7.373372087579165e-06, "loss": 0.7024, "step": 10074 }, { "epoch": 1.8118313404657016, "grad_norm": 1.5460247993469238, "learning_rate": 7.372859444711826e-06, "loss": 0.7946, "step": 10075 }, { "epoch": 1.8120111480715635, "grad_norm": 1.4618796110153198, "learning_rate": 7.3723467696478975e-06, "loss": 0.7494, "step": 10076 }, { "epoch": 1.8121909556774252, "grad_norm": 1.439682960510254, "learning_rate": 7.3718340623943374e-06, "loss": 0.7275, "step": 10077 }, { "epoch": 1.8123707632832868, "grad_norm": 1.100791573524475, "learning_rate": 7.3713213229581e-06, "loss": 0.9661, "step": 10078 }, { "epoch": 1.8125505708891487, "grad_norm": 1.5463813543319702, "learning_rate": 7.370808551346145e-06, "loss": 0.7403, "step": 10079 }, { "epoch": 1.8127303784950102, "grad_norm": 1.628953218460083, "learning_rate": 7.370295747565427e-06, "loss": 0.7305, "step": 10080 }, { "epoch": 1.812910186100872, "grad_norm": 1.0716112852096558, "learning_rate": 7.369782911622907e-06, "loss": 0.9148, "step": 10081 }, { "epoch": 1.813089993706734, "grad_norm": 1.5017518997192383, "learning_rate": 7.369270043525543e-06, "loss": 0.68, "step": 10082 }, { "epoch": 1.8132698013125954, "grad_norm": 0.9730829000473022, "learning_rate": 7.368757143280291e-06, "loss": 0.9795, "step": 10083 }, { "epoch": 1.8134496089184573, "grad_norm": 1.4380897283554077, "learning_rate": 7.368244210894113e-06, "loss": 0.7061, "step": 10084 }, { "epoch": 1.813629416524319, "grad_norm": 1.5059142112731934, "learning_rate": 7.367731246373972e-06, "loss": 0.7571, "step": 10085 }, { "epoch": 1.8138092241301806, "grad_norm": 1.5817742347717285, "learning_rate": 7.367218249726821e-06, "loss": 0.7398, "step": 10086 }, { "epoch": 1.8139890317360425, "grad_norm": 1.5476956367492676, "learning_rate": 7.3667052209596265e-06, "loss": 0.6767, "step": 10087 }, { "epoch": 1.8141688393419042, "grad_norm": 1.4967005252838135, "learning_rate": 7.366192160079346e-06, "loss": 0.7668, "step": 10088 }, { "epoch": 1.8143486469477659, "grad_norm": 1.4587457180023193, "learning_rate": 7.365679067092945e-06, "loss": 0.7808, "step": 10089 }, { "epoch": 1.8145284545536278, "grad_norm": 1.4016011953353882, "learning_rate": 7.365165942007381e-06, "loss": 0.7503, "step": 10090 }, { "epoch": 1.8147082621594892, "grad_norm": 1.057353138923645, "learning_rate": 7.36465278482962e-06, "loss": 0.9394, "step": 10091 }, { "epoch": 1.814888069765351, "grad_norm": 1.4885698556900024, "learning_rate": 7.364139595566622e-06, "loss": 0.7013, "step": 10092 }, { "epoch": 1.8150678773712128, "grad_norm": 1.5061348676681519, "learning_rate": 7.3636263742253525e-06, "loss": 0.7071, "step": 10093 }, { "epoch": 1.8152476849770744, "grad_norm": 1.1298590898513794, "learning_rate": 7.363113120812774e-06, "loss": 0.9198, "step": 10094 }, { "epoch": 1.8154274925829363, "grad_norm": 1.4079562425613403, "learning_rate": 7.362599835335853e-06, "loss": 0.6659, "step": 10095 }, { "epoch": 1.815607300188798, "grad_norm": 1.450853705406189, "learning_rate": 7.36208651780155e-06, "loss": 0.7396, "step": 10096 }, { "epoch": 1.8157871077946597, "grad_norm": 1.1881263256072998, "learning_rate": 7.361573168216834e-06, "loss": 0.9555, "step": 10097 }, { "epoch": 1.8159669154005216, "grad_norm": 1.9806113243103027, "learning_rate": 7.361059786588668e-06, "loss": 0.7138, "step": 10098 }, { "epoch": 1.816146723006383, "grad_norm": 1.211962103843689, "learning_rate": 7.360546372924019e-06, "loss": 0.9195, "step": 10099 }, { "epoch": 1.816326530612245, "grad_norm": 1.5530803203582764, "learning_rate": 7.360032927229853e-06, "loss": 0.6934, "step": 10100 }, { "epoch": 1.8165063382181066, "grad_norm": 1.6137142181396484, "learning_rate": 7.359519449513137e-06, "loss": 0.7221, "step": 10101 }, { "epoch": 1.8166861458239683, "grad_norm": 1.5870929956436157, "learning_rate": 7.359005939780838e-06, "loss": 0.7615, "step": 10102 }, { "epoch": 1.8168659534298301, "grad_norm": 1.5662670135498047, "learning_rate": 7.358492398039923e-06, "loss": 0.6803, "step": 10103 }, { "epoch": 1.8170457610356918, "grad_norm": 1.1136598587036133, "learning_rate": 7.357978824297362e-06, "loss": 0.88, "step": 10104 }, { "epoch": 1.8172255686415535, "grad_norm": 1.7073711156845093, "learning_rate": 7.357465218560122e-06, "loss": 0.7015, "step": 10105 }, { "epoch": 1.8174053762474154, "grad_norm": 1.5704330205917358, "learning_rate": 7.356951580835171e-06, "loss": 0.7454, "step": 10106 }, { "epoch": 1.8175851838532768, "grad_norm": 1.049546241760254, "learning_rate": 7.356437911129481e-06, "loss": 0.9341, "step": 10107 }, { "epoch": 1.8177649914591387, "grad_norm": 1.7064533233642578, "learning_rate": 7.35592420945002e-06, "loss": 0.7349, "step": 10108 }, { "epoch": 1.8179447990650004, "grad_norm": 1.1833854913711548, "learning_rate": 7.3554104758037605e-06, "loss": 0.9302, "step": 10109 }, { "epoch": 1.818124606670862, "grad_norm": 1.6040935516357422, "learning_rate": 7.35489671019767e-06, "loss": 0.7853, "step": 10110 }, { "epoch": 1.818304414276724, "grad_norm": 1.5013599395751953, "learning_rate": 7.354382912638721e-06, "loss": 0.7385, "step": 10111 }, { "epoch": 1.8184842218825856, "grad_norm": 1.2921693325042725, "learning_rate": 7.353869083133885e-06, "loss": 0.9291, "step": 10112 }, { "epoch": 1.8186640294884473, "grad_norm": 1.7836124897003174, "learning_rate": 7.353355221690135e-06, "loss": 0.6842, "step": 10113 }, { "epoch": 1.8188438370943092, "grad_norm": 1.4342089891433716, "learning_rate": 7.352841328314442e-06, "loss": 0.7066, "step": 10114 }, { "epoch": 1.8190236447001709, "grad_norm": 1.5219264030456543, "learning_rate": 7.352327403013779e-06, "loss": 0.7134, "step": 10115 }, { "epoch": 1.8192034523060325, "grad_norm": 2.0688438415527344, "learning_rate": 7.351813445795119e-06, "loss": 0.7426, "step": 10116 }, { "epoch": 1.8193832599118944, "grad_norm": 1.5968554019927979, "learning_rate": 7.3512994566654375e-06, "loss": 0.7435, "step": 10117 }, { "epoch": 1.8195630675177559, "grad_norm": 1.5417375564575195, "learning_rate": 7.3507854356317085e-06, "loss": 0.744, "step": 10118 }, { "epoch": 1.8197428751236178, "grad_norm": 1.6464262008666992, "learning_rate": 7.350271382700904e-06, "loss": 0.7252, "step": 10119 }, { "epoch": 1.8199226827294794, "grad_norm": 1.0511083602905273, "learning_rate": 7.349757297880003e-06, "loss": 0.8731, "step": 10120 }, { "epoch": 1.820102490335341, "grad_norm": 1.5670655965805054, "learning_rate": 7.349243181175977e-06, "loss": 0.7312, "step": 10121 }, { "epoch": 1.820282297941203, "grad_norm": 1.126356601715088, "learning_rate": 7.348729032595804e-06, "loss": 0.9306, "step": 10122 }, { "epoch": 1.8204621055470647, "grad_norm": 1.3843467235565186, "learning_rate": 7.348214852146459e-06, "loss": 0.6455, "step": 10123 }, { "epoch": 1.8206419131529263, "grad_norm": 1.479141354560852, "learning_rate": 7.347700639834921e-06, "loss": 0.6917, "step": 10124 }, { "epoch": 1.8208217207587882, "grad_norm": 1.46963632106781, "learning_rate": 7.347186395668165e-06, "loss": 0.6719, "step": 10125 }, { "epoch": 1.8210015283646497, "grad_norm": 1.5977145433425903, "learning_rate": 7.346672119653169e-06, "loss": 0.6979, "step": 10126 }, { "epoch": 1.8211813359705116, "grad_norm": 1.5136847496032715, "learning_rate": 7.346157811796913e-06, "loss": 0.7585, "step": 10127 }, { "epoch": 1.8213611435763732, "grad_norm": 1.470820665359497, "learning_rate": 7.345643472106372e-06, "loss": 0.7084, "step": 10128 }, { "epoch": 1.821540951182235, "grad_norm": 1.566365361213684, "learning_rate": 7.345129100588528e-06, "loss": 0.7628, "step": 10129 }, { "epoch": 1.8217207587880968, "grad_norm": 1.4302433729171753, "learning_rate": 7.3446146972503594e-06, "loss": 0.7647, "step": 10130 }, { "epoch": 1.8219005663939585, "grad_norm": 1.4675339460372925, "learning_rate": 7.344100262098845e-06, "loss": 0.674, "step": 10131 }, { "epoch": 1.8220803739998201, "grad_norm": 1.5502159595489502, "learning_rate": 7.343585795140967e-06, "loss": 0.7021, "step": 10132 }, { "epoch": 1.822260181605682, "grad_norm": 1.6227428913116455, "learning_rate": 7.343071296383704e-06, "loss": 0.8268, "step": 10133 }, { "epoch": 1.8224399892115435, "grad_norm": 1.5946444272994995, "learning_rate": 7.342556765834039e-06, "loss": 0.744, "step": 10134 }, { "epoch": 1.8226197968174054, "grad_norm": 1.5592615604400635, "learning_rate": 7.342042203498952e-06, "loss": 0.6776, "step": 10135 }, { "epoch": 1.822799604423267, "grad_norm": 1.4903264045715332, "learning_rate": 7.341527609385425e-06, "loss": 0.7106, "step": 10136 }, { "epoch": 1.8229794120291287, "grad_norm": 1.2194443941116333, "learning_rate": 7.3410129835004405e-06, "loss": 0.923, "step": 10137 }, { "epoch": 1.8231592196349906, "grad_norm": 1.494834065437317, "learning_rate": 7.340498325850981e-06, "loss": 0.7705, "step": 10138 }, { "epoch": 1.8233390272408523, "grad_norm": 1.2154066562652588, "learning_rate": 7.339983636444031e-06, "loss": 0.9361, "step": 10139 }, { "epoch": 1.823518834846714, "grad_norm": 1.134406328201294, "learning_rate": 7.339468915286574e-06, "loss": 0.9348, "step": 10140 }, { "epoch": 1.8236986424525758, "grad_norm": 1.533646583557129, "learning_rate": 7.338954162385593e-06, "loss": 0.696, "step": 10141 }, { "epoch": 1.8238784500584375, "grad_norm": 1.6395894289016724, "learning_rate": 7.338439377748073e-06, "loss": 0.7416, "step": 10142 }, { "epoch": 1.8240582576642992, "grad_norm": 1.4437178373336792, "learning_rate": 7.337924561380999e-06, "loss": 0.712, "step": 10143 }, { "epoch": 1.824238065270161, "grad_norm": 1.539250135421753, "learning_rate": 7.337409713291357e-06, "loss": 0.6838, "step": 10144 }, { "epoch": 1.8244178728760225, "grad_norm": 1.4348658323287964, "learning_rate": 7.336894833486131e-06, "loss": 0.7364, "step": 10145 }, { "epoch": 1.8245976804818844, "grad_norm": 1.4789458513259888, "learning_rate": 7.33637992197231e-06, "loss": 0.7107, "step": 10146 }, { "epoch": 1.824777488087746, "grad_norm": 1.4847759008407593, "learning_rate": 7.335864978756878e-06, "loss": 0.7282, "step": 10147 }, { "epoch": 1.8249572956936078, "grad_norm": 1.2770428657531738, "learning_rate": 7.335350003846823e-06, "loss": 0.9384, "step": 10148 }, { "epoch": 1.8251371032994697, "grad_norm": 1.2206858396530151, "learning_rate": 7.334834997249133e-06, "loss": 0.9393, "step": 10149 }, { "epoch": 1.8253169109053313, "grad_norm": 1.1487926244735718, "learning_rate": 7.3343199589707955e-06, "loss": 0.9316, "step": 10150 }, { "epoch": 1.825496718511193, "grad_norm": 1.6196801662445068, "learning_rate": 7.333804889018799e-06, "loss": 0.7949, "step": 10151 }, { "epoch": 1.8256765261170549, "grad_norm": 1.4680757522583008, "learning_rate": 7.333289787400134e-06, "loss": 0.7383, "step": 10152 }, { "epoch": 1.8258563337229163, "grad_norm": 1.57151198387146, "learning_rate": 7.332774654121787e-06, "loss": 0.7316, "step": 10153 }, { "epoch": 1.8260361413287782, "grad_norm": 1.5111749172210693, "learning_rate": 7.332259489190749e-06, "loss": 0.8332, "step": 10154 }, { "epoch": 1.82621594893464, "grad_norm": 1.5587272644042969, "learning_rate": 7.3317442926140106e-06, "loss": 0.7822, "step": 10155 }, { "epoch": 1.8263957565405016, "grad_norm": 1.5871212482452393, "learning_rate": 7.331229064398561e-06, "loss": 0.657, "step": 10156 }, { "epoch": 1.8265755641463635, "grad_norm": 1.558013677597046, "learning_rate": 7.330713804551392e-06, "loss": 0.7122, "step": 10157 }, { "epoch": 1.8267553717522251, "grad_norm": 1.6078892946243286, "learning_rate": 7.3301985130794955e-06, "loss": 0.7558, "step": 10158 }, { "epoch": 1.8269351793580868, "grad_norm": 1.5716335773468018, "learning_rate": 7.3296831899898615e-06, "loss": 0.7266, "step": 10159 }, { "epoch": 1.8271149869639487, "grad_norm": 1.3464707136154175, "learning_rate": 7.329167835289483e-06, "loss": 0.8985, "step": 10160 }, { "epoch": 1.8272947945698101, "grad_norm": 1.461653709411621, "learning_rate": 7.3286524489853535e-06, "loss": 0.7255, "step": 10161 }, { "epoch": 1.827474602175672, "grad_norm": 1.19361412525177, "learning_rate": 7.328137031084468e-06, "loss": 0.9113, "step": 10162 }, { "epoch": 1.8276544097815337, "grad_norm": 1.4825979471206665, "learning_rate": 7.327621581593816e-06, "loss": 0.7268, "step": 10163 }, { "epoch": 1.8278342173873954, "grad_norm": 1.4765392541885376, "learning_rate": 7.3271061005203935e-06, "loss": 0.7012, "step": 10164 }, { "epoch": 1.8280140249932573, "grad_norm": 1.498453974723816, "learning_rate": 7.326590587871194e-06, "loss": 0.7596, "step": 10165 }, { "epoch": 1.828193832599119, "grad_norm": 1.679129719734192, "learning_rate": 7.326075043653214e-06, "loss": 0.7856, "step": 10166 }, { "epoch": 1.8283736402049806, "grad_norm": 1.6757062673568726, "learning_rate": 7.325559467873448e-06, "loss": 0.7329, "step": 10167 }, { "epoch": 1.8285534478108425, "grad_norm": 1.5112545490264893, "learning_rate": 7.325043860538892e-06, "loss": 0.7063, "step": 10168 }, { "epoch": 1.8287332554167042, "grad_norm": 1.4118231534957886, "learning_rate": 7.324528221656539e-06, "loss": 0.6875, "step": 10169 }, { "epoch": 1.8289130630225658, "grad_norm": 1.4161064624786377, "learning_rate": 7.324012551233391e-06, "loss": 0.6952, "step": 10170 }, { "epoch": 1.8290928706284277, "grad_norm": 1.5188217163085938, "learning_rate": 7.3234968492764395e-06, "loss": 0.7845, "step": 10171 }, { "epoch": 1.8292726782342892, "grad_norm": 1.5179998874664307, "learning_rate": 7.322981115792687e-06, "loss": 0.7341, "step": 10172 }, { "epoch": 1.829452485840151, "grad_norm": 1.3911330699920654, "learning_rate": 7.322465350789126e-06, "loss": 0.937, "step": 10173 }, { "epoch": 1.8296322934460127, "grad_norm": 1.662523627281189, "learning_rate": 7.32194955427276e-06, "loss": 0.7816, "step": 10174 }, { "epoch": 1.8298121010518744, "grad_norm": 1.6249300241470337, "learning_rate": 7.321433726250584e-06, "loss": 0.7654, "step": 10175 }, { "epoch": 1.8299919086577363, "grad_norm": 1.4931188821792603, "learning_rate": 7.3209178667296e-06, "loss": 0.7866, "step": 10176 }, { "epoch": 1.830171716263598, "grad_norm": 1.5144970417022705, "learning_rate": 7.3204019757168045e-06, "loss": 0.7125, "step": 10177 }, { "epoch": 1.8303515238694597, "grad_norm": 1.4848566055297852, "learning_rate": 7.3198860532191995e-06, "loss": 0.7133, "step": 10178 }, { "epoch": 1.8305313314753215, "grad_norm": 1.4724127054214478, "learning_rate": 7.319370099243784e-06, "loss": 0.6639, "step": 10179 }, { "epoch": 1.830711139081183, "grad_norm": 1.09564208984375, "learning_rate": 7.31885411379756e-06, "loss": 0.9688, "step": 10180 }, { "epoch": 1.8308909466870449, "grad_norm": 1.5228509902954102, "learning_rate": 7.318338096887529e-06, "loss": 0.7366, "step": 10181 }, { "epoch": 1.8310707542929066, "grad_norm": 1.4628137350082397, "learning_rate": 7.317822048520691e-06, "loss": 0.7214, "step": 10182 }, { "epoch": 1.8312505618987682, "grad_norm": 1.5585150718688965, "learning_rate": 7.317305968704049e-06, "loss": 0.6672, "step": 10183 }, { "epoch": 1.8314303695046301, "grad_norm": 1.5621676445007324, "learning_rate": 7.316789857444606e-06, "loss": 0.7752, "step": 10184 }, { "epoch": 1.8316101771104918, "grad_norm": 1.5760080814361572, "learning_rate": 7.316273714749365e-06, "loss": 0.7409, "step": 10185 }, { "epoch": 1.8317899847163535, "grad_norm": 1.5455785989761353, "learning_rate": 7.315757540625329e-06, "loss": 0.7197, "step": 10186 }, { "epoch": 1.8319697923222154, "grad_norm": 1.5143046379089355, "learning_rate": 7.315241335079501e-06, "loss": 0.7104, "step": 10187 }, { "epoch": 1.8321495999280768, "grad_norm": 1.4237394332885742, "learning_rate": 7.314725098118887e-06, "loss": 0.7008, "step": 10188 }, { "epoch": 1.8323294075339387, "grad_norm": 1.561832070350647, "learning_rate": 7.31420882975049e-06, "loss": 0.7452, "step": 10189 }, { "epoch": 1.8325092151398004, "grad_norm": 1.0705426931381226, "learning_rate": 7.313692529981317e-06, "loss": 0.9385, "step": 10190 }, { "epoch": 1.832689022745662, "grad_norm": 1.5485601425170898, "learning_rate": 7.31317619881837e-06, "loss": 0.7027, "step": 10191 }, { "epoch": 1.832868830351524, "grad_norm": 1.513107419013977, "learning_rate": 7.3126598362686576e-06, "loss": 0.8192, "step": 10192 }, { "epoch": 1.8330486379573856, "grad_norm": 1.6539489030838013, "learning_rate": 7.3121434423391855e-06, "loss": 0.7843, "step": 10193 }, { "epoch": 1.8332284455632473, "grad_norm": 1.554068684577942, "learning_rate": 7.311627017036963e-06, "loss": 0.7686, "step": 10194 }, { "epoch": 1.8334082531691092, "grad_norm": 1.5224405527114868, "learning_rate": 7.3111105603689925e-06, "loss": 0.7951, "step": 10195 }, { "epoch": 1.8335880607749708, "grad_norm": 1.4587031602859497, "learning_rate": 7.3105940723422865e-06, "loss": 0.6793, "step": 10196 }, { "epoch": 1.8337678683808325, "grad_norm": 1.464555263519287, "learning_rate": 7.310077552963849e-06, "loss": 0.7509, "step": 10197 }, { "epoch": 1.8339476759866944, "grad_norm": 0.9948192834854126, "learning_rate": 7.309561002240691e-06, "loss": 0.9264, "step": 10198 }, { "epoch": 1.8341274835925558, "grad_norm": 1.5158843994140625, "learning_rate": 7.3090444201798204e-06, "loss": 0.7465, "step": 10199 }, { "epoch": 1.8343072911984177, "grad_norm": 1.3984653949737549, "learning_rate": 7.308527806788248e-06, "loss": 0.7175, "step": 10200 }, { "epoch": 1.8344870988042794, "grad_norm": 1.4604405164718628, "learning_rate": 7.308011162072981e-06, "loss": 0.7664, "step": 10201 }, { "epoch": 1.834666906410141, "grad_norm": 1.4772019386291504, "learning_rate": 7.30749448604103e-06, "loss": 0.6537, "step": 10202 }, { "epoch": 1.834846714016003, "grad_norm": 1.3810611963272095, "learning_rate": 7.306977778699408e-06, "loss": 0.7318, "step": 10203 }, { "epoch": 1.8350265216218646, "grad_norm": 12.598820686340332, "learning_rate": 7.306461040055125e-06, "loss": 0.7026, "step": 10204 }, { "epoch": 1.8352063292277263, "grad_norm": 1.4808366298675537, "learning_rate": 7.30594427011519e-06, "loss": 0.7, "step": 10205 }, { "epoch": 1.8353861368335882, "grad_norm": 1.5193147659301758, "learning_rate": 7.30542746888662e-06, "loss": 0.7334, "step": 10206 }, { "epoch": 1.8355659444394496, "grad_norm": 1.108715295791626, "learning_rate": 7.3049106363764225e-06, "loss": 0.895, "step": 10207 }, { "epoch": 1.8357457520453115, "grad_norm": 1.5302014350891113, "learning_rate": 7.3043937725916125e-06, "loss": 0.7704, "step": 10208 }, { "epoch": 1.8359255596511732, "grad_norm": 1.4733600616455078, "learning_rate": 7.303876877539202e-06, "loss": 0.7167, "step": 10209 }, { "epoch": 1.8361053672570349, "grad_norm": 1.540223479270935, "learning_rate": 7.303359951226206e-06, "loss": 0.73, "step": 10210 }, { "epoch": 1.8362851748628968, "grad_norm": 1.4878805875778198, "learning_rate": 7.302842993659638e-06, "loss": 0.7362, "step": 10211 }, { "epoch": 1.8364649824687584, "grad_norm": 1.4142099618911743, "learning_rate": 7.3023260048465114e-06, "loss": 0.7046, "step": 10212 }, { "epoch": 1.8366447900746201, "grad_norm": 1.5605673789978027, "learning_rate": 7.301808984793842e-06, "loss": 0.6698, "step": 10213 }, { "epoch": 1.836824597680482, "grad_norm": 1.4923259019851685, "learning_rate": 7.301291933508645e-06, "loss": 0.7299, "step": 10214 }, { "epoch": 1.8370044052863435, "grad_norm": 1.5676790475845337, "learning_rate": 7.300774850997936e-06, "loss": 0.7546, "step": 10215 }, { "epoch": 1.8371842128922053, "grad_norm": 1.5024855136871338, "learning_rate": 7.300257737268732e-06, "loss": 0.7258, "step": 10216 }, { "epoch": 1.837364020498067, "grad_norm": 1.08505117893219, "learning_rate": 7.299740592328047e-06, "loss": 0.938, "step": 10217 }, { "epoch": 1.8375438281039287, "grad_norm": 1.9083677530288696, "learning_rate": 7.299223416182902e-06, "loss": 0.7728, "step": 10218 }, { "epoch": 1.8377236357097906, "grad_norm": 1.4379756450653076, "learning_rate": 7.298706208840311e-06, "loss": 0.7685, "step": 10219 }, { "epoch": 1.8379034433156523, "grad_norm": 1.5223958492279053, "learning_rate": 7.298188970307294e-06, "loss": 0.7621, "step": 10220 }, { "epoch": 1.838083250921514, "grad_norm": 1.4378225803375244, "learning_rate": 7.297671700590866e-06, "loss": 0.6864, "step": 10221 }, { "epoch": 1.8382630585273758, "grad_norm": 1.4029197692871094, "learning_rate": 7.29715439969805e-06, "loss": 0.7395, "step": 10222 }, { "epoch": 1.8384428661332375, "grad_norm": 1.4465736150741577, "learning_rate": 7.296637067635861e-06, "loss": 0.7281, "step": 10223 }, { "epoch": 1.8386226737390992, "grad_norm": 1.5764542818069458, "learning_rate": 7.2961197044113215e-06, "loss": 0.7048, "step": 10224 }, { "epoch": 1.838802481344961, "grad_norm": 1.5313963890075684, "learning_rate": 7.29560231003145e-06, "loss": 0.7351, "step": 10225 }, { "epoch": 1.8389822889508225, "grad_norm": 1.547076940536499, "learning_rate": 7.2950848845032685e-06, "loss": 0.7144, "step": 10226 }, { "epoch": 1.8391620965566844, "grad_norm": 1.9476901292800903, "learning_rate": 7.2945674278337965e-06, "loss": 0.7231, "step": 10227 }, { "epoch": 1.839341904162546, "grad_norm": 1.557981252670288, "learning_rate": 7.294049940030055e-06, "loss": 0.7116, "step": 10228 }, { "epoch": 1.8395217117684077, "grad_norm": 1.1762884855270386, "learning_rate": 7.293532421099064e-06, "loss": 0.9205, "step": 10229 }, { "epoch": 1.8397015193742696, "grad_norm": 1.4964364767074585, "learning_rate": 7.2930148710478495e-06, "loss": 0.7901, "step": 10230 }, { "epoch": 1.8398813269801313, "grad_norm": 1.626671314239502, "learning_rate": 7.292497289883432e-06, "loss": 0.7314, "step": 10231 }, { "epoch": 1.840061134585993, "grad_norm": 1.9438289403915405, "learning_rate": 7.291979677612835e-06, "loss": 0.7415, "step": 10232 }, { "epoch": 1.8402409421918549, "grad_norm": 1.0772207975387573, "learning_rate": 7.2914620342430795e-06, "loss": 0.9113, "step": 10233 }, { "epoch": 1.8404207497977163, "grad_norm": 1.450842022895813, "learning_rate": 7.290944359781191e-06, "loss": 0.7596, "step": 10234 }, { "epoch": 1.8406005574035782, "grad_norm": 1.468239665031433, "learning_rate": 7.290426654234194e-06, "loss": 0.7392, "step": 10235 }, { "epoch": 1.8407803650094399, "grad_norm": 1.427599549293518, "learning_rate": 7.289908917609112e-06, "loss": 0.7415, "step": 10236 }, { "epoch": 1.8409601726153015, "grad_norm": 1.633904218673706, "learning_rate": 7.289391149912972e-06, "loss": 0.7678, "step": 10237 }, { "epoch": 1.8411399802211634, "grad_norm": 1.56283438205719, "learning_rate": 7.2888733511527965e-06, "loss": 0.753, "step": 10238 }, { "epoch": 1.841319787827025, "grad_norm": 1.5389716625213623, "learning_rate": 7.288355521335615e-06, "loss": 0.757, "step": 10239 }, { "epoch": 1.8414995954328868, "grad_norm": 1.474405288696289, "learning_rate": 7.28783766046845e-06, "loss": 0.7694, "step": 10240 }, { "epoch": 1.8416794030387487, "grad_norm": 1.4810248613357544, "learning_rate": 7.2873197685583305e-06, "loss": 0.7458, "step": 10241 }, { "epoch": 1.8418592106446101, "grad_norm": 1.4760215282440186, "learning_rate": 7.286801845612282e-06, "loss": 0.7285, "step": 10242 }, { "epoch": 1.842039018250472, "grad_norm": 1.5055854320526123, "learning_rate": 7.286283891637336e-06, "loss": 0.7308, "step": 10243 }, { "epoch": 1.8422188258563337, "grad_norm": 1.117045521736145, "learning_rate": 7.285765906640514e-06, "loss": 0.9327, "step": 10244 }, { "epoch": 1.8423986334621953, "grad_norm": 1.4648888111114502, "learning_rate": 7.285247890628851e-06, "loss": 0.7593, "step": 10245 }, { "epoch": 1.8425784410680572, "grad_norm": 1.528350830078125, "learning_rate": 7.284729843609371e-06, "loss": 0.7233, "step": 10246 }, { "epoch": 1.842758248673919, "grad_norm": 1.54295015335083, "learning_rate": 7.2842117655891045e-06, "loss": 0.7254, "step": 10247 }, { "epoch": 1.8429380562797806, "grad_norm": 1.4663976430892944, "learning_rate": 7.283693656575081e-06, "loss": 0.7916, "step": 10248 }, { "epoch": 1.8431178638856425, "grad_norm": 1.5001922845840454, "learning_rate": 7.283175516574332e-06, "loss": 0.7487, "step": 10249 }, { "epoch": 1.8432976714915041, "grad_norm": 1.5271663665771484, "learning_rate": 7.282657345593887e-06, "loss": 0.7187, "step": 10250 }, { "epoch": 1.8434774790973658, "grad_norm": 1.5579949617385864, "learning_rate": 7.282139143640778e-06, "loss": 0.7035, "step": 10251 }, { "epoch": 1.8436572867032277, "grad_norm": 1.5360676050186157, "learning_rate": 7.281620910722035e-06, "loss": 0.6889, "step": 10252 }, { "epoch": 1.8438370943090892, "grad_norm": 1.414421558380127, "learning_rate": 7.28110264684469e-06, "loss": 0.6651, "step": 10253 }, { "epoch": 1.844016901914951, "grad_norm": 1.074476957321167, "learning_rate": 7.280584352015774e-06, "loss": 0.9389, "step": 10254 }, { "epoch": 1.8441967095208127, "grad_norm": 1.4415242671966553, "learning_rate": 7.280066026242323e-06, "loss": 0.7355, "step": 10255 }, { "epoch": 1.8443765171266744, "grad_norm": 1.4917035102844238, "learning_rate": 7.279547669531365e-06, "loss": 0.7434, "step": 10256 }, { "epoch": 1.8445563247325363, "grad_norm": 1.5086281299591064, "learning_rate": 7.279029281889938e-06, "loss": 0.702, "step": 10257 }, { "epoch": 1.844736132338398, "grad_norm": 1.4655007123947144, "learning_rate": 7.278510863325073e-06, "loss": 0.7146, "step": 10258 }, { "epoch": 1.8449159399442596, "grad_norm": 1.4379961490631104, "learning_rate": 7.2779924138438065e-06, "loss": 0.7058, "step": 10259 }, { "epoch": 1.8450957475501215, "grad_norm": 1.5010120868682861, "learning_rate": 7.27747393345317e-06, "loss": 0.7004, "step": 10260 }, { "epoch": 1.845275555155983, "grad_norm": 1.1132128238677979, "learning_rate": 7.276955422160204e-06, "loss": 0.892, "step": 10261 }, { "epoch": 1.8454553627618449, "grad_norm": 1.3015471696853638, "learning_rate": 7.276436879971936e-06, "loss": 0.9631, "step": 10262 }, { "epoch": 1.8456351703677065, "grad_norm": 1.507936954498291, "learning_rate": 7.275918306895411e-06, "loss": 0.7116, "step": 10263 }, { "epoch": 1.8458149779735682, "grad_norm": 1.5005743503570557, "learning_rate": 7.275399702937658e-06, "loss": 0.759, "step": 10264 }, { "epoch": 1.84599478557943, "grad_norm": 1.457364559173584, "learning_rate": 7.274881068105718e-06, "loss": 0.7465, "step": 10265 }, { "epoch": 1.8461745931852918, "grad_norm": 1.4361366033554077, "learning_rate": 7.274362402406626e-06, "loss": 0.7408, "step": 10266 }, { "epoch": 1.8463544007911534, "grad_norm": 2.160539150238037, "learning_rate": 7.273843705847422e-06, "loss": 0.7057, "step": 10267 }, { "epoch": 1.8465342083970153, "grad_norm": 1.379018783569336, "learning_rate": 7.273324978435141e-06, "loss": 0.69, "step": 10268 }, { "epoch": 1.8467140160028768, "grad_norm": 1.5085697174072266, "learning_rate": 7.2728062201768225e-06, "loss": 0.7237, "step": 10269 }, { "epoch": 1.8468938236087387, "grad_norm": 1.4367973804473877, "learning_rate": 7.272287431079506e-06, "loss": 0.7281, "step": 10270 }, { "epoch": 1.8470736312146003, "grad_norm": 1.4824432134628296, "learning_rate": 7.2717686111502325e-06, "loss": 0.6803, "step": 10271 }, { "epoch": 1.847253438820462, "grad_norm": 1.5692678689956665, "learning_rate": 7.271249760396039e-06, "loss": 0.7421, "step": 10272 }, { "epoch": 1.847433246426324, "grad_norm": 1.5815820693969727, "learning_rate": 7.270730878823966e-06, "loss": 0.7529, "step": 10273 }, { "epoch": 1.8476130540321856, "grad_norm": 1.412538766860962, "learning_rate": 7.270211966441054e-06, "loss": 0.7227, "step": 10274 }, { "epoch": 1.8477928616380472, "grad_norm": 1.4663585424423218, "learning_rate": 7.269693023254346e-06, "loss": 0.744, "step": 10275 }, { "epoch": 1.8479726692439091, "grad_norm": 1.551208257675171, "learning_rate": 7.26917404927088e-06, "loss": 0.6774, "step": 10276 }, { "epoch": 1.8481524768497708, "grad_norm": 0.9969547986984253, "learning_rate": 7.268655044497701e-06, "loss": 0.96, "step": 10277 }, { "epoch": 1.8483322844556325, "grad_norm": 1.4499027729034424, "learning_rate": 7.26813600894185e-06, "loss": 0.7123, "step": 10278 }, { "epoch": 1.8485120920614944, "grad_norm": 1.0497273206710815, "learning_rate": 7.267616942610367e-06, "loss": 0.9687, "step": 10279 }, { "epoch": 1.8486918996673558, "grad_norm": 1.5553077459335327, "learning_rate": 7.267097845510299e-06, "loss": 0.7774, "step": 10280 }, { "epoch": 1.8488717072732177, "grad_norm": 1.0918822288513184, "learning_rate": 7.266578717648689e-06, "loss": 0.9023, "step": 10281 }, { "epoch": 1.8490515148790794, "grad_norm": 1.5093258619308472, "learning_rate": 7.266059559032579e-06, "loss": 0.6699, "step": 10282 }, { "epoch": 1.849231322484941, "grad_norm": 1.4989207983016968, "learning_rate": 7.265540369669015e-06, "loss": 0.7625, "step": 10283 }, { "epoch": 1.849411130090803, "grad_norm": 1.4574360847473145, "learning_rate": 7.265021149565039e-06, "loss": 0.687, "step": 10284 }, { "epoch": 1.8495909376966646, "grad_norm": 1.4334677457809448, "learning_rate": 7.264501898727701e-06, "loss": 0.698, "step": 10285 }, { "epoch": 1.8497707453025263, "grad_norm": 1.478428602218628, "learning_rate": 7.263982617164041e-06, "loss": 0.7348, "step": 10286 }, { "epoch": 1.8499505529083882, "grad_norm": 1.697331428527832, "learning_rate": 7.263463304881109e-06, "loss": 0.7954, "step": 10287 }, { "epoch": 1.8501303605142496, "grad_norm": 1.4944278001785278, "learning_rate": 7.262943961885949e-06, "loss": 0.752, "step": 10288 }, { "epoch": 1.8503101681201115, "grad_norm": 1.4821676015853882, "learning_rate": 7.2624245881856094e-06, "loss": 0.7817, "step": 10289 }, { "epoch": 1.8504899757259732, "grad_norm": 1.5071450471878052, "learning_rate": 7.261905183787136e-06, "loss": 0.7965, "step": 10290 }, { "epoch": 1.8506697833318349, "grad_norm": 1.5847034454345703, "learning_rate": 7.2613857486975765e-06, "loss": 0.7159, "step": 10291 }, { "epoch": 1.8508495909376967, "grad_norm": 1.5962433815002441, "learning_rate": 7.2608662829239805e-06, "loss": 0.735, "step": 10292 }, { "epoch": 1.8510293985435584, "grad_norm": 2.2000489234924316, "learning_rate": 7.2603467864733956e-06, "loss": 0.7144, "step": 10293 }, { "epoch": 1.85120920614942, "grad_norm": 1.4716962575912476, "learning_rate": 7.259827259352871e-06, "loss": 0.6914, "step": 10294 }, { "epoch": 1.851389013755282, "grad_norm": 1.5204476118087769, "learning_rate": 7.259307701569456e-06, "loss": 0.7097, "step": 10295 }, { "epoch": 1.8515688213611434, "grad_norm": 1.4989418983459473, "learning_rate": 7.258788113130199e-06, "loss": 0.7566, "step": 10296 }, { "epoch": 1.8517486289670053, "grad_norm": 1.4205050468444824, "learning_rate": 7.2582684940421525e-06, "loss": 0.7565, "step": 10297 }, { "epoch": 1.851928436572867, "grad_norm": 1.6245994567871094, "learning_rate": 7.257748844312364e-06, "loss": 0.7705, "step": 10298 }, { "epoch": 1.8521082441787287, "grad_norm": 1.5018113851547241, "learning_rate": 7.257229163947887e-06, "loss": 0.7139, "step": 10299 }, { "epoch": 1.8522880517845906, "grad_norm": 1.489034652709961, "learning_rate": 7.256709452955773e-06, "loss": 0.7317, "step": 10300 }, { "epoch": 1.8524678593904522, "grad_norm": 1.638431191444397, "learning_rate": 7.256189711343071e-06, "loss": 0.7298, "step": 10301 }, { "epoch": 1.852647666996314, "grad_norm": 1.3468042612075806, "learning_rate": 7.2556699391168365e-06, "loss": 0.9046, "step": 10302 }, { "epoch": 1.8528274746021758, "grad_norm": 1.5070359706878662, "learning_rate": 7.255150136284119e-06, "loss": 0.7058, "step": 10303 }, { "epoch": 1.8530072822080375, "grad_norm": 1.5590699911117554, "learning_rate": 7.2546303028519745e-06, "loss": 0.7608, "step": 10304 }, { "epoch": 1.8531870898138991, "grad_norm": 1.5243630409240723, "learning_rate": 7.254110438827455e-06, "loss": 0.6971, "step": 10305 }, { "epoch": 1.853366897419761, "grad_norm": 1.4766061305999756, "learning_rate": 7.2535905442176145e-06, "loss": 0.6829, "step": 10306 }, { "epoch": 1.8535467050256225, "grad_norm": 1.5884983539581299, "learning_rate": 7.253070619029508e-06, "loss": 0.7552, "step": 10307 }, { "epoch": 1.8537265126314844, "grad_norm": 1.490683674812317, "learning_rate": 7.252550663270189e-06, "loss": 0.7861, "step": 10308 }, { "epoch": 1.853906320237346, "grad_norm": 1.4865549802780151, "learning_rate": 7.252030676946713e-06, "loss": 0.7006, "step": 10309 }, { "epoch": 1.8540861278432077, "grad_norm": 1.5945638418197632, "learning_rate": 7.2515106600661356e-06, "loss": 0.7045, "step": 10310 }, { "epoch": 1.8542659354490696, "grad_norm": 1.4962197542190552, "learning_rate": 7.2509906126355135e-06, "loss": 0.746, "step": 10311 }, { "epoch": 1.8544457430549313, "grad_norm": 1.4705361127853394, "learning_rate": 7.250470534661902e-06, "loss": 0.7454, "step": 10312 }, { "epoch": 1.854625550660793, "grad_norm": 1.4617516994476318, "learning_rate": 7.249950426152357e-06, "loss": 0.6907, "step": 10313 }, { "epoch": 1.8548053582666548, "grad_norm": 1.4537663459777832, "learning_rate": 7.249430287113938e-06, "loss": 0.7197, "step": 10314 }, { "epoch": 1.8549851658725163, "grad_norm": 1.5324536561965942, "learning_rate": 7.248910117553702e-06, "loss": 0.7465, "step": 10315 }, { "epoch": 1.8551649734783782, "grad_norm": 1.3693337440490723, "learning_rate": 7.248389917478706e-06, "loss": 0.6449, "step": 10316 }, { "epoch": 1.8553447810842398, "grad_norm": 1.430230975151062, "learning_rate": 7.247869686896009e-06, "loss": 0.6964, "step": 10317 }, { "epoch": 1.8555245886901015, "grad_norm": 1.5152034759521484, "learning_rate": 7.247349425812671e-06, "loss": 0.742, "step": 10318 }, { "epoch": 1.8557043962959634, "grad_norm": 1.5997143983840942, "learning_rate": 7.24682913423575e-06, "loss": 0.7708, "step": 10319 }, { "epoch": 1.855884203901825, "grad_norm": 1.4943801164627075, "learning_rate": 7.246308812172305e-06, "loss": 0.7165, "step": 10320 }, { "epoch": 1.8560640115076867, "grad_norm": 1.65621817111969, "learning_rate": 7.245788459629397e-06, "loss": 0.7363, "step": 10321 }, { "epoch": 1.8562438191135486, "grad_norm": 1.413213849067688, "learning_rate": 7.245268076614086e-06, "loss": 0.7231, "step": 10322 }, { "epoch": 1.85642362671941, "grad_norm": 1.4386694431304932, "learning_rate": 7.244747663133433e-06, "loss": 0.7157, "step": 10323 }, { "epoch": 1.856603434325272, "grad_norm": 1.4151564836502075, "learning_rate": 7.2442272191945e-06, "loss": 0.7563, "step": 10324 }, { "epoch": 1.8567832419311336, "grad_norm": 1.739313006401062, "learning_rate": 7.243706744804349e-06, "loss": 0.7409, "step": 10325 }, { "epoch": 1.8569630495369953, "grad_norm": 1.4979983568191528, "learning_rate": 7.24318623997004e-06, "loss": 0.7452, "step": 10326 }, { "epoch": 1.8571428571428572, "grad_norm": 1.5842736959457397, "learning_rate": 7.242665704698639e-06, "loss": 0.7496, "step": 10327 }, { "epoch": 1.8573226647487189, "grad_norm": 1.549912929534912, "learning_rate": 7.2421451389972065e-06, "loss": 0.7443, "step": 10328 }, { "epoch": 1.8575024723545805, "grad_norm": 1.378426194190979, "learning_rate": 7.241624542872807e-06, "loss": 0.661, "step": 10329 }, { "epoch": 1.8576822799604424, "grad_norm": 1.4895249605178833, "learning_rate": 7.241103916332501e-06, "loss": 0.7587, "step": 10330 }, { "epoch": 1.857862087566304, "grad_norm": 1.1205816268920898, "learning_rate": 7.240583259383359e-06, "loss": 0.921, "step": 10331 }, { "epoch": 1.8580418951721658, "grad_norm": 1.6050394773483276, "learning_rate": 7.24006257203244e-06, "loss": 0.7496, "step": 10332 }, { "epoch": 1.8582217027780277, "grad_norm": 1.5132880210876465, "learning_rate": 7.239541854286812e-06, "loss": 0.701, "step": 10333 }, { "epoch": 1.8584015103838891, "grad_norm": 1.5428893566131592, "learning_rate": 7.239021106153539e-06, "loss": 0.8034, "step": 10334 }, { "epoch": 1.858581317989751, "grad_norm": 1.5335445404052734, "learning_rate": 7.238500327639688e-06, "loss": 0.7833, "step": 10335 }, { "epoch": 1.8587611255956127, "grad_norm": 1.3647902011871338, "learning_rate": 7.237979518752325e-06, "loss": 0.6913, "step": 10336 }, { "epoch": 1.8589409332014744, "grad_norm": 1.50932776927948, "learning_rate": 7.2374586794985165e-06, "loss": 0.7489, "step": 10337 }, { "epoch": 1.8591207408073362, "grad_norm": 1.4910850524902344, "learning_rate": 7.2369378098853285e-06, "loss": 0.7296, "step": 10338 }, { "epoch": 1.859300548413198, "grad_norm": 1.4432640075683594, "learning_rate": 7.236416909919831e-06, "loss": 0.7063, "step": 10339 }, { "epoch": 1.8594803560190596, "grad_norm": 1.7183880805969238, "learning_rate": 7.235895979609089e-06, "loss": 0.7479, "step": 10340 }, { "epoch": 1.8596601636249215, "grad_norm": 1.4438295364379883, "learning_rate": 7.235375018960174e-06, "loss": 0.6564, "step": 10341 }, { "epoch": 1.859839971230783, "grad_norm": 1.2008877992630005, "learning_rate": 7.234854027980152e-06, "loss": 0.8924, "step": 10342 }, { "epoch": 1.8600197788366448, "grad_norm": 1.543958306312561, "learning_rate": 7.234333006676094e-06, "loss": 0.7376, "step": 10343 }, { "epoch": 1.8601995864425065, "grad_norm": 1.4707233905792236, "learning_rate": 7.233811955055068e-06, "loss": 0.7427, "step": 10344 }, { "epoch": 1.8603793940483682, "grad_norm": 1.4321928024291992, "learning_rate": 7.233290873124145e-06, "loss": 0.7451, "step": 10345 }, { "epoch": 1.86055920165423, "grad_norm": 1.4938762187957764, "learning_rate": 7.232769760890394e-06, "loss": 0.696, "step": 10346 }, { "epoch": 1.8607390092600917, "grad_norm": 1.170570731163025, "learning_rate": 7.232248618360889e-06, "loss": 0.8831, "step": 10347 }, { "epoch": 1.8609188168659534, "grad_norm": 1.5800457000732422, "learning_rate": 7.2317274455427e-06, "loss": 0.7354, "step": 10348 }, { "epoch": 1.8610986244718153, "grad_norm": 1.457457184791565, "learning_rate": 7.2312062424428965e-06, "loss": 0.7487, "step": 10349 }, { "epoch": 1.8612784320776767, "grad_norm": 1.461995244026184, "learning_rate": 7.230685009068552e-06, "loss": 0.7126, "step": 10350 }, { "epoch": 1.8614582396835386, "grad_norm": 1.622671127319336, "learning_rate": 7.230163745426739e-06, "loss": 0.6864, "step": 10351 }, { "epoch": 1.8616380472894003, "grad_norm": 1.5144110918045044, "learning_rate": 7.22964245152453e-06, "loss": 0.696, "step": 10352 }, { "epoch": 1.861817854895262, "grad_norm": 1.4640765190124512, "learning_rate": 7.229121127369e-06, "loss": 0.7497, "step": 10353 }, { "epoch": 1.8619976625011239, "grad_norm": 1.5341858863830566, "learning_rate": 7.2285997729672194e-06, "loss": 0.7456, "step": 10354 }, { "epoch": 1.8621774701069855, "grad_norm": 1.5014560222625732, "learning_rate": 7.228078388326264e-06, "loss": 0.7473, "step": 10355 }, { "epoch": 1.8623572777128472, "grad_norm": 1.0227417945861816, "learning_rate": 7.227556973453209e-06, "loss": 0.9436, "step": 10356 }, { "epoch": 1.862537085318709, "grad_norm": 1.4514485597610474, "learning_rate": 7.227035528355129e-06, "loss": 0.7296, "step": 10357 }, { "epoch": 1.8627168929245705, "grad_norm": 1.0949214696884155, "learning_rate": 7.2265140530390984e-06, "loss": 0.9266, "step": 10358 }, { "epoch": 1.8628967005304324, "grad_norm": 1.478083848953247, "learning_rate": 7.225992547512195e-06, "loss": 0.7602, "step": 10359 }, { "epoch": 1.8630765081362943, "grad_norm": 1.4883607625961304, "learning_rate": 7.2254710117814934e-06, "loss": 0.7522, "step": 10360 }, { "epoch": 1.8632563157421558, "grad_norm": 1.4523370265960693, "learning_rate": 7.224949445854069e-06, "loss": 0.7078, "step": 10361 }, { "epoch": 1.8634361233480177, "grad_norm": 1.3937733173370361, "learning_rate": 7.224427849737e-06, "loss": 0.7024, "step": 10362 }, { "epoch": 1.8636159309538793, "grad_norm": 1.4915035963058472, "learning_rate": 7.223906223437364e-06, "loss": 0.6761, "step": 10363 }, { "epoch": 1.863795738559741, "grad_norm": 1.493349313735962, "learning_rate": 7.223384566962239e-06, "loss": 0.7751, "step": 10364 }, { "epoch": 1.863975546165603, "grad_norm": 1.7010523080825806, "learning_rate": 7.222862880318704e-06, "loss": 0.7418, "step": 10365 }, { "epoch": 1.8641553537714646, "grad_norm": 1.0633409023284912, "learning_rate": 7.222341163513835e-06, "loss": 0.9128, "step": 10366 }, { "epoch": 1.8643351613773262, "grad_norm": 1.6592049598693848, "learning_rate": 7.221819416554713e-06, "loss": 0.7425, "step": 10367 }, { "epoch": 1.8645149689831881, "grad_norm": 1.4365195035934448, "learning_rate": 7.221297639448416e-06, "loss": 0.7544, "step": 10368 }, { "epoch": 1.8646947765890496, "grad_norm": 1.489888310432434, "learning_rate": 7.220775832202025e-06, "loss": 0.7771, "step": 10369 }, { "epoch": 1.8648745841949115, "grad_norm": 1.1713531017303467, "learning_rate": 7.2202539948226205e-06, "loss": 0.9299, "step": 10370 }, { "epoch": 1.8650543918007731, "grad_norm": 1.6311712265014648, "learning_rate": 7.2197321273172815e-06, "loss": 0.7065, "step": 10371 }, { "epoch": 1.8652341994066348, "grad_norm": 1.434099555015564, "learning_rate": 7.219210229693091e-06, "loss": 0.7481, "step": 10372 }, { "epoch": 1.8654140070124967, "grad_norm": 1.4877870082855225, "learning_rate": 7.218688301957129e-06, "loss": 0.7059, "step": 10373 }, { "epoch": 1.8655938146183584, "grad_norm": 1.498700737953186, "learning_rate": 7.218166344116479e-06, "loss": 0.7396, "step": 10374 }, { "epoch": 1.86577362222422, "grad_norm": 1.357292890548706, "learning_rate": 7.217644356178221e-06, "loss": 0.7013, "step": 10375 }, { "epoch": 1.865953429830082, "grad_norm": 1.5057040452957153, "learning_rate": 7.217122338149441e-06, "loss": 0.7736, "step": 10376 }, { "epoch": 1.8661332374359434, "grad_norm": 1.584230899810791, "learning_rate": 7.216600290037218e-06, "loss": 0.72, "step": 10377 }, { "epoch": 1.8663130450418053, "grad_norm": 1.6715397834777832, "learning_rate": 7.216078211848638e-06, "loss": 0.7637, "step": 10378 }, { "epoch": 1.866492852647667, "grad_norm": 1.0579230785369873, "learning_rate": 7.215556103590784e-06, "loss": 0.9106, "step": 10379 }, { "epoch": 1.8666726602535286, "grad_norm": 1.3979597091674805, "learning_rate": 7.215033965270741e-06, "loss": 0.7189, "step": 10380 }, { "epoch": 1.8668524678593905, "grad_norm": 1.1354269981384277, "learning_rate": 7.214511796895594e-06, "loss": 0.8872, "step": 10381 }, { "epoch": 1.8670322754652522, "grad_norm": 0.981820285320282, "learning_rate": 7.213989598472428e-06, "loss": 0.9459, "step": 10382 }, { "epoch": 1.8672120830711139, "grad_norm": 1.384495496749878, "learning_rate": 7.213467370008328e-06, "loss": 0.6563, "step": 10383 }, { "epoch": 1.8673918906769758, "grad_norm": 1.054746389389038, "learning_rate": 7.212945111510381e-06, "loss": 0.9673, "step": 10384 }, { "epoch": 1.8675716982828372, "grad_norm": 1.2012779712677002, "learning_rate": 7.212422822985671e-06, "loss": 0.9575, "step": 10385 }, { "epoch": 1.867751505888699, "grad_norm": 1.5857000350952148, "learning_rate": 7.211900504441287e-06, "loss": 0.7318, "step": 10386 }, { "epoch": 1.867931313494561, "grad_norm": 1.1472655534744263, "learning_rate": 7.211378155884314e-06, "loss": 0.9161, "step": 10387 }, { "epoch": 1.8681111211004224, "grad_norm": 1.4622554779052734, "learning_rate": 7.210855777321843e-06, "loss": 0.7418, "step": 10388 }, { "epoch": 1.8682909287062843, "grad_norm": 1.5626554489135742, "learning_rate": 7.21033336876096e-06, "loss": 0.7896, "step": 10389 }, { "epoch": 1.868470736312146, "grad_norm": 1.4857176542282104, "learning_rate": 7.209810930208752e-06, "loss": 0.7811, "step": 10390 }, { "epoch": 1.8686505439180077, "grad_norm": 1.4328261613845825, "learning_rate": 7.209288461672309e-06, "loss": 0.696, "step": 10391 }, { "epoch": 1.8688303515238696, "grad_norm": 1.4948875904083252, "learning_rate": 7.208765963158723e-06, "loss": 0.7427, "step": 10392 }, { "epoch": 1.8690101591297312, "grad_norm": 1.5926241874694824, "learning_rate": 7.208243434675078e-06, "loss": 0.7879, "step": 10393 }, { "epoch": 1.869189966735593, "grad_norm": 1.4241564273834229, "learning_rate": 7.20772087622847e-06, "loss": 0.6522, "step": 10394 }, { "epoch": 1.8693697743414548, "grad_norm": 1.5711944103240967, "learning_rate": 7.207198287825985e-06, "loss": 0.6899, "step": 10395 }, { "epoch": 1.8695495819473162, "grad_norm": 1.3906207084655762, "learning_rate": 7.206675669474717e-06, "loss": 0.6928, "step": 10396 }, { "epoch": 1.8697293895531781, "grad_norm": 1.4204730987548828, "learning_rate": 7.206153021181752e-06, "loss": 0.7245, "step": 10397 }, { "epoch": 1.8699091971590398, "grad_norm": 1.48167884349823, "learning_rate": 7.205630342954189e-06, "loss": 0.6829, "step": 10398 }, { "epoch": 1.8700890047649015, "grad_norm": 1.1144943237304688, "learning_rate": 7.205107634799115e-06, "loss": 0.9494, "step": 10399 }, { "epoch": 1.8702688123707634, "grad_norm": 1.5286332368850708, "learning_rate": 7.204584896723622e-06, "loss": 0.7592, "step": 10400 }, { "epoch": 1.870448619976625, "grad_norm": 1.5874212980270386, "learning_rate": 7.204062128734805e-06, "loss": 0.7681, "step": 10401 }, { "epoch": 1.8706284275824867, "grad_norm": 1.5182703733444214, "learning_rate": 7.203539330839759e-06, "loss": 0.7946, "step": 10402 }, { "epoch": 1.8708082351883486, "grad_norm": 1.494375467300415, "learning_rate": 7.203016503045576e-06, "loss": 0.6926, "step": 10403 }, { "epoch": 1.87098804279421, "grad_norm": 1.4571971893310547, "learning_rate": 7.2024936453593484e-06, "loss": 0.7334, "step": 10404 }, { "epoch": 1.871167850400072, "grad_norm": 1.4164546728134155, "learning_rate": 7.201970757788172e-06, "loss": 0.6805, "step": 10405 }, { "epoch": 1.8713476580059336, "grad_norm": 1.5357248783111572, "learning_rate": 7.201447840339142e-06, "loss": 0.7767, "step": 10406 }, { "epoch": 1.8715274656117953, "grad_norm": 2.2225396633148193, "learning_rate": 7.200924893019353e-06, "loss": 0.7641, "step": 10407 }, { "epoch": 1.8717072732176572, "grad_norm": 1.465378761291504, "learning_rate": 7.200401915835902e-06, "loss": 0.7369, "step": 10408 }, { "epoch": 1.8718870808235188, "grad_norm": 1.400793194770813, "learning_rate": 7.199878908795883e-06, "loss": 0.7072, "step": 10409 }, { "epoch": 1.8720668884293805, "grad_norm": 1.4382061958312988, "learning_rate": 7.199355871906395e-06, "loss": 0.7588, "step": 10410 }, { "epoch": 1.8722466960352424, "grad_norm": 1.4851244688034058, "learning_rate": 7.198832805174533e-06, "loss": 0.7509, "step": 10411 }, { "epoch": 1.8724265036411039, "grad_norm": 1.1101698875427246, "learning_rate": 7.198309708607395e-06, "loss": 0.9426, "step": 10412 }, { "epoch": 1.8726063112469657, "grad_norm": 1.3858927488327026, "learning_rate": 7.197786582212078e-06, "loss": 0.6894, "step": 10413 }, { "epoch": 1.8727861188528274, "grad_norm": 1.349870204925537, "learning_rate": 7.197263425995682e-06, "loss": 0.7156, "step": 10414 }, { "epoch": 1.872965926458689, "grad_norm": 1.1246813535690308, "learning_rate": 7.196740239965304e-06, "loss": 0.9218, "step": 10415 }, { "epoch": 1.873145734064551, "grad_norm": 1.371105670928955, "learning_rate": 7.196217024128045e-06, "loss": 0.6701, "step": 10416 }, { "epoch": 1.8733255416704127, "grad_norm": 1.5739704370498657, "learning_rate": 7.195693778491e-06, "loss": 0.7162, "step": 10417 }, { "epoch": 1.8735053492762743, "grad_norm": 0.9696927666664124, "learning_rate": 7.195170503061273e-06, "loss": 0.9288, "step": 10418 }, { "epoch": 1.8736851568821362, "grad_norm": 1.1596225500106812, "learning_rate": 7.194647197845962e-06, "loss": 0.8964, "step": 10419 }, { "epoch": 1.8738649644879979, "grad_norm": 1.546182632446289, "learning_rate": 7.194123862852169e-06, "loss": 0.7247, "step": 10420 }, { "epoch": 1.8740447720938596, "grad_norm": 1.5619821548461914, "learning_rate": 7.193600498086994e-06, "loss": 0.7837, "step": 10421 }, { "epoch": 1.8742245796997214, "grad_norm": 1.4951521158218384, "learning_rate": 7.193077103557538e-06, "loss": 0.6663, "step": 10422 }, { "epoch": 1.874404387305583, "grad_norm": 1.407670021057129, "learning_rate": 7.192553679270903e-06, "loss": 0.7648, "step": 10423 }, { "epoch": 1.8745841949114448, "grad_norm": 1.4899924993515015, "learning_rate": 7.1920302252341925e-06, "loss": 0.7277, "step": 10424 }, { "epoch": 1.8747640025173065, "grad_norm": 1.457079529762268, "learning_rate": 7.191506741454507e-06, "loss": 0.7629, "step": 10425 }, { "epoch": 1.8749438101231681, "grad_norm": 1.53261137008667, "learning_rate": 7.190983227938951e-06, "loss": 0.7313, "step": 10426 }, { "epoch": 1.87512361772903, "grad_norm": 1.564475417137146, "learning_rate": 7.190459684694629e-06, "loss": 0.7206, "step": 10427 }, { "epoch": 1.8753034253348917, "grad_norm": 1.4801651239395142, "learning_rate": 7.189936111728641e-06, "loss": 0.7293, "step": 10428 }, { "epoch": 1.8754832329407534, "grad_norm": 1.568942904472351, "learning_rate": 7.189412509048095e-06, "loss": 0.742, "step": 10429 }, { "epoch": 1.8756630405466153, "grad_norm": 1.697901725769043, "learning_rate": 7.188888876660094e-06, "loss": 0.7271, "step": 10430 }, { "epoch": 1.8758428481524767, "grad_norm": 1.4327242374420166, "learning_rate": 7.188365214571742e-06, "loss": 0.7009, "step": 10431 }, { "epoch": 1.8760226557583386, "grad_norm": 1.4469623565673828, "learning_rate": 7.187841522790144e-06, "loss": 0.7057, "step": 10432 }, { "epoch": 1.8762024633642003, "grad_norm": 1.5340327024459839, "learning_rate": 7.18731780132241e-06, "loss": 0.7227, "step": 10433 }, { "epoch": 1.876382270970062, "grad_norm": 1.4873405694961548, "learning_rate": 7.186794050175643e-06, "loss": 0.7071, "step": 10434 }, { "epoch": 1.8765620785759238, "grad_norm": 1.4326608180999756, "learning_rate": 7.18627026935695e-06, "loss": 0.7057, "step": 10435 }, { "epoch": 1.8767418861817855, "grad_norm": 1.4187778234481812, "learning_rate": 7.185746458873439e-06, "loss": 0.7155, "step": 10436 }, { "epoch": 1.8769216937876472, "grad_norm": 1.5176116228103638, "learning_rate": 7.185222618732215e-06, "loss": 0.7368, "step": 10437 }, { "epoch": 1.877101501393509, "grad_norm": 1.4772615432739258, "learning_rate": 7.184698748940389e-06, "loss": 0.7392, "step": 10438 }, { "epoch": 1.8772813089993705, "grad_norm": 1.5501788854599, "learning_rate": 7.184174849505066e-06, "loss": 0.7039, "step": 10439 }, { "epoch": 1.8774611166052324, "grad_norm": 1.4823222160339355, "learning_rate": 7.183650920433356e-06, "loss": 0.75, "step": 10440 }, { "epoch": 1.877640924211094, "grad_norm": 1.5445256233215332, "learning_rate": 7.18312696173237e-06, "loss": 0.7549, "step": 10441 }, { "epoch": 1.8778207318169557, "grad_norm": 1.7443852424621582, "learning_rate": 7.182602973409214e-06, "loss": 0.6947, "step": 10442 }, { "epoch": 1.8780005394228176, "grad_norm": 1.445761799812317, "learning_rate": 7.1820789554710005e-06, "loss": 0.7648, "step": 10443 }, { "epoch": 1.8781803470286793, "grad_norm": 1.5191019773483276, "learning_rate": 7.181554907924837e-06, "loss": 0.7573, "step": 10444 }, { "epoch": 1.878360154634541, "grad_norm": 1.432591438293457, "learning_rate": 7.181030830777838e-06, "loss": 0.7248, "step": 10445 }, { "epoch": 1.8785399622404029, "grad_norm": 1.4839168787002563, "learning_rate": 7.180506724037111e-06, "loss": 0.723, "step": 10446 }, { "epoch": 1.8787197698462645, "grad_norm": 1.4214340448379517, "learning_rate": 7.179982587709771e-06, "loss": 0.715, "step": 10447 }, { "epoch": 1.8788995774521262, "grad_norm": 1.3938007354736328, "learning_rate": 7.1794584218029265e-06, "loss": 0.7158, "step": 10448 }, { "epoch": 1.879079385057988, "grad_norm": 1.3921129703521729, "learning_rate": 7.1789342263236905e-06, "loss": 0.604, "step": 10449 }, { "epoch": 1.8792591926638496, "grad_norm": 1.4488025903701782, "learning_rate": 7.178410001279177e-06, "loss": 0.7699, "step": 10450 }, { "epoch": 1.8794390002697114, "grad_norm": 1.581182599067688, "learning_rate": 7.177885746676497e-06, "loss": 0.7246, "step": 10451 }, { "epoch": 1.8796188078755731, "grad_norm": 1.1528571844100952, "learning_rate": 7.177361462522766e-06, "loss": 0.9232, "step": 10452 }, { "epoch": 1.8797986154814348, "grad_norm": 1.4941047430038452, "learning_rate": 7.176837148825097e-06, "loss": 0.733, "step": 10453 }, { "epoch": 1.8799784230872967, "grad_norm": 1.4845497608184814, "learning_rate": 7.176312805590603e-06, "loss": 0.761, "step": 10454 }, { "epoch": 1.8801582306931583, "grad_norm": 1.4989873170852661, "learning_rate": 7.1757884328264004e-06, "loss": 0.7048, "step": 10455 }, { "epoch": 1.88033803829902, "grad_norm": 1.4724103212356567, "learning_rate": 7.175264030539605e-06, "loss": 0.7349, "step": 10456 }, { "epoch": 1.880517845904882, "grad_norm": 1.4644418954849243, "learning_rate": 7.1747395987373294e-06, "loss": 0.9861, "step": 10457 }, { "epoch": 1.8806976535107434, "grad_norm": 1.534589409828186, "learning_rate": 7.174215137426692e-06, "loss": 0.7111, "step": 10458 }, { "epoch": 1.8808774611166053, "grad_norm": 1.1429284811019897, "learning_rate": 7.173690646614807e-06, "loss": 0.9008, "step": 10459 }, { "epoch": 1.881057268722467, "grad_norm": 1.5074951648712158, "learning_rate": 7.173166126308794e-06, "loss": 0.6977, "step": 10460 }, { "epoch": 1.8812370763283286, "grad_norm": 1.044882893562317, "learning_rate": 7.172641576515767e-06, "loss": 0.9413, "step": 10461 }, { "epoch": 1.8814168839341905, "grad_norm": 1.1199109554290771, "learning_rate": 7.1721169972428435e-06, "loss": 0.9168, "step": 10462 }, { "epoch": 1.8815966915400522, "grad_norm": 1.5798660516738892, "learning_rate": 7.171592388497144e-06, "loss": 0.7555, "step": 10463 }, { "epoch": 1.8817764991459138, "grad_norm": 1.469335675239563, "learning_rate": 7.171067750285784e-06, "loss": 0.7292, "step": 10464 }, { "epoch": 1.8819563067517757, "grad_norm": 1.4909937381744385, "learning_rate": 7.170543082615884e-06, "loss": 0.7317, "step": 10465 }, { "epoch": 1.8821361143576372, "grad_norm": 1.4813134670257568, "learning_rate": 7.170018385494562e-06, "loss": 0.7384, "step": 10466 }, { "epoch": 1.882315921963499, "grad_norm": 1.1343046426773071, "learning_rate": 7.169493658928939e-06, "loss": 0.9437, "step": 10467 }, { "epoch": 1.8824957295693607, "grad_norm": 1.69294011592865, "learning_rate": 7.1689689029261335e-06, "loss": 0.736, "step": 10468 }, { "epoch": 1.8826755371752224, "grad_norm": 1.516400694847107, "learning_rate": 7.168444117493265e-06, "loss": 0.7305, "step": 10469 }, { "epoch": 1.8828553447810843, "grad_norm": 1.4716973304748535, "learning_rate": 7.167919302637456e-06, "loss": 0.7244, "step": 10470 }, { "epoch": 1.883035152386946, "grad_norm": 1.423091173171997, "learning_rate": 7.167394458365826e-06, "loss": 0.755, "step": 10471 }, { "epoch": 1.8832149599928076, "grad_norm": 1.4632372856140137, "learning_rate": 7.166869584685498e-06, "loss": 0.7164, "step": 10472 }, { "epoch": 1.8833947675986695, "grad_norm": 1.1213656663894653, "learning_rate": 7.166344681603592e-06, "loss": 0.91, "step": 10473 }, { "epoch": 1.8835745752045312, "grad_norm": 1.4414684772491455, "learning_rate": 7.165819749127232e-06, "loss": 0.6997, "step": 10474 }, { "epoch": 1.8837543828103929, "grad_norm": 1.6910864114761353, "learning_rate": 7.1652947872635396e-06, "loss": 0.7497, "step": 10475 }, { "epoch": 1.8839341904162548, "grad_norm": 1.4674324989318848, "learning_rate": 7.164769796019637e-06, "loss": 0.6937, "step": 10476 }, { "epoch": 1.8841139980221162, "grad_norm": 1.5035679340362549, "learning_rate": 7.164244775402649e-06, "loss": 0.7255, "step": 10477 }, { "epoch": 1.884293805627978, "grad_norm": 1.6000463962554932, "learning_rate": 7.1637197254197014e-06, "loss": 0.811, "step": 10478 }, { "epoch": 1.8844736132338398, "grad_norm": 1.5040338039398193, "learning_rate": 7.163194646077913e-06, "loss": 0.7377, "step": 10479 }, { "epoch": 1.8846534208397014, "grad_norm": 1.5318959951400757, "learning_rate": 7.162669537384415e-06, "loss": 0.7381, "step": 10480 }, { "epoch": 1.8848332284455633, "grad_norm": 1.4809000492095947, "learning_rate": 7.162144399346327e-06, "loss": 0.7149, "step": 10481 }, { "epoch": 1.885013036051425, "grad_norm": 1.551202416419983, "learning_rate": 7.161619231970778e-06, "loss": 0.7485, "step": 10482 }, { "epoch": 1.8851928436572867, "grad_norm": 1.365957498550415, "learning_rate": 7.1610940352648905e-06, "loss": 0.8543, "step": 10483 }, { "epoch": 1.8853726512631486, "grad_norm": 1.453352928161621, "learning_rate": 7.160568809235794e-06, "loss": 0.7133, "step": 10484 }, { "epoch": 1.88555245886901, "grad_norm": 1.4524006843566895, "learning_rate": 7.160043553890612e-06, "loss": 0.7319, "step": 10485 }, { "epoch": 1.885732266474872, "grad_norm": 1.4784958362579346, "learning_rate": 7.159518269236475e-06, "loss": 0.703, "step": 10486 }, { "epoch": 1.8859120740807336, "grad_norm": 1.1572083234786987, "learning_rate": 7.158992955280507e-06, "loss": 0.9705, "step": 10487 }, { "epoch": 1.8860918816865953, "grad_norm": 1.4774869680404663, "learning_rate": 7.1584676120298376e-06, "loss": 0.7855, "step": 10488 }, { "epoch": 1.8862716892924571, "grad_norm": 1.4198558330535889, "learning_rate": 7.157942239491598e-06, "loss": 0.7763, "step": 10489 }, { "epoch": 1.8864514968983188, "grad_norm": 1.5386738777160645, "learning_rate": 7.15741683767291e-06, "loss": 0.7254, "step": 10490 }, { "epoch": 1.8866313045041805, "grad_norm": 1.5159653425216675, "learning_rate": 7.156891406580909e-06, "loss": 0.7336, "step": 10491 }, { "epoch": 1.8868111121100424, "grad_norm": 1.5696113109588623, "learning_rate": 7.156365946222721e-06, "loss": 0.7252, "step": 10492 }, { "epoch": 1.8869909197159038, "grad_norm": 1.516042709350586, "learning_rate": 7.1558404566054765e-06, "loss": 0.7415, "step": 10493 }, { "epoch": 1.8871707273217657, "grad_norm": 1.5417944192886353, "learning_rate": 7.155314937736305e-06, "loss": 0.7584, "step": 10494 }, { "epoch": 1.8873505349276274, "grad_norm": 1.5232412815093994, "learning_rate": 7.154789389622339e-06, "loss": 0.7508, "step": 10495 }, { "epoch": 1.887530342533489, "grad_norm": 1.12357497215271, "learning_rate": 7.154263812270707e-06, "loss": 0.9267, "step": 10496 }, { "epoch": 1.887710150139351, "grad_norm": 1.5879825353622437, "learning_rate": 7.153738205688543e-06, "loss": 0.7631, "step": 10497 }, { "epoch": 1.8878899577452126, "grad_norm": 1.5752782821655273, "learning_rate": 7.1532125698829765e-06, "loss": 0.7492, "step": 10498 }, { "epoch": 1.8880697653510743, "grad_norm": 1.5444060564041138, "learning_rate": 7.152686904861141e-06, "loss": 0.717, "step": 10499 }, { "epoch": 1.8882495729569362, "grad_norm": 1.5067079067230225, "learning_rate": 7.152161210630168e-06, "loss": 0.755, "step": 10500 }, { "epoch": 1.8882495729569362, "eval_loss": 0.7859435677528381, "eval_runtime": 148.5071, "eval_samples_per_second": 96.844, "eval_steps_per_second": 1.515, "step": 10500 }, { "epoch": 1.8884293805627979, "grad_norm": 1.562415361404419, "learning_rate": 7.1516354871971934e-06, "loss": 0.7563, "step": 10501 }, { "epoch": 1.8886091881686595, "grad_norm": 1.0649296045303345, "learning_rate": 7.151109734569348e-06, "loss": 0.9396, "step": 10502 }, { "epoch": 1.8887889957745214, "grad_norm": 1.419073224067688, "learning_rate": 7.150583952753765e-06, "loss": 0.7408, "step": 10503 }, { "epoch": 1.8889688033803829, "grad_norm": 1.7295849323272705, "learning_rate": 7.150058141757581e-06, "loss": 0.7801, "step": 10504 }, { "epoch": 1.8891486109862448, "grad_norm": 1.1096062660217285, "learning_rate": 7.149532301587928e-06, "loss": 0.9393, "step": 10505 }, { "epoch": 1.8893284185921064, "grad_norm": 1.1018122434616089, "learning_rate": 7.1490064322519424e-06, "loss": 0.8962, "step": 10506 }, { "epoch": 1.889508226197968, "grad_norm": 1.400427222251892, "learning_rate": 7.148480533756759e-06, "loss": 0.702, "step": 10507 }, { "epoch": 1.88968803380383, "grad_norm": 1.7601361274719238, "learning_rate": 7.147954606109515e-06, "loss": 0.7638, "step": 10508 }, { "epoch": 1.8898678414096917, "grad_norm": 1.4616262912750244, "learning_rate": 7.147428649317344e-06, "loss": 0.7129, "step": 10509 }, { "epoch": 1.8900476490155533, "grad_norm": 1.0932953357696533, "learning_rate": 7.146902663387384e-06, "loss": 0.9025, "step": 10510 }, { "epoch": 1.8902274566214152, "grad_norm": 1.4148998260498047, "learning_rate": 7.146376648326774e-06, "loss": 0.7439, "step": 10511 }, { "epoch": 1.8904072642272767, "grad_norm": 1.6340230703353882, "learning_rate": 7.145850604142647e-06, "loss": 0.677, "step": 10512 }, { "epoch": 1.8905870718331386, "grad_norm": 1.5662020444869995, "learning_rate": 7.145324530842144e-06, "loss": 0.7421, "step": 10513 }, { "epoch": 1.8907668794390002, "grad_norm": 1.5620944499969482, "learning_rate": 7.144798428432401e-06, "loss": 0.7465, "step": 10514 }, { "epoch": 1.890946687044862, "grad_norm": 1.4893829822540283, "learning_rate": 7.1442722969205595e-06, "loss": 0.7498, "step": 10515 }, { "epoch": 1.8911264946507238, "grad_norm": 1.510504126548767, "learning_rate": 7.143746136313754e-06, "loss": 0.7436, "step": 10516 }, { "epoch": 1.8913063022565855, "grad_norm": 1.3449937105178833, "learning_rate": 7.143219946619128e-06, "loss": 0.9195, "step": 10517 }, { "epoch": 1.8914861098624471, "grad_norm": 1.5610920190811157, "learning_rate": 7.142693727843819e-06, "loss": 0.8041, "step": 10518 }, { "epoch": 1.891665917468309, "grad_norm": 1.44779634475708, "learning_rate": 7.142167479994969e-06, "loss": 0.7527, "step": 10519 }, { "epoch": 1.8918457250741705, "grad_norm": 1.4541370868682861, "learning_rate": 7.141641203079715e-06, "loss": 0.7034, "step": 10520 }, { "epoch": 1.8920255326800324, "grad_norm": 1.4591065645217896, "learning_rate": 7.141114897105202e-06, "loss": 0.7314, "step": 10521 }, { "epoch": 1.892205340285894, "grad_norm": 1.4805001020431519, "learning_rate": 7.140588562078566e-06, "loss": 0.7657, "step": 10522 }, { "epoch": 1.8923851478917557, "grad_norm": 1.4662848711013794, "learning_rate": 7.140062198006955e-06, "loss": 0.7333, "step": 10523 }, { "epoch": 1.8925649554976176, "grad_norm": 1.4937384128570557, "learning_rate": 7.1395358048975075e-06, "loss": 0.6842, "step": 10524 }, { "epoch": 1.8927447631034793, "grad_norm": 1.5742664337158203, "learning_rate": 7.139009382757367e-06, "loss": 0.7977, "step": 10525 }, { "epoch": 1.892924570709341, "grad_norm": 1.4479265213012695, "learning_rate": 7.138482931593675e-06, "loss": 0.7399, "step": 10526 }, { "epoch": 1.8931043783152028, "grad_norm": 1.5146714448928833, "learning_rate": 7.137956451413575e-06, "loss": 0.7044, "step": 10527 }, { "epoch": 1.8932841859210645, "grad_norm": 1.4607248306274414, "learning_rate": 7.137429942224212e-06, "loss": 0.653, "step": 10528 }, { "epoch": 1.8934639935269262, "grad_norm": 1.5764104127883911, "learning_rate": 7.136903404032729e-06, "loss": 0.7135, "step": 10529 }, { "epoch": 1.893643801132788, "grad_norm": 1.5534747838974, "learning_rate": 7.136376836846271e-06, "loss": 0.7412, "step": 10530 }, { "epoch": 1.8938236087386495, "grad_norm": 1.2792878150939941, "learning_rate": 7.135850240671982e-06, "loss": 0.9352, "step": 10531 }, { "epoch": 1.8940034163445114, "grad_norm": 1.5369253158569336, "learning_rate": 7.135323615517007e-06, "loss": 0.7403, "step": 10532 }, { "epoch": 1.894183223950373, "grad_norm": 1.0494771003723145, "learning_rate": 7.134796961388495e-06, "loss": 0.9445, "step": 10533 }, { "epoch": 1.8943630315562348, "grad_norm": 1.8168433904647827, "learning_rate": 7.134270278293587e-06, "loss": 0.794, "step": 10534 }, { "epoch": 1.8945428391620966, "grad_norm": 1.1024038791656494, "learning_rate": 7.133743566239433e-06, "loss": 0.8868, "step": 10535 }, { "epoch": 1.8947226467679583, "grad_norm": 1.7269845008850098, "learning_rate": 7.133216825233178e-06, "loss": 0.7116, "step": 10536 }, { "epoch": 1.89490245437382, "grad_norm": 1.48149836063385, "learning_rate": 7.132690055281971e-06, "loss": 0.7377, "step": 10537 }, { "epoch": 1.8950822619796819, "grad_norm": 1.482701063156128, "learning_rate": 7.132163256392957e-06, "loss": 0.6951, "step": 10538 }, { "epoch": 1.8952620695855433, "grad_norm": 1.4031262397766113, "learning_rate": 7.1316364285732855e-06, "loss": 0.6454, "step": 10539 }, { "epoch": 1.8954418771914052, "grad_norm": 1.419724941253662, "learning_rate": 7.131109571830105e-06, "loss": 0.7374, "step": 10540 }, { "epoch": 1.895621684797267, "grad_norm": 1.0504436492919922, "learning_rate": 7.130582686170563e-06, "loss": 0.9476, "step": 10541 }, { "epoch": 1.8958014924031286, "grad_norm": 1.1661001443862915, "learning_rate": 7.1300557716018105e-06, "loss": 0.9102, "step": 10542 }, { "epoch": 1.8959813000089905, "grad_norm": 1.5318686962127686, "learning_rate": 7.129528828130996e-06, "loss": 0.7461, "step": 10543 }, { "epoch": 1.8961611076148521, "grad_norm": 1.453918695449829, "learning_rate": 7.129001855765269e-06, "loss": 0.6997, "step": 10544 }, { "epoch": 1.8963409152207138, "grad_norm": 1.5939147472381592, "learning_rate": 7.128474854511781e-06, "loss": 0.7021, "step": 10545 }, { "epoch": 1.8965207228265757, "grad_norm": 1.3171792030334473, "learning_rate": 7.127947824377681e-06, "loss": 0.911, "step": 10546 }, { "epoch": 1.8967005304324371, "grad_norm": 1.3713454008102417, "learning_rate": 7.127420765370123e-06, "loss": 0.7264, "step": 10547 }, { "epoch": 1.896880338038299, "grad_norm": 1.5302391052246094, "learning_rate": 7.1268936774962564e-06, "loss": 0.7126, "step": 10548 }, { "epoch": 1.8970601456441607, "grad_norm": 1.5245606899261475, "learning_rate": 7.1263665607632325e-06, "loss": 0.7167, "step": 10549 }, { "epoch": 1.8972399532500224, "grad_norm": 1.4903998374938965, "learning_rate": 7.125839415178204e-06, "loss": 0.8005, "step": 10550 }, { "epoch": 1.8974197608558843, "grad_norm": 1.306392788887024, "learning_rate": 7.125312240748325e-06, "loss": 0.9257, "step": 10551 }, { "epoch": 1.897599568461746, "grad_norm": 1.4284732341766357, "learning_rate": 7.124785037480748e-06, "loss": 0.6623, "step": 10552 }, { "epoch": 1.8977793760676076, "grad_norm": 1.6176297664642334, "learning_rate": 7.124257805382624e-06, "loss": 0.7253, "step": 10553 }, { "epoch": 1.8979591836734695, "grad_norm": 1.5199605226516724, "learning_rate": 7.12373054446111e-06, "loss": 0.7215, "step": 10554 }, { "epoch": 1.8981389912793312, "grad_norm": 1.422135591506958, "learning_rate": 7.12320325472336e-06, "loss": 0.7402, "step": 10555 }, { "epoch": 1.8983187988851928, "grad_norm": 1.451433539390564, "learning_rate": 7.122675936176526e-06, "loss": 0.6942, "step": 10556 }, { "epoch": 1.8984986064910547, "grad_norm": 1.6145650148391724, "learning_rate": 7.122148588827768e-06, "loss": 0.719, "step": 10557 }, { "epoch": 1.8986784140969162, "grad_norm": 1.4604640007019043, "learning_rate": 7.121621212684236e-06, "loss": 0.7218, "step": 10558 }, { "epoch": 1.898858221702778, "grad_norm": 1.5001083612442017, "learning_rate": 7.121093807753088e-06, "loss": 0.7072, "step": 10559 }, { "epoch": 1.8990380293086397, "grad_norm": 1.483798623085022, "learning_rate": 7.1205663740414795e-06, "loss": 0.6968, "step": 10560 }, { "epoch": 1.8992178369145014, "grad_norm": 1.4972566366195679, "learning_rate": 7.120038911556569e-06, "loss": 0.7912, "step": 10561 }, { "epoch": 1.8993976445203633, "grad_norm": 1.478919506072998, "learning_rate": 7.119511420305512e-06, "loss": 0.7178, "step": 10562 }, { "epoch": 1.899577452126225, "grad_norm": 1.117372989654541, "learning_rate": 7.118983900295465e-06, "loss": 0.9017, "step": 10563 }, { "epoch": 1.8997572597320866, "grad_norm": 1.5153898000717163, "learning_rate": 7.118456351533587e-06, "loss": 0.7748, "step": 10564 }, { "epoch": 1.8999370673379485, "grad_norm": 1.488427758216858, "learning_rate": 7.117928774027039e-06, "loss": 0.7324, "step": 10565 }, { "epoch": 1.90011687494381, "grad_norm": 1.489262342453003, "learning_rate": 7.117401167782974e-06, "loss": 0.7583, "step": 10566 }, { "epoch": 1.9002966825496719, "grad_norm": 1.4558464288711548, "learning_rate": 7.116873532808554e-06, "loss": 0.7049, "step": 10567 }, { "epoch": 1.9004764901555335, "grad_norm": 1.476671576499939, "learning_rate": 7.116345869110937e-06, "loss": 0.6404, "step": 10568 }, { "epoch": 1.9006562977613952, "grad_norm": 1.5453733205795288, "learning_rate": 7.115818176697285e-06, "loss": 0.8098, "step": 10569 }, { "epoch": 1.9008361053672571, "grad_norm": 1.4496920108795166, "learning_rate": 7.115290455574755e-06, "loss": 0.7551, "step": 10570 }, { "epoch": 1.9010159129731188, "grad_norm": 1.4517971277236938, "learning_rate": 7.11476270575051e-06, "loss": 0.7252, "step": 10571 }, { "epoch": 1.9011957205789805, "grad_norm": 1.5722277164459229, "learning_rate": 7.114234927231709e-06, "loss": 0.7791, "step": 10572 }, { "epoch": 1.9013755281848423, "grad_norm": 1.6100339889526367, "learning_rate": 7.113707120025516e-06, "loss": 0.786, "step": 10573 }, { "epoch": 1.9015553357907038, "grad_norm": 1.6009727716445923, "learning_rate": 7.113179284139089e-06, "loss": 0.7195, "step": 10574 }, { "epoch": 1.9017351433965657, "grad_norm": 1.565840721130371, "learning_rate": 7.112651419579592e-06, "loss": 0.6752, "step": 10575 }, { "epoch": 1.9019149510024274, "grad_norm": 1.5110944509506226, "learning_rate": 7.112123526354188e-06, "loss": 0.7184, "step": 10576 }, { "epoch": 1.902094758608289, "grad_norm": 1.022121787071228, "learning_rate": 7.111595604470039e-06, "loss": 0.8834, "step": 10577 }, { "epoch": 1.902274566214151, "grad_norm": 1.4721471071243286, "learning_rate": 7.111067653934309e-06, "loss": 0.7712, "step": 10578 }, { "epoch": 1.9024543738200126, "grad_norm": 1.3477470874786377, "learning_rate": 7.11053967475416e-06, "loss": 0.714, "step": 10579 }, { "epoch": 1.9026341814258743, "grad_norm": 1.387147307395935, "learning_rate": 7.110011666936758e-06, "loss": 0.7339, "step": 10580 }, { "epoch": 1.9028139890317362, "grad_norm": 1.4330761432647705, "learning_rate": 7.109483630489265e-06, "loss": 0.7229, "step": 10581 }, { "epoch": 1.9029937966375978, "grad_norm": 1.201435923576355, "learning_rate": 7.108955565418848e-06, "loss": 0.9097, "step": 10582 }, { "epoch": 1.9031736042434595, "grad_norm": 1.458938479423523, "learning_rate": 7.1084274717326714e-06, "loss": 0.773, "step": 10583 }, { "epoch": 1.9033534118493214, "grad_norm": 1.4503573179244995, "learning_rate": 7.1078993494379e-06, "loss": 0.6346, "step": 10584 }, { "epoch": 1.9035332194551828, "grad_norm": 1.1175469160079956, "learning_rate": 7.1073711985416994e-06, "loss": 0.9668, "step": 10585 }, { "epoch": 1.9037130270610447, "grad_norm": 1.2049205303192139, "learning_rate": 7.106843019051237e-06, "loss": 0.9214, "step": 10586 }, { "epoch": 1.9038928346669064, "grad_norm": 1.3868571519851685, "learning_rate": 7.1063148109736815e-06, "loss": 0.6986, "step": 10587 }, { "epoch": 1.904072642272768, "grad_norm": 1.1243833303451538, "learning_rate": 7.105786574316196e-06, "loss": 0.8972, "step": 10588 }, { "epoch": 1.90425244987863, "grad_norm": 1.1539963483810425, "learning_rate": 7.105258309085951e-06, "loss": 0.9232, "step": 10589 }, { "epoch": 1.9044322574844916, "grad_norm": 1.5148483514785767, "learning_rate": 7.104730015290111e-06, "loss": 0.6969, "step": 10590 }, { "epoch": 1.9046120650903533, "grad_norm": 1.4235448837280273, "learning_rate": 7.104201692935848e-06, "loss": 0.7303, "step": 10591 }, { "epoch": 1.9047918726962152, "grad_norm": 1.523414134979248, "learning_rate": 7.103673342030328e-06, "loss": 0.8198, "step": 10592 }, { "epoch": 1.9049716803020766, "grad_norm": 1.4348909854888916, "learning_rate": 7.103144962580723e-06, "loss": 0.698, "step": 10593 }, { "epoch": 1.9051514879079385, "grad_norm": 1.4593071937561035, "learning_rate": 7.1026165545942e-06, "loss": 0.6896, "step": 10594 }, { "epoch": 1.9053312955138002, "grad_norm": 1.5538060665130615, "learning_rate": 7.102088118077927e-06, "loss": 0.7115, "step": 10595 }, { "epoch": 1.9055111031196619, "grad_norm": 1.5390479564666748, "learning_rate": 7.101559653039079e-06, "loss": 0.7504, "step": 10596 }, { "epoch": 1.9056909107255238, "grad_norm": 1.4849309921264648, "learning_rate": 7.101031159484822e-06, "loss": 0.748, "step": 10597 }, { "epoch": 1.9058707183313854, "grad_norm": 1.4927356243133545, "learning_rate": 7.10050263742233e-06, "loss": 0.7216, "step": 10598 }, { "epoch": 1.906050525937247, "grad_norm": 1.4866585731506348, "learning_rate": 7.099974086858774e-06, "loss": 0.7467, "step": 10599 }, { "epoch": 1.906230333543109, "grad_norm": 1.5562536716461182, "learning_rate": 7.099445507801324e-06, "loss": 0.7416, "step": 10600 }, { "epoch": 1.9064101411489704, "grad_norm": 1.2075833082199097, "learning_rate": 7.098916900257153e-06, "loss": 0.9196, "step": 10601 }, { "epoch": 1.9065899487548323, "grad_norm": 1.335059642791748, "learning_rate": 7.098388264233434e-06, "loss": 0.6751, "step": 10602 }, { "epoch": 1.906769756360694, "grad_norm": 1.549225926399231, "learning_rate": 7.097859599737341e-06, "loss": 0.7449, "step": 10603 }, { "epoch": 1.9069495639665557, "grad_norm": 1.2464463710784912, "learning_rate": 7.0973309067760455e-06, "loss": 0.9218, "step": 10604 }, { "epoch": 1.9071293715724176, "grad_norm": 1.3880364894866943, "learning_rate": 7.096802185356721e-06, "loss": 0.7033, "step": 10605 }, { "epoch": 1.9073091791782792, "grad_norm": 1.4507607221603394, "learning_rate": 7.096273435486541e-06, "loss": 0.6934, "step": 10606 }, { "epoch": 1.907488986784141, "grad_norm": 1.4929735660552979, "learning_rate": 7.095744657172683e-06, "loss": 0.7037, "step": 10607 }, { "epoch": 1.9076687943900028, "grad_norm": 1.5700833797454834, "learning_rate": 7.095215850422318e-06, "loss": 0.7633, "step": 10608 }, { "epoch": 1.9078486019958645, "grad_norm": 1.4247828722000122, "learning_rate": 7.094687015242624e-06, "loss": 0.7239, "step": 10609 }, { "epoch": 1.9080284096017261, "grad_norm": 1.415088415145874, "learning_rate": 7.094158151640776e-06, "loss": 0.743, "step": 10610 }, { "epoch": 1.908208217207588, "grad_norm": 1.627402901649475, "learning_rate": 7.0936292596239495e-06, "loss": 0.7745, "step": 10611 }, { "epoch": 1.9083880248134495, "grad_norm": 1.4510449171066284, "learning_rate": 7.093100339199322e-06, "loss": 0.7085, "step": 10612 }, { "epoch": 1.9085678324193114, "grad_norm": 1.48283851146698, "learning_rate": 7.092571390374068e-06, "loss": 0.701, "step": 10613 }, { "epoch": 1.908747640025173, "grad_norm": 1.5755600929260254, "learning_rate": 7.092042413155367e-06, "loss": 0.7966, "step": 10614 }, { "epoch": 1.9089274476310347, "grad_norm": 1.0437755584716797, "learning_rate": 7.091513407550394e-06, "loss": 0.8857, "step": 10615 }, { "epoch": 1.9091072552368966, "grad_norm": 1.1172581911087036, "learning_rate": 7.090984373566331e-06, "loss": 0.8977, "step": 10616 }, { "epoch": 1.9092870628427583, "grad_norm": 1.2109854221343994, "learning_rate": 7.090455311210352e-06, "loss": 0.898, "step": 10617 }, { "epoch": 1.90946687044862, "grad_norm": 1.4654593467712402, "learning_rate": 7.089926220489637e-06, "loss": 0.7077, "step": 10618 }, { "epoch": 1.9096466780544818, "grad_norm": 2.832794666290283, "learning_rate": 7.089397101411365e-06, "loss": 0.7591, "step": 10619 }, { "epoch": 1.9098264856603433, "grad_norm": 1.5437891483306885, "learning_rate": 7.088867953982718e-06, "loss": 0.7325, "step": 10620 }, { "epoch": 1.9100062932662052, "grad_norm": 1.4824333190917969, "learning_rate": 7.088338778210872e-06, "loss": 0.7501, "step": 10621 }, { "epoch": 1.9101861008720669, "grad_norm": 1.4708943367004395, "learning_rate": 7.0878095741030106e-06, "loss": 0.7332, "step": 10622 }, { "epoch": 1.9103659084779285, "grad_norm": 1.5382884740829468, "learning_rate": 7.0872803416663105e-06, "loss": 0.7499, "step": 10623 }, { "epoch": 1.9105457160837904, "grad_norm": 1.368862509727478, "learning_rate": 7.086751080907957e-06, "loss": 0.6685, "step": 10624 }, { "epoch": 1.910725523689652, "grad_norm": 1.340768575668335, "learning_rate": 7.086221791835129e-06, "loss": 0.8893, "step": 10625 }, { "epoch": 1.9109053312955138, "grad_norm": 1.5520654916763306, "learning_rate": 7.0856924744550085e-06, "loss": 0.6947, "step": 10626 }, { "epoch": 1.9110851389013757, "grad_norm": 1.1040889024734497, "learning_rate": 7.085163128774777e-06, "loss": 0.9107, "step": 10627 }, { "epoch": 1.911264946507237, "grad_norm": 1.5796356201171875, "learning_rate": 7.0846337548016194e-06, "loss": 0.7541, "step": 10628 }, { "epoch": 1.911444754113099, "grad_norm": 1.4771254062652588, "learning_rate": 7.084104352542715e-06, "loss": 0.7677, "step": 10629 }, { "epoch": 1.9116245617189607, "grad_norm": 1.536594271659851, "learning_rate": 7.0835749220052505e-06, "loss": 0.7092, "step": 10630 }, { "epoch": 1.9118043693248223, "grad_norm": 1.1572542190551758, "learning_rate": 7.0830454631964075e-06, "loss": 0.8813, "step": 10631 }, { "epoch": 1.9119841769306842, "grad_norm": 1.929943323135376, "learning_rate": 7.082515976123372e-06, "loss": 0.7058, "step": 10632 }, { "epoch": 1.912163984536546, "grad_norm": 1.41730535030365, "learning_rate": 7.0819864607933265e-06, "loss": 0.5954, "step": 10633 }, { "epoch": 1.9123437921424076, "grad_norm": 1.1351196765899658, "learning_rate": 7.0814569172134576e-06, "loss": 0.9281, "step": 10634 }, { "epoch": 1.9125235997482695, "grad_norm": 1.5670619010925293, "learning_rate": 7.080927345390948e-06, "loss": 0.6687, "step": 10635 }, { "epoch": 1.912703407354131, "grad_norm": 1.4636602401733398, "learning_rate": 7.080397745332986e-06, "loss": 0.7478, "step": 10636 }, { "epoch": 1.9128832149599928, "grad_norm": 1.415093183517456, "learning_rate": 7.079868117046755e-06, "loss": 0.7118, "step": 10637 }, { "epoch": 1.9130630225658547, "grad_norm": 1.488422155380249, "learning_rate": 7.079338460539444e-06, "loss": 0.7972, "step": 10638 }, { "epoch": 1.9132428301717161, "grad_norm": 1.4519708156585693, "learning_rate": 7.078808775818238e-06, "loss": 0.7348, "step": 10639 }, { "epoch": 1.913422637777578, "grad_norm": 1.581010341644287, "learning_rate": 7.078279062890324e-06, "loss": 0.756, "step": 10640 }, { "epoch": 1.9136024453834397, "grad_norm": 1.4074881076812744, "learning_rate": 7.07774932176289e-06, "loss": 0.668, "step": 10641 }, { "epoch": 1.9137822529893014, "grad_norm": 1.4753429889678955, "learning_rate": 7.0772195524431265e-06, "loss": 0.7637, "step": 10642 }, { "epoch": 1.9139620605951633, "grad_norm": 1.6643047332763672, "learning_rate": 7.076689754938216e-06, "loss": 0.7466, "step": 10643 }, { "epoch": 1.914141868201025, "grad_norm": 1.432794213294983, "learning_rate": 7.076159929255353e-06, "loss": 0.7153, "step": 10644 }, { "epoch": 1.9143216758068866, "grad_norm": 1.348471999168396, "learning_rate": 7.075630075401723e-06, "loss": 0.6708, "step": 10645 }, { "epoch": 1.9145014834127485, "grad_norm": 1.3263843059539795, "learning_rate": 7.075100193384516e-06, "loss": 0.7485, "step": 10646 }, { "epoch": 1.91468129101861, "grad_norm": 1.6374056339263916, "learning_rate": 7.074570283210922e-06, "loss": 0.7593, "step": 10647 }, { "epoch": 1.9148610986244718, "grad_norm": 1.583711862564087, "learning_rate": 7.074040344888132e-06, "loss": 0.6947, "step": 10648 }, { "epoch": 1.9150409062303335, "grad_norm": 1.294852375984192, "learning_rate": 7.073510378423336e-06, "loss": 0.9227, "step": 10649 }, { "epoch": 1.9152207138361952, "grad_norm": 1.462630271911621, "learning_rate": 7.0729803838237255e-06, "loss": 0.7359, "step": 10650 }, { "epoch": 1.915400521442057, "grad_norm": 1.5247102975845337, "learning_rate": 7.07245036109649e-06, "loss": 0.7314, "step": 10651 }, { "epoch": 1.9155803290479188, "grad_norm": 1.4889510869979858, "learning_rate": 7.071920310248822e-06, "loss": 0.7482, "step": 10652 }, { "epoch": 1.9157601366537804, "grad_norm": 1.5428979396820068, "learning_rate": 7.0713902312879145e-06, "loss": 0.7433, "step": 10653 }, { "epoch": 1.9159399442596423, "grad_norm": 1.5067795515060425, "learning_rate": 7.07086012422096e-06, "loss": 0.699, "step": 10654 }, { "epoch": 1.9161197518655038, "grad_norm": 1.5729496479034424, "learning_rate": 7.0703299890551505e-06, "loss": 0.7071, "step": 10655 }, { "epoch": 1.9162995594713657, "grad_norm": 1.6838946342468262, "learning_rate": 7.069799825797681e-06, "loss": 0.7434, "step": 10656 }, { "epoch": 1.9164793670772273, "grad_norm": 1.5009101629257202, "learning_rate": 7.069269634455742e-06, "loss": 0.7011, "step": 10657 }, { "epoch": 1.916659174683089, "grad_norm": 1.4665579795837402, "learning_rate": 7.068739415036529e-06, "loss": 0.6825, "step": 10658 }, { "epoch": 1.9168389822889509, "grad_norm": 1.5300313234329224, "learning_rate": 7.068209167547238e-06, "loss": 0.6828, "step": 10659 }, { "epoch": 1.9170187898948126, "grad_norm": 1.4192674160003662, "learning_rate": 7.067678891995062e-06, "loss": 0.6826, "step": 10660 }, { "epoch": 1.9171985975006742, "grad_norm": 1.3884623050689697, "learning_rate": 7.067148588387196e-06, "loss": 0.6922, "step": 10661 }, { "epoch": 1.9173784051065361, "grad_norm": 0.9949527978897095, "learning_rate": 7.0666182567308365e-06, "loss": 0.9516, "step": 10662 }, { "epoch": 1.9175582127123976, "grad_norm": 1.413671851158142, "learning_rate": 7.066087897033178e-06, "loss": 0.7894, "step": 10663 }, { "epoch": 1.9177380203182595, "grad_norm": 1.4752860069274902, "learning_rate": 7.065557509301418e-06, "loss": 0.7523, "step": 10664 }, { "epoch": 1.9179178279241214, "grad_norm": 1.5241433382034302, "learning_rate": 7.065027093542753e-06, "loss": 0.7049, "step": 10665 }, { "epoch": 1.9180976355299828, "grad_norm": 1.4907300472259521, "learning_rate": 7.064496649764381e-06, "loss": 0.7303, "step": 10666 }, { "epoch": 1.9182774431358447, "grad_norm": 1.442338466644287, "learning_rate": 7.063966177973498e-06, "loss": 0.7367, "step": 10667 }, { "epoch": 1.9184572507417064, "grad_norm": 1.5350431203842163, "learning_rate": 7.063435678177302e-06, "loss": 0.741, "step": 10668 }, { "epoch": 1.918637058347568, "grad_norm": 1.474593997001648, "learning_rate": 7.06290515038299e-06, "loss": 0.7678, "step": 10669 }, { "epoch": 1.91881686595343, "grad_norm": 1.0824862718582153, "learning_rate": 7.062374594597765e-06, "loss": 0.9087, "step": 10670 }, { "epoch": 1.9189966735592916, "grad_norm": 1.688550591468811, "learning_rate": 7.06184401082882e-06, "loss": 0.7384, "step": 10671 }, { "epoch": 1.9191764811651533, "grad_norm": 1.5961089134216309, "learning_rate": 7.061313399083358e-06, "loss": 0.717, "step": 10672 }, { "epoch": 1.9193562887710152, "grad_norm": 1.5605616569519043, "learning_rate": 7.060782759368579e-06, "loss": 0.6827, "step": 10673 }, { "epoch": 1.9195360963768766, "grad_norm": 1.1879404783248901, "learning_rate": 7.060252091691679e-06, "loss": 0.8858, "step": 10674 }, { "epoch": 1.9197159039827385, "grad_norm": 1.5006824731826782, "learning_rate": 7.0597213960598645e-06, "loss": 0.7864, "step": 10675 }, { "epoch": 1.9198957115886002, "grad_norm": 0.9802802801132202, "learning_rate": 7.059190672480333e-06, "loss": 0.9667, "step": 10676 }, { "epoch": 1.9200755191944618, "grad_norm": 1.619339108467102, "learning_rate": 7.058659920960285e-06, "loss": 0.7374, "step": 10677 }, { "epoch": 1.9202553268003237, "grad_norm": 1.435272455215454, "learning_rate": 7.058129141506923e-06, "loss": 0.6881, "step": 10678 }, { "epoch": 1.9204351344061854, "grad_norm": 1.6284290552139282, "learning_rate": 7.057598334127449e-06, "loss": 0.6936, "step": 10679 }, { "epoch": 1.920614942012047, "grad_norm": 1.5184470415115356, "learning_rate": 7.057067498829067e-06, "loss": 0.7056, "step": 10680 }, { "epoch": 1.920794749617909, "grad_norm": 1.5214266777038574, "learning_rate": 7.056536635618977e-06, "loss": 0.7598, "step": 10681 }, { "epoch": 1.9209745572237704, "grad_norm": 1.5596725940704346, "learning_rate": 7.056005744504384e-06, "loss": 0.6893, "step": 10682 }, { "epoch": 1.9211543648296323, "grad_norm": 1.514347791671753, "learning_rate": 7.05547482549249e-06, "loss": 0.7318, "step": 10683 }, { "epoch": 1.921334172435494, "grad_norm": 1.5458195209503174, "learning_rate": 7.054943878590499e-06, "loss": 0.7756, "step": 10684 }, { "epoch": 1.9215139800413557, "grad_norm": 1.4623141288757324, "learning_rate": 7.054412903805616e-06, "loss": 0.7696, "step": 10685 }, { "epoch": 1.9216937876472175, "grad_norm": 1.5640571117401123, "learning_rate": 7.053881901145047e-06, "loss": 0.7014, "step": 10686 }, { "epoch": 1.9218735952530792, "grad_norm": 1.431766152381897, "learning_rate": 7.053350870615996e-06, "loss": 0.681, "step": 10687 }, { "epoch": 1.9220534028589409, "grad_norm": 1.420756459236145, "learning_rate": 7.0528198122256664e-06, "loss": 0.7461, "step": 10688 }, { "epoch": 1.9222332104648028, "grad_norm": 1.4237226247787476, "learning_rate": 7.052288725981266e-06, "loss": 0.7781, "step": 10689 }, { "epoch": 1.9224130180706642, "grad_norm": 1.1160584688186646, "learning_rate": 7.051757611890001e-06, "loss": 0.9272, "step": 10690 }, { "epoch": 1.9225928256765261, "grad_norm": 1.5875585079193115, "learning_rate": 7.051226469959077e-06, "loss": 0.7819, "step": 10691 }, { "epoch": 1.922772633282388, "grad_norm": 1.5773290395736694, "learning_rate": 7.050695300195702e-06, "loss": 0.7366, "step": 10692 }, { "epoch": 1.9229524408882495, "grad_norm": 1.7222185134887695, "learning_rate": 7.050164102607081e-06, "loss": 0.7375, "step": 10693 }, { "epoch": 1.9231322484941114, "grad_norm": 1.426190972328186, "learning_rate": 7.049632877200424e-06, "loss": 0.67, "step": 10694 }, { "epoch": 1.923312056099973, "grad_norm": 1.7548426389694214, "learning_rate": 7.049101623982938e-06, "loss": 0.7089, "step": 10695 }, { "epoch": 1.9234918637058347, "grad_norm": 1.5790810585021973, "learning_rate": 7.048570342961832e-06, "loss": 0.7757, "step": 10696 }, { "epoch": 1.9236716713116966, "grad_norm": 1.4048223495483398, "learning_rate": 7.048039034144314e-06, "loss": 0.7476, "step": 10697 }, { "epoch": 1.9238514789175583, "grad_norm": 1.5190387964248657, "learning_rate": 7.047507697537594e-06, "loss": 0.7022, "step": 10698 }, { "epoch": 1.92403128652342, "grad_norm": 1.0212234258651733, "learning_rate": 7.046976333148881e-06, "loss": 0.9141, "step": 10699 }, { "epoch": 1.9242110941292818, "grad_norm": 1.4919462203979492, "learning_rate": 7.046444940985386e-06, "loss": 0.6904, "step": 10700 }, { "epoch": 1.9243909017351433, "grad_norm": 1.505125641822815, "learning_rate": 7.045913521054318e-06, "loss": 0.7934, "step": 10701 }, { "epoch": 1.9245707093410052, "grad_norm": 1.4559694528579712, "learning_rate": 7.0453820733628876e-06, "loss": 0.7269, "step": 10702 }, { "epoch": 1.9247505169468668, "grad_norm": 1.7348607778549194, "learning_rate": 7.044850597918307e-06, "loss": 0.6155, "step": 10703 }, { "epoch": 1.9249303245527285, "grad_norm": 1.5147467851638794, "learning_rate": 7.0443190947277864e-06, "loss": 0.7361, "step": 10704 }, { "epoch": 1.9251101321585904, "grad_norm": 1.3611235618591309, "learning_rate": 7.043787563798538e-06, "loss": 0.664, "step": 10705 }, { "epoch": 1.925289939764452, "grad_norm": 1.6144077777862549, "learning_rate": 7.043256005137773e-06, "loss": 0.77, "step": 10706 }, { "epoch": 1.9254697473703137, "grad_norm": 1.1719447374343872, "learning_rate": 7.0427244187527054e-06, "loss": 0.8999, "step": 10707 }, { "epoch": 1.9256495549761756, "grad_norm": 1.4209299087524414, "learning_rate": 7.042192804650549e-06, "loss": 0.6971, "step": 10708 }, { "epoch": 1.925829362582037, "grad_norm": 1.4896992444992065, "learning_rate": 7.041661162838515e-06, "loss": 0.717, "step": 10709 }, { "epoch": 1.926009170187899, "grad_norm": 1.04396653175354, "learning_rate": 7.041129493323819e-06, "loss": 0.9046, "step": 10710 }, { "epoch": 1.9261889777937606, "grad_norm": 1.5199532508850098, "learning_rate": 7.040597796113673e-06, "loss": 0.7494, "step": 10711 }, { "epoch": 1.9263687853996223, "grad_norm": 1.4945123195648193, "learning_rate": 7.040066071215294e-06, "loss": 0.707, "step": 10712 }, { "epoch": 1.9265485930054842, "grad_norm": 1.4547810554504395, "learning_rate": 7.039534318635893e-06, "loss": 0.7423, "step": 10713 }, { "epoch": 1.9267284006113459, "grad_norm": 1.9451621770858765, "learning_rate": 7.039002538382689e-06, "loss": 0.7743, "step": 10714 }, { "epoch": 1.9269082082172075, "grad_norm": 1.4682475328445435, "learning_rate": 7.038470730462895e-06, "loss": 0.7285, "step": 10715 }, { "epoch": 1.9270880158230694, "grad_norm": 1.540612816810608, "learning_rate": 7.037938894883729e-06, "loss": 0.7055, "step": 10716 }, { "epoch": 1.9272678234289309, "grad_norm": 1.394464135169983, "learning_rate": 7.037407031652405e-06, "loss": 0.7251, "step": 10717 }, { "epoch": 1.9274476310347928, "grad_norm": 1.4909276962280273, "learning_rate": 7.036875140776142e-06, "loss": 0.7185, "step": 10718 }, { "epoch": 1.9276274386406544, "grad_norm": 1.4458516836166382, "learning_rate": 7.036343222262155e-06, "loss": 0.7337, "step": 10719 }, { "epoch": 1.9278072462465161, "grad_norm": 1.5510857105255127, "learning_rate": 7.035811276117663e-06, "loss": 0.7736, "step": 10720 }, { "epoch": 1.927987053852378, "grad_norm": 1.5165451765060425, "learning_rate": 7.035279302349883e-06, "loss": 0.7774, "step": 10721 }, { "epoch": 1.9281668614582397, "grad_norm": 1.4660449028015137, "learning_rate": 7.034747300966035e-06, "loss": 0.7179, "step": 10722 }, { "epoch": 1.9283466690641013, "grad_norm": 1.650861382484436, "learning_rate": 7.034215271973334e-06, "loss": 0.7232, "step": 10723 }, { "epoch": 1.9285264766699632, "grad_norm": 1.4716827869415283, "learning_rate": 7.033683215379002e-06, "loss": 0.7473, "step": 10724 }, { "epoch": 1.928706284275825, "grad_norm": 1.5095102787017822, "learning_rate": 7.033151131190257e-06, "loss": 0.9382, "step": 10725 }, { "epoch": 1.9288860918816866, "grad_norm": 1.3931713104248047, "learning_rate": 7.032619019414319e-06, "loss": 0.6688, "step": 10726 }, { "epoch": 1.9290658994875485, "grad_norm": 1.4945696592330933, "learning_rate": 7.032086880058408e-06, "loss": 0.7312, "step": 10727 }, { "epoch": 1.92924570709341, "grad_norm": 1.514639973640442, "learning_rate": 7.0315547131297435e-06, "loss": 0.7319, "step": 10728 }, { "epoch": 1.9294255146992718, "grad_norm": 1.4537105560302734, "learning_rate": 7.031022518635547e-06, "loss": 0.7134, "step": 10729 }, { "epoch": 1.9296053223051335, "grad_norm": 1.55465567111969, "learning_rate": 7.030490296583041e-06, "loss": 0.7431, "step": 10730 }, { "epoch": 1.9297851299109952, "grad_norm": 1.6119526624679565, "learning_rate": 7.029958046979446e-06, "loss": 0.7523, "step": 10731 }, { "epoch": 1.929964937516857, "grad_norm": 1.4903485774993896, "learning_rate": 7.029425769831984e-06, "loss": 0.761, "step": 10732 }, { "epoch": 1.9301447451227187, "grad_norm": 1.4507665634155273, "learning_rate": 7.028893465147877e-06, "loss": 0.6719, "step": 10733 }, { "epoch": 1.9303245527285804, "grad_norm": 1.6570295095443726, "learning_rate": 7.028361132934347e-06, "loss": 0.6957, "step": 10734 }, { "epoch": 1.9305043603344423, "grad_norm": 1.452850103378296, "learning_rate": 7.02782877319862e-06, "loss": 0.7436, "step": 10735 }, { "epoch": 1.9306841679403037, "grad_norm": 1.4549574851989746, "learning_rate": 7.027296385947915e-06, "loss": 0.718, "step": 10736 }, { "epoch": 1.9308639755461656, "grad_norm": 1.521236777305603, "learning_rate": 7.02676397118946e-06, "loss": 0.7294, "step": 10737 }, { "epoch": 1.9310437831520273, "grad_norm": 1.5827182531356812, "learning_rate": 7.0262315289304765e-06, "loss": 0.7321, "step": 10738 }, { "epoch": 1.931223590757889, "grad_norm": 1.4340773820877075, "learning_rate": 7.025699059178188e-06, "loss": 0.713, "step": 10739 }, { "epoch": 1.9314033983637509, "grad_norm": 1.4158430099487305, "learning_rate": 7.025166561939822e-06, "loss": 0.671, "step": 10740 }, { "epoch": 1.9315832059696125, "grad_norm": 1.4819698333740234, "learning_rate": 7.024634037222606e-06, "loss": 0.7477, "step": 10741 }, { "epoch": 1.9317630135754742, "grad_norm": 1.8226672410964966, "learning_rate": 7.02410148503376e-06, "loss": 0.7825, "step": 10742 }, { "epoch": 1.931942821181336, "grad_norm": 1.514000654220581, "learning_rate": 7.0235689053805145e-06, "loss": 0.7269, "step": 10743 }, { "epoch": 1.9321226287871975, "grad_norm": 1.4411894083023071, "learning_rate": 7.023036298270091e-06, "loss": 0.7001, "step": 10744 }, { "epoch": 1.9323024363930594, "grad_norm": 1.4038907289505005, "learning_rate": 7.022503663709723e-06, "loss": 0.6661, "step": 10745 }, { "epoch": 1.932482243998921, "grad_norm": 1.41576087474823, "learning_rate": 7.021971001706633e-06, "loss": 0.7103, "step": 10746 }, { "epoch": 1.9326620516047828, "grad_norm": 1.606632113456726, "learning_rate": 7.02143831226805e-06, "loss": 0.7909, "step": 10747 }, { "epoch": 1.9328418592106447, "grad_norm": 1.4466761350631714, "learning_rate": 7.0209055954012e-06, "loss": 0.7587, "step": 10748 }, { "epoch": 1.9330216668165063, "grad_norm": 1.4951483011245728, "learning_rate": 7.020372851113313e-06, "loss": 0.6742, "step": 10749 }, { "epoch": 1.933201474422368, "grad_norm": 1.4688301086425781, "learning_rate": 7.0198400794116185e-06, "loss": 0.7488, "step": 10750 }, { "epoch": 1.93338128202823, "grad_norm": 1.715187430381775, "learning_rate": 7.019307280303344e-06, "loss": 0.7806, "step": 10751 }, { "epoch": 1.9335610896340916, "grad_norm": 1.0587952136993408, "learning_rate": 7.018774453795718e-06, "loss": 0.9564, "step": 10752 }, { "epoch": 1.9337408972399532, "grad_norm": 1.5681111812591553, "learning_rate": 7.018241599895974e-06, "loss": 0.7042, "step": 10753 }, { "epoch": 1.9339207048458151, "grad_norm": 1.4989521503448486, "learning_rate": 7.017708718611338e-06, "loss": 0.7262, "step": 10754 }, { "epoch": 1.9341005124516766, "grad_norm": 1.080761432647705, "learning_rate": 7.017175809949044e-06, "loss": 0.9442, "step": 10755 }, { "epoch": 1.9342803200575385, "grad_norm": 1.4406899213790894, "learning_rate": 7.016642873916318e-06, "loss": 0.7437, "step": 10756 }, { "epoch": 1.9344601276634001, "grad_norm": 1.4756486415863037, "learning_rate": 7.016109910520397e-06, "loss": 0.7203, "step": 10757 }, { "epoch": 1.9346399352692618, "grad_norm": 1.6031080484390259, "learning_rate": 7.015576919768509e-06, "loss": 0.7153, "step": 10758 }, { "epoch": 1.9348197428751237, "grad_norm": 1.1632989645004272, "learning_rate": 7.015043901667888e-06, "loss": 0.9095, "step": 10759 }, { "epoch": 1.9349995504809854, "grad_norm": 1.6649975776672363, "learning_rate": 7.014510856225762e-06, "loss": 0.7281, "step": 10760 }, { "epoch": 1.935179358086847, "grad_norm": 1.449988842010498, "learning_rate": 7.01397778344937e-06, "loss": 0.6525, "step": 10761 }, { "epoch": 1.935359165692709, "grad_norm": 1.5347115993499756, "learning_rate": 7.01344468334594e-06, "loss": 0.7351, "step": 10762 }, { "epoch": 1.9355389732985704, "grad_norm": 1.450648546218872, "learning_rate": 7.01291155592271e-06, "loss": 0.7523, "step": 10763 }, { "epoch": 1.9357187809044323, "grad_norm": 1.5220566987991333, "learning_rate": 7.01237840118691e-06, "loss": 0.7496, "step": 10764 }, { "epoch": 1.935898588510294, "grad_norm": 1.551329493522644, "learning_rate": 7.011845219145776e-06, "loss": 0.7004, "step": 10765 }, { "epoch": 1.9360783961161556, "grad_norm": 4.820184707641602, "learning_rate": 7.011312009806541e-06, "loss": 0.7753, "step": 10766 }, { "epoch": 1.9362582037220175, "grad_norm": 1.4943674802780151, "learning_rate": 7.0107787731764436e-06, "loss": 0.7756, "step": 10767 }, { "epoch": 1.9364380113278792, "grad_norm": 1.5266019105911255, "learning_rate": 7.010245509262715e-06, "loss": 0.727, "step": 10768 }, { "epoch": 1.9366178189337409, "grad_norm": 1.6094552278518677, "learning_rate": 7.009712218072593e-06, "loss": 0.7616, "step": 10769 }, { "epoch": 1.9367976265396027, "grad_norm": 1.5779883861541748, "learning_rate": 7.009178899613312e-06, "loss": 0.753, "step": 10770 }, { "epoch": 1.9369774341454642, "grad_norm": 1.4581514596939087, "learning_rate": 7.008645553892111e-06, "loss": 0.7373, "step": 10771 }, { "epoch": 1.937157241751326, "grad_norm": 1.5768849849700928, "learning_rate": 7.008112180916224e-06, "loss": 0.7454, "step": 10772 }, { "epoch": 1.9373370493571878, "grad_norm": 1.4852944612503052, "learning_rate": 7.007578780692892e-06, "loss": 0.736, "step": 10773 }, { "epoch": 1.9375168569630494, "grad_norm": 1.5430734157562256, "learning_rate": 7.007045353229349e-06, "loss": 0.7533, "step": 10774 }, { "epoch": 1.9376966645689113, "grad_norm": 1.4216556549072266, "learning_rate": 7.006511898532834e-06, "loss": 0.7274, "step": 10775 }, { "epoch": 1.937876472174773, "grad_norm": 1.497586727142334, "learning_rate": 7.0059784166105845e-06, "loss": 0.7155, "step": 10776 }, { "epoch": 1.9380562797806347, "grad_norm": 1.4962118864059448, "learning_rate": 7.005444907469842e-06, "loss": 0.7735, "step": 10777 }, { "epoch": 1.9382360873864966, "grad_norm": 1.7346019744873047, "learning_rate": 7.004911371117842e-06, "loss": 0.7534, "step": 10778 }, { "epoch": 1.9384158949923582, "grad_norm": 1.436851143836975, "learning_rate": 7.004377807561827e-06, "loss": 0.731, "step": 10779 }, { "epoch": 1.93859570259822, "grad_norm": 1.4564387798309326, "learning_rate": 7.0038442168090326e-06, "loss": 0.6827, "step": 10780 }, { "epoch": 1.9387755102040818, "grad_norm": 1.394980549812317, "learning_rate": 7.003310598866704e-06, "loss": 0.6417, "step": 10781 }, { "epoch": 1.9389553178099432, "grad_norm": 1.4392786026000977, "learning_rate": 7.002776953742078e-06, "loss": 0.6938, "step": 10782 }, { "epoch": 1.9391351254158051, "grad_norm": 1.5180610418319702, "learning_rate": 7.002243281442395e-06, "loss": 0.7736, "step": 10783 }, { "epoch": 1.9393149330216668, "grad_norm": 1.4864327907562256, "learning_rate": 7.0017095819748995e-06, "loss": 0.7146, "step": 10784 }, { "epoch": 1.9394947406275285, "grad_norm": 1.4764378070831299, "learning_rate": 7.001175855346832e-06, "loss": 0.7397, "step": 10785 }, { "epoch": 1.9396745482333904, "grad_norm": 1.4854776859283447, "learning_rate": 7.000642101565434e-06, "loss": 0.7878, "step": 10786 }, { "epoch": 1.939854355839252, "grad_norm": 1.6174330711364746, "learning_rate": 7.000108320637947e-06, "loss": 0.7349, "step": 10787 }, { "epoch": 1.9400341634451137, "grad_norm": 1.4205129146575928, "learning_rate": 6.999574512571614e-06, "loss": 0.694, "step": 10788 }, { "epoch": 1.9402139710509756, "grad_norm": 1.3918825387954712, "learning_rate": 6.999040677373681e-06, "loss": 0.6774, "step": 10789 }, { "epoch": 1.940393778656837, "grad_norm": 1.549601674079895, "learning_rate": 6.998506815051387e-06, "loss": 0.7181, "step": 10790 }, { "epoch": 1.940573586262699, "grad_norm": 1.4411954879760742, "learning_rate": 6.997972925611978e-06, "loss": 0.6825, "step": 10791 }, { "epoch": 1.9407533938685606, "grad_norm": 1.467405915260315, "learning_rate": 6.997439009062699e-06, "loss": 0.6885, "step": 10792 }, { "epoch": 1.9409332014744223, "grad_norm": 1.5373036861419678, "learning_rate": 6.9969050654107914e-06, "loss": 0.6935, "step": 10793 }, { "epoch": 1.9411130090802842, "grad_norm": 1.5847525596618652, "learning_rate": 6.996371094663503e-06, "loss": 0.7008, "step": 10794 }, { "epoch": 1.9412928166861458, "grad_norm": 1.6221520900726318, "learning_rate": 6.99583709682808e-06, "loss": 0.7384, "step": 10795 }, { "epoch": 1.9414726242920075, "grad_norm": 1.4495078325271606, "learning_rate": 6.995303071911765e-06, "loss": 0.7555, "step": 10796 }, { "epoch": 1.9416524318978694, "grad_norm": 1.2612459659576416, "learning_rate": 6.994769019921806e-06, "loss": 0.9068, "step": 10797 }, { "epoch": 1.9418322395037309, "grad_norm": 1.3412106037139893, "learning_rate": 6.994234940865448e-06, "loss": 0.7312, "step": 10798 }, { "epoch": 1.9420120471095927, "grad_norm": 1.4350923299789429, "learning_rate": 6.993700834749942e-06, "loss": 0.7275, "step": 10799 }, { "epoch": 1.9421918547154544, "grad_norm": 1.677707314491272, "learning_rate": 6.993166701582528e-06, "loss": 0.6881, "step": 10800 }, { "epoch": 1.942371662321316, "grad_norm": 1.4273433685302734, "learning_rate": 6.9926325413704574e-06, "loss": 0.7449, "step": 10801 }, { "epoch": 1.942551469927178, "grad_norm": 1.4858849048614502, "learning_rate": 6.9920983541209784e-06, "loss": 0.7317, "step": 10802 }, { "epoch": 1.9427312775330396, "grad_norm": 1.637556791305542, "learning_rate": 6.991564139841339e-06, "loss": 0.7615, "step": 10803 }, { "epoch": 1.9429110851389013, "grad_norm": 1.533219814300537, "learning_rate": 6.991029898538787e-06, "loss": 0.7104, "step": 10804 }, { "epoch": 1.9430908927447632, "grad_norm": 1.4784945249557495, "learning_rate": 6.9904956302205715e-06, "loss": 0.68, "step": 10805 }, { "epoch": 1.9432707003506249, "grad_norm": 1.3829014301300049, "learning_rate": 6.989961334893942e-06, "loss": 0.7101, "step": 10806 }, { "epoch": 1.9434505079564866, "grad_norm": 1.4536033868789673, "learning_rate": 6.98942701256615e-06, "loss": 0.699, "step": 10807 }, { "epoch": 1.9436303155623484, "grad_norm": 1.4716126918792725, "learning_rate": 6.988892663244442e-06, "loss": 0.7001, "step": 10808 }, { "epoch": 1.94381012316821, "grad_norm": 1.5212477445602417, "learning_rate": 6.988358286936073e-06, "loss": 0.7303, "step": 10809 }, { "epoch": 1.9439899307740718, "grad_norm": 1.5097863674163818, "learning_rate": 6.987823883648287e-06, "loss": 0.7653, "step": 10810 }, { "epoch": 1.9441697383799335, "grad_norm": 1.368109941482544, "learning_rate": 6.9872894533883415e-06, "loss": 0.9091, "step": 10811 }, { "epoch": 1.9443495459857951, "grad_norm": 1.4470937252044678, "learning_rate": 6.986754996163485e-06, "loss": 0.7126, "step": 10812 }, { "epoch": 1.944529353591657, "grad_norm": 1.4667744636535645, "learning_rate": 6.986220511980971e-06, "loss": 0.7201, "step": 10813 }, { "epoch": 1.9447091611975187, "grad_norm": 1.54987370967865, "learning_rate": 6.985686000848051e-06, "loss": 0.7794, "step": 10814 }, { "epoch": 1.9448889688033804, "grad_norm": 1.744326114654541, "learning_rate": 6.985151462771976e-06, "loss": 0.6557, "step": 10815 }, { "epoch": 1.9450687764092423, "grad_norm": 1.0886344909667969, "learning_rate": 6.98461689776e-06, "loss": 0.9232, "step": 10816 }, { "epoch": 1.9452485840151037, "grad_norm": 1.4941949844360352, "learning_rate": 6.984082305819379e-06, "loss": 0.7524, "step": 10817 }, { "epoch": 1.9454283916209656, "grad_norm": 1.1353554725646973, "learning_rate": 6.983547686957364e-06, "loss": 0.9259, "step": 10818 }, { "epoch": 1.9456081992268273, "grad_norm": 1.5010287761688232, "learning_rate": 6.983013041181209e-06, "loss": 0.7497, "step": 10819 }, { "epoch": 1.945788006832689, "grad_norm": 1.1083786487579346, "learning_rate": 6.982478368498169e-06, "loss": 0.9196, "step": 10820 }, { "epoch": 1.9459678144385508, "grad_norm": 1.4036073684692383, "learning_rate": 6.9819436689155e-06, "loss": 0.7819, "step": 10821 }, { "epoch": 1.9461476220444125, "grad_norm": 1.5154255628585815, "learning_rate": 6.981408942440454e-06, "loss": 0.7521, "step": 10822 }, { "epoch": 1.9463274296502742, "grad_norm": 1.4172121286392212, "learning_rate": 6.98087418908029e-06, "loss": 0.6948, "step": 10823 }, { "epoch": 1.946507237256136, "grad_norm": 1.6480026245117188, "learning_rate": 6.980339408842261e-06, "loss": 0.6918, "step": 10824 }, { "epoch": 1.9466870448619975, "grad_norm": 1.4123693704605103, "learning_rate": 6.979804601733625e-06, "loss": 0.6612, "step": 10825 }, { "epoch": 1.9468668524678594, "grad_norm": 1.5447146892547607, "learning_rate": 6.979269767761638e-06, "loss": 0.7694, "step": 10826 }, { "epoch": 1.947046660073721, "grad_norm": 1.1080186367034912, "learning_rate": 6.97873490693356e-06, "loss": 0.9218, "step": 10827 }, { "epoch": 1.9472264676795827, "grad_norm": 1.4652633666992188, "learning_rate": 6.978200019256643e-06, "loss": 0.7866, "step": 10828 }, { "epoch": 1.9474062752854446, "grad_norm": 1.52995765209198, "learning_rate": 6.977665104738149e-06, "loss": 0.6848, "step": 10829 }, { "epoch": 1.9475860828913063, "grad_norm": 1.4397261142730713, "learning_rate": 6.977130163385334e-06, "loss": 0.7087, "step": 10830 }, { "epoch": 1.947765890497168, "grad_norm": 1.414243221282959, "learning_rate": 6.976595195205457e-06, "loss": 0.7448, "step": 10831 }, { "epoch": 1.9479456981030299, "grad_norm": 1.385445237159729, "learning_rate": 6.976060200205776e-06, "loss": 0.6884, "step": 10832 }, { "epoch": 1.9481255057088915, "grad_norm": 1.5125763416290283, "learning_rate": 6.975525178393552e-06, "loss": 0.7345, "step": 10833 }, { "epoch": 1.9483053133147532, "grad_norm": 1.423629641532898, "learning_rate": 6.9749901297760425e-06, "loss": 0.642, "step": 10834 }, { "epoch": 1.948485120920615, "grad_norm": 1.3816171884536743, "learning_rate": 6.97445505436051e-06, "loss": 0.6494, "step": 10835 }, { "epoch": 1.9486649285264765, "grad_norm": 1.4212567806243896, "learning_rate": 6.9739199521542115e-06, "loss": 0.7422, "step": 10836 }, { "epoch": 1.9488447361323384, "grad_norm": 1.5040208101272583, "learning_rate": 6.973384823164409e-06, "loss": 0.745, "step": 10837 }, { "epoch": 1.9490245437382, "grad_norm": 1.5849686861038208, "learning_rate": 6.972849667398365e-06, "loss": 0.7073, "step": 10838 }, { "epoch": 1.9492043513440618, "grad_norm": 1.5114026069641113, "learning_rate": 6.97231448486334e-06, "loss": 0.7589, "step": 10839 }, { "epoch": 1.9493841589499237, "grad_norm": 1.4386018514633179, "learning_rate": 6.971779275566593e-06, "loss": 0.7353, "step": 10840 }, { "epoch": 1.9495639665557853, "grad_norm": 1.561326265335083, "learning_rate": 6.971244039515391e-06, "loss": 0.7421, "step": 10841 }, { "epoch": 1.949743774161647, "grad_norm": 1.4732389450073242, "learning_rate": 6.970708776716993e-06, "loss": 0.6869, "step": 10842 }, { "epoch": 1.949923581767509, "grad_norm": 1.503035306930542, "learning_rate": 6.970173487178663e-06, "loss": 0.7526, "step": 10843 }, { "epoch": 1.9501033893733704, "grad_norm": 1.416052222251892, "learning_rate": 6.969638170907663e-06, "loss": 0.6902, "step": 10844 }, { "epoch": 1.9502831969792322, "grad_norm": 1.4500302076339722, "learning_rate": 6.969102827911259e-06, "loss": 0.6947, "step": 10845 }, { "epoch": 1.950463004585094, "grad_norm": 1.5733155012130737, "learning_rate": 6.968567458196712e-06, "loss": 0.743, "step": 10846 }, { "epoch": 1.9506428121909556, "grad_norm": 1.4982517957687378, "learning_rate": 6.968032061771288e-06, "loss": 0.7334, "step": 10847 }, { "epoch": 1.9508226197968175, "grad_norm": 1.4302011728286743, "learning_rate": 6.967496638642251e-06, "loss": 0.6917, "step": 10848 }, { "epoch": 1.9510024274026792, "grad_norm": 1.5636653900146484, "learning_rate": 6.966961188816867e-06, "loss": 0.6954, "step": 10849 }, { "epoch": 1.9511822350085408, "grad_norm": 1.1366171836853027, "learning_rate": 6.9664257123024e-06, "loss": 0.9493, "step": 10850 }, { "epoch": 1.9513620426144027, "grad_norm": 1.5012954473495483, "learning_rate": 6.965890209106117e-06, "loss": 0.7973, "step": 10851 }, { "epoch": 1.9515418502202642, "grad_norm": 1.0949031114578247, "learning_rate": 6.965354679235284e-06, "loss": 0.9556, "step": 10852 }, { "epoch": 1.951721657826126, "grad_norm": 1.5162646770477295, "learning_rate": 6.964819122697165e-06, "loss": 0.767, "step": 10853 }, { "epoch": 1.9519014654319877, "grad_norm": 1.5566271543502808, "learning_rate": 6.9642835394990295e-06, "loss": 0.732, "step": 10854 }, { "epoch": 1.9520812730378494, "grad_norm": 1.5980859994888306, "learning_rate": 6.963747929648143e-06, "loss": 0.7378, "step": 10855 }, { "epoch": 1.9522610806437113, "grad_norm": 2.3842406272888184, "learning_rate": 6.963212293151776e-06, "loss": 0.7493, "step": 10856 }, { "epoch": 1.952440888249573, "grad_norm": 1.4735107421875, "learning_rate": 6.962676630017191e-06, "loss": 0.7652, "step": 10857 }, { "epoch": 1.9526206958554346, "grad_norm": 1.6124637126922607, "learning_rate": 6.962140940251662e-06, "loss": 0.7659, "step": 10858 }, { "epoch": 1.9528005034612965, "grad_norm": 1.5889184474945068, "learning_rate": 6.9616052238624536e-06, "loss": 0.7322, "step": 10859 }, { "epoch": 1.9529803110671582, "grad_norm": 1.4131016731262207, "learning_rate": 6.961069480856836e-06, "loss": 0.7263, "step": 10860 }, { "epoch": 1.9531601186730199, "grad_norm": 1.5664019584655762, "learning_rate": 6.960533711242079e-06, "loss": 0.726, "step": 10861 }, { "epoch": 1.9533399262788818, "grad_norm": 1.4138519763946533, "learning_rate": 6.959997915025454e-06, "loss": 0.722, "step": 10862 }, { "epoch": 1.9535197338847432, "grad_norm": 1.5423924922943115, "learning_rate": 6.959462092214227e-06, "loss": 0.7557, "step": 10863 }, { "epoch": 1.953699541490605, "grad_norm": 1.489086627960205, "learning_rate": 6.958926242815671e-06, "loss": 0.6912, "step": 10864 }, { "epoch": 1.9538793490964668, "grad_norm": 1.430307388305664, "learning_rate": 6.958390366837056e-06, "loss": 0.7257, "step": 10865 }, { "epoch": 1.9540591567023284, "grad_norm": 1.8717628717422485, "learning_rate": 6.957854464285654e-06, "loss": 0.7778, "step": 10866 }, { "epoch": 1.9542389643081903, "grad_norm": 1.6384555101394653, "learning_rate": 6.957318535168735e-06, "loss": 0.6999, "step": 10867 }, { "epoch": 1.954418771914052, "grad_norm": 1.5523133277893066, "learning_rate": 6.9567825794935725e-06, "loss": 0.7778, "step": 10868 }, { "epoch": 1.9545985795199137, "grad_norm": 1.7030963897705078, "learning_rate": 6.956246597267438e-06, "loss": 0.7921, "step": 10869 }, { "epoch": 1.9547783871257756, "grad_norm": 1.180822730064392, "learning_rate": 6.955710588497603e-06, "loss": 0.9142, "step": 10870 }, { "epoch": 1.954958194731637, "grad_norm": 1.5136412382125854, "learning_rate": 6.955174553191342e-06, "loss": 0.7647, "step": 10871 }, { "epoch": 1.955138002337499, "grad_norm": 1.5622291564941406, "learning_rate": 6.954638491355929e-06, "loss": 0.7551, "step": 10872 }, { "epoch": 1.9553178099433606, "grad_norm": 1.4930830001831055, "learning_rate": 6.954102402998635e-06, "loss": 0.7122, "step": 10873 }, { "epoch": 1.9554976175492222, "grad_norm": 1.5898023843765259, "learning_rate": 6.953566288126736e-06, "loss": 0.7395, "step": 10874 }, { "epoch": 1.9556774251550841, "grad_norm": 1.5903123617172241, "learning_rate": 6.953030146747506e-06, "loss": 0.7603, "step": 10875 }, { "epoch": 1.9558572327609458, "grad_norm": 1.4501651525497437, "learning_rate": 6.95249397886822e-06, "loss": 0.7429, "step": 10876 }, { "epoch": 1.9560370403668075, "grad_norm": 1.065207839012146, "learning_rate": 6.95195778449615e-06, "loss": 0.9263, "step": 10877 }, { "epoch": 1.9562168479726694, "grad_norm": 1.50663161277771, "learning_rate": 6.951421563638578e-06, "loss": 0.7266, "step": 10878 }, { "epoch": 1.9563966555785308, "grad_norm": 1.5421372652053833, "learning_rate": 6.950885316302773e-06, "loss": 0.7286, "step": 10879 }, { "epoch": 1.9565764631843927, "grad_norm": 1.417836308479309, "learning_rate": 6.9503490424960166e-06, "loss": 0.7377, "step": 10880 }, { "epoch": 1.9567562707902544, "grad_norm": 1.6828274726867676, "learning_rate": 6.94981274222558e-06, "loss": 0.6323, "step": 10881 }, { "epoch": 1.956936078396116, "grad_norm": 1.6460639238357544, "learning_rate": 6.949276415498743e-06, "loss": 0.7498, "step": 10882 }, { "epoch": 1.957115886001978, "grad_norm": 1.1124768257141113, "learning_rate": 6.948740062322784e-06, "loss": 0.968, "step": 10883 }, { "epoch": 1.9572956936078396, "grad_norm": 1.5632184743881226, "learning_rate": 6.948203682704981e-06, "loss": 0.7639, "step": 10884 }, { "epoch": 1.9574755012137013, "grad_norm": 1.4414187669754028, "learning_rate": 6.947667276652607e-06, "loss": 0.7209, "step": 10885 }, { "epoch": 1.9576553088195632, "grad_norm": 1.4738179445266724, "learning_rate": 6.947130844172947e-06, "loss": 0.7206, "step": 10886 }, { "epoch": 1.9578351164254248, "grad_norm": 1.4596197605133057, "learning_rate": 6.946594385273273e-06, "loss": 0.7344, "step": 10887 }, { "epoch": 1.9580149240312865, "grad_norm": 1.6609197854995728, "learning_rate": 6.946057899960869e-06, "loss": 0.6755, "step": 10888 }, { "epoch": 1.9581947316371484, "grad_norm": 1.1007802486419678, "learning_rate": 6.945521388243014e-06, "loss": 0.93, "step": 10889 }, { "epoch": 1.9583745392430099, "grad_norm": 1.441508412361145, "learning_rate": 6.944984850126986e-06, "loss": 0.7267, "step": 10890 }, { "epoch": 1.9585543468488718, "grad_norm": 1.5844172239303589, "learning_rate": 6.944448285620064e-06, "loss": 0.6857, "step": 10891 }, { "epoch": 1.9587341544547334, "grad_norm": 2.4597530364990234, "learning_rate": 6.943911694729531e-06, "loss": 0.8058, "step": 10892 }, { "epoch": 1.958913962060595, "grad_norm": 1.543541431427002, "learning_rate": 6.943375077462666e-06, "loss": 0.7393, "step": 10893 }, { "epoch": 1.959093769666457, "grad_norm": 1.5136070251464844, "learning_rate": 6.942838433826753e-06, "loss": 0.7304, "step": 10894 }, { "epoch": 1.9592735772723187, "grad_norm": 1.499144434928894, "learning_rate": 6.942301763829071e-06, "loss": 0.7394, "step": 10895 }, { "epoch": 1.9594533848781803, "grad_norm": 1.437192440032959, "learning_rate": 6.941765067476903e-06, "loss": 0.7646, "step": 10896 }, { "epoch": 1.9596331924840422, "grad_norm": 1.4384652376174927, "learning_rate": 6.9412283447775305e-06, "loss": 0.7052, "step": 10897 }, { "epoch": 1.9598130000899037, "grad_norm": 1.4400984048843384, "learning_rate": 6.940691595738237e-06, "loss": 0.7108, "step": 10898 }, { "epoch": 1.9599928076957656, "grad_norm": 1.3950979709625244, "learning_rate": 6.9401548203663046e-06, "loss": 0.6383, "step": 10899 }, { "epoch": 1.9601726153016272, "grad_norm": 1.4519480466842651, "learning_rate": 6.9396180186690175e-06, "loss": 0.7516, "step": 10900 }, { "epoch": 1.960352422907489, "grad_norm": 1.4925086498260498, "learning_rate": 6.939081190653658e-06, "loss": 0.6914, "step": 10901 }, { "epoch": 1.9605322305133508, "grad_norm": 1.4422593116760254, "learning_rate": 6.938544336327511e-06, "loss": 0.7934, "step": 10902 }, { "epoch": 1.9607120381192125, "grad_norm": 1.4682539701461792, "learning_rate": 6.938007455697862e-06, "loss": 0.7085, "step": 10903 }, { "epoch": 1.9608918457250741, "grad_norm": 1.1257990598678589, "learning_rate": 6.937470548771994e-06, "loss": 0.9171, "step": 10904 }, { "epoch": 1.961071653330936, "grad_norm": 1.1236517429351807, "learning_rate": 6.936933615557193e-06, "loss": 0.934, "step": 10905 }, { "epoch": 1.9612514609367975, "grad_norm": 1.151810646057129, "learning_rate": 6.936396656060746e-06, "loss": 0.8934, "step": 10906 }, { "epoch": 1.9614312685426594, "grad_norm": 1.0865731239318848, "learning_rate": 6.935859670289935e-06, "loss": 0.9281, "step": 10907 }, { "epoch": 1.961611076148521, "grad_norm": 1.4328457117080688, "learning_rate": 6.935322658252049e-06, "loss": 0.7152, "step": 10908 }, { "epoch": 1.9617908837543827, "grad_norm": 1.5251320600509644, "learning_rate": 6.934785619954374e-06, "loss": 0.7314, "step": 10909 }, { "epoch": 1.9619706913602446, "grad_norm": 1.1024004220962524, "learning_rate": 6.934248555404197e-06, "loss": 0.8933, "step": 10910 }, { "epoch": 1.9621504989661063, "grad_norm": 1.4785770177841187, "learning_rate": 6.933711464608804e-06, "loss": 0.7307, "step": 10911 }, { "epoch": 1.962330306571968, "grad_norm": 1.4432979822158813, "learning_rate": 6.933174347575486e-06, "loss": 0.7061, "step": 10912 }, { "epoch": 1.9625101141778298, "grad_norm": 1.5328994989395142, "learning_rate": 6.932637204311528e-06, "loss": 0.7871, "step": 10913 }, { "epoch": 1.9626899217836915, "grad_norm": 1.4486777782440186, "learning_rate": 6.932100034824217e-06, "loss": 0.7672, "step": 10914 }, { "epoch": 1.9628697293895532, "grad_norm": 1.4679287672042847, "learning_rate": 6.931562839120845e-06, "loss": 0.7337, "step": 10915 }, { "epoch": 1.963049536995415, "grad_norm": 1.5392075777053833, "learning_rate": 6.9310256172087e-06, "loss": 0.6965, "step": 10916 }, { "epoch": 1.9632293446012765, "grad_norm": 1.490967035293579, "learning_rate": 6.9304883690950706e-06, "loss": 0.7185, "step": 10917 }, { "epoch": 1.9634091522071384, "grad_norm": 1.4122157096862793, "learning_rate": 6.929951094787248e-06, "loss": 0.7509, "step": 10918 }, { "epoch": 1.963588959813, "grad_norm": 1.4210608005523682, "learning_rate": 6.929413794292521e-06, "loss": 0.7057, "step": 10919 }, { "epoch": 1.9637687674188617, "grad_norm": 4.285470485687256, "learning_rate": 6.928876467618181e-06, "loss": 0.7134, "step": 10920 }, { "epoch": 1.9639485750247236, "grad_norm": 1.6154910326004028, "learning_rate": 6.928339114771517e-06, "loss": 0.736, "step": 10921 }, { "epoch": 1.9641283826305853, "grad_norm": 1.5029995441436768, "learning_rate": 6.9278017357598225e-06, "loss": 0.7717, "step": 10922 }, { "epoch": 1.964308190236447, "grad_norm": 1.4804770946502686, "learning_rate": 6.927264330590388e-06, "loss": 0.6943, "step": 10923 }, { "epoch": 1.9644879978423089, "grad_norm": 1.8157155513763428, "learning_rate": 6.926726899270504e-06, "loss": 0.6815, "step": 10924 }, { "epoch": 1.9646678054481703, "grad_norm": 1.478346824645996, "learning_rate": 6.926189441807465e-06, "loss": 0.7776, "step": 10925 }, { "epoch": 1.9648476130540322, "grad_norm": 1.5224215984344482, "learning_rate": 6.925651958208563e-06, "loss": 0.6845, "step": 10926 }, { "epoch": 1.9650274206598939, "grad_norm": 1.4355586767196655, "learning_rate": 6.925114448481089e-06, "loss": 0.7045, "step": 10927 }, { "epoch": 1.9652072282657556, "grad_norm": 1.5356860160827637, "learning_rate": 6.924576912632341e-06, "loss": 0.7834, "step": 10928 }, { "epoch": 1.9653870358716174, "grad_norm": 1.4616163969039917, "learning_rate": 6.9240393506696066e-06, "loss": 0.7648, "step": 10929 }, { "epoch": 1.9655668434774791, "grad_norm": 1.6093262434005737, "learning_rate": 6.923501762600186e-06, "loss": 0.706, "step": 10930 }, { "epoch": 1.9657466510833408, "grad_norm": 1.501589298248291, "learning_rate": 6.922964148431368e-06, "loss": 0.7666, "step": 10931 }, { "epoch": 1.9659264586892027, "grad_norm": 1.358655571937561, "learning_rate": 6.92242650817045e-06, "loss": 0.7282, "step": 10932 }, { "epoch": 1.9661062662950641, "grad_norm": 1.5662747621536255, "learning_rate": 6.921888841824727e-06, "loss": 0.7102, "step": 10933 }, { "epoch": 1.966286073900926, "grad_norm": 1.636786699295044, "learning_rate": 6.921351149401495e-06, "loss": 0.7401, "step": 10934 }, { "epoch": 1.9664658815067877, "grad_norm": 1.5072033405303955, "learning_rate": 6.920813430908048e-06, "loss": 0.7107, "step": 10935 }, { "epoch": 1.9666456891126494, "grad_norm": 1.4584858417510986, "learning_rate": 6.920275686351683e-06, "loss": 0.7337, "step": 10936 }, { "epoch": 1.9668254967185113, "grad_norm": 1.109371304512024, "learning_rate": 6.919737915739696e-06, "loss": 0.9104, "step": 10937 }, { "epoch": 1.967005304324373, "grad_norm": 1.5606203079223633, "learning_rate": 6.9192001190793855e-06, "loss": 0.6594, "step": 10938 }, { "epoch": 1.9671851119302346, "grad_norm": 1.4041975736618042, "learning_rate": 6.918662296378048e-06, "loss": 0.6814, "step": 10939 }, { "epoch": 1.9673649195360965, "grad_norm": 1.5219275951385498, "learning_rate": 6.918124447642981e-06, "loss": 0.7337, "step": 10940 }, { "epoch": 1.967544727141958, "grad_norm": 2.620363473892212, "learning_rate": 6.9175865728814806e-06, "loss": 0.6569, "step": 10941 }, { "epoch": 1.9677245347478198, "grad_norm": 1.0634874105453491, "learning_rate": 6.917048672100848e-06, "loss": 0.9108, "step": 10942 }, { "epoch": 1.9679043423536817, "grad_norm": 1.5425225496292114, "learning_rate": 6.916510745308379e-06, "loss": 0.7446, "step": 10943 }, { "epoch": 1.9680841499595432, "grad_norm": 1.4565104246139526, "learning_rate": 6.915972792511375e-06, "loss": 0.6894, "step": 10944 }, { "epoch": 1.968263957565405, "grad_norm": 1.5219929218292236, "learning_rate": 6.9154348137171335e-06, "loss": 0.7232, "step": 10945 }, { "epoch": 1.9684437651712667, "grad_norm": 1.6263303756713867, "learning_rate": 6.914896808932954e-06, "loss": 0.7219, "step": 10946 }, { "epoch": 1.9686235727771284, "grad_norm": 1.75033700466156, "learning_rate": 6.914358778166138e-06, "loss": 0.7031, "step": 10947 }, { "epoch": 1.9688033803829903, "grad_norm": 1.556885838508606, "learning_rate": 6.913820721423987e-06, "loss": 0.7996, "step": 10948 }, { "epoch": 1.968983187988852, "grad_norm": 1.5064703226089478, "learning_rate": 6.913282638713798e-06, "loss": 0.7469, "step": 10949 }, { "epoch": 1.9691629955947136, "grad_norm": 1.5336354970932007, "learning_rate": 6.912744530042875e-06, "loss": 0.8491, "step": 10950 }, { "epoch": 1.9693428032005755, "grad_norm": 1.4934914112091064, "learning_rate": 6.912206395418518e-06, "loss": 0.8054, "step": 10951 }, { "epoch": 1.969522610806437, "grad_norm": 1.4789196252822876, "learning_rate": 6.91166823484803e-06, "loss": 0.7235, "step": 10952 }, { "epoch": 1.9697024184122989, "grad_norm": 1.547851800918579, "learning_rate": 6.911130048338712e-06, "loss": 0.784, "step": 10953 }, { "epoch": 1.9698822260181605, "grad_norm": 1.531949520111084, "learning_rate": 6.910591835897868e-06, "loss": 0.7462, "step": 10954 }, { "epoch": 1.9700620336240222, "grad_norm": 1.6075413227081299, "learning_rate": 6.910053597532798e-06, "loss": 0.7098, "step": 10955 }, { "epoch": 1.970241841229884, "grad_norm": 1.3716331720352173, "learning_rate": 6.909515333250809e-06, "loss": 0.6997, "step": 10956 }, { "epoch": 1.9704216488357458, "grad_norm": 1.3708101511001587, "learning_rate": 6.908977043059201e-06, "loss": 0.7087, "step": 10957 }, { "epoch": 1.9706014564416074, "grad_norm": 1.5447181463241577, "learning_rate": 6.908438726965279e-06, "loss": 0.7764, "step": 10958 }, { "epoch": 1.9707812640474693, "grad_norm": 1.5506500005722046, "learning_rate": 6.907900384976347e-06, "loss": 0.7287, "step": 10959 }, { "epoch": 1.9709610716533308, "grad_norm": 1.4603610038757324, "learning_rate": 6.907362017099713e-06, "loss": 0.7634, "step": 10960 }, { "epoch": 1.9711408792591927, "grad_norm": 1.4652884006500244, "learning_rate": 6.906823623342675e-06, "loss": 0.748, "step": 10961 }, { "epoch": 1.9713206868650544, "grad_norm": 1.498145341873169, "learning_rate": 6.906285203712546e-06, "loss": 0.7636, "step": 10962 }, { "epoch": 1.971500494470916, "grad_norm": 1.496447205543518, "learning_rate": 6.905746758216627e-06, "loss": 0.6712, "step": 10963 }, { "epoch": 1.971680302076778, "grad_norm": 1.423529028892517, "learning_rate": 6.905208286862226e-06, "loss": 0.7479, "step": 10964 }, { "epoch": 1.9718601096826396, "grad_norm": 1.1071054935455322, "learning_rate": 6.904669789656648e-06, "loss": 0.9421, "step": 10965 }, { "epoch": 1.9720399172885013, "grad_norm": 1.0521092414855957, "learning_rate": 6.904131266607199e-06, "loss": 0.9393, "step": 10966 }, { "epoch": 1.9722197248943631, "grad_norm": 1.5289896726608276, "learning_rate": 6.9035927177211884e-06, "loss": 0.8182, "step": 10967 }, { "epoch": 1.9723995325002246, "grad_norm": 1.6125506162643433, "learning_rate": 6.903054143005921e-06, "loss": 0.7755, "step": 10968 }, { "epoch": 1.9725793401060865, "grad_norm": 1.4380733966827393, "learning_rate": 6.902515542468706e-06, "loss": 0.707, "step": 10969 }, { "epoch": 1.9727591477119484, "grad_norm": 1.0938578844070435, "learning_rate": 6.901976916116852e-06, "loss": 0.9418, "step": 10970 }, { "epoch": 1.9729389553178098, "grad_norm": 1.4636259078979492, "learning_rate": 6.901438263957667e-06, "loss": 0.7418, "step": 10971 }, { "epoch": 1.9731187629236717, "grad_norm": 1.502314567565918, "learning_rate": 6.90089958599846e-06, "loss": 0.7794, "step": 10972 }, { "epoch": 1.9732985705295334, "grad_norm": 1.2024471759796143, "learning_rate": 6.900360882246541e-06, "loss": 0.9383, "step": 10973 }, { "epoch": 1.973478378135395, "grad_norm": 1.4854995012283325, "learning_rate": 6.899822152709217e-06, "loss": 0.7806, "step": 10974 }, { "epoch": 1.973658185741257, "grad_norm": 1.5069369077682495, "learning_rate": 6.899283397393799e-06, "loss": 0.7622, "step": 10975 }, { "epoch": 1.9738379933471186, "grad_norm": 1.503359079360962, "learning_rate": 6.898744616307598e-06, "loss": 0.7552, "step": 10976 }, { "epoch": 1.9740178009529803, "grad_norm": 1.5785573720932007, "learning_rate": 6.898205809457923e-06, "loss": 0.7461, "step": 10977 }, { "epoch": 1.9741976085588422, "grad_norm": 1.4815057516098022, "learning_rate": 6.897666976852087e-06, "loss": 0.7142, "step": 10978 }, { "epoch": 1.9743774161647036, "grad_norm": 1.485489010810852, "learning_rate": 6.897128118497398e-06, "loss": 0.7656, "step": 10979 }, { "epoch": 1.9745572237705655, "grad_norm": 1.5623756647109985, "learning_rate": 6.896589234401172e-06, "loss": 0.7326, "step": 10980 }, { "epoch": 1.9747370313764272, "grad_norm": 1.5130784511566162, "learning_rate": 6.896050324570718e-06, "loss": 0.7076, "step": 10981 }, { "epoch": 1.9749168389822889, "grad_norm": 1.4772429466247559, "learning_rate": 6.895511389013349e-06, "loss": 0.6784, "step": 10982 }, { "epoch": 1.9750966465881508, "grad_norm": 1.4612399339675903, "learning_rate": 6.894972427736378e-06, "loss": 0.7198, "step": 10983 }, { "epoch": 1.9752764541940124, "grad_norm": 1.4923474788665771, "learning_rate": 6.894433440747117e-06, "loss": 0.7489, "step": 10984 }, { "epoch": 1.975456261799874, "grad_norm": 1.5738235712051392, "learning_rate": 6.893894428052881e-06, "loss": 0.7373, "step": 10985 }, { "epoch": 1.975636069405736, "grad_norm": 1.4418489933013916, "learning_rate": 6.893355389660982e-06, "loss": 0.6826, "step": 10986 }, { "epoch": 1.9758158770115974, "grad_norm": 1.4614313840866089, "learning_rate": 6.892816325578735e-06, "loss": 0.7288, "step": 10987 }, { "epoch": 1.9759956846174593, "grad_norm": 2.176503896713257, "learning_rate": 6.892277235813453e-06, "loss": 0.7143, "step": 10988 }, { "epoch": 1.976175492223321, "grad_norm": 1.4815601110458374, "learning_rate": 6.891738120372453e-06, "loss": 0.6958, "step": 10989 }, { "epoch": 1.9763552998291827, "grad_norm": 1.460098385810852, "learning_rate": 6.891198979263049e-06, "loss": 0.808, "step": 10990 }, { "epoch": 1.9765351074350446, "grad_norm": 1.436486840248108, "learning_rate": 6.890659812492555e-06, "loss": 0.6974, "step": 10991 }, { "epoch": 1.9767149150409062, "grad_norm": 1.39750075340271, "learning_rate": 6.890120620068288e-06, "loss": 0.9357, "step": 10992 }, { "epoch": 1.976894722646768, "grad_norm": 1.4702123403549194, "learning_rate": 6.889581401997566e-06, "loss": 0.7195, "step": 10993 }, { "epoch": 1.9770745302526298, "grad_norm": 1.3464668989181519, "learning_rate": 6.889042158287702e-06, "loss": 0.9464, "step": 10994 }, { "epoch": 1.9772543378584913, "grad_norm": 1.479265570640564, "learning_rate": 6.888502888946017e-06, "loss": 0.6518, "step": 10995 }, { "epoch": 1.9774341454643531, "grad_norm": 1.558803677558899, "learning_rate": 6.887963593979824e-06, "loss": 0.7586, "step": 10996 }, { "epoch": 1.977613953070215, "grad_norm": 1.5110807418823242, "learning_rate": 6.887424273396443e-06, "loss": 0.6958, "step": 10997 }, { "epoch": 1.9777937606760765, "grad_norm": 1.4579888582229614, "learning_rate": 6.88688492720319e-06, "loss": 0.7256, "step": 10998 }, { "epoch": 1.9779735682819384, "grad_norm": 1.139938473701477, "learning_rate": 6.886345555407386e-06, "loss": 0.9141, "step": 10999 }, { "epoch": 1.9781533758878, "grad_norm": 1.476102590560913, "learning_rate": 6.885806158016347e-06, "loss": 0.7052, "step": 11000 }, { "epoch": 1.9781533758878, "eval_loss": 0.7836995124816895, "eval_runtime": 148.6939, "eval_samples_per_second": 96.722, "eval_steps_per_second": 1.513, "step": 11000 }, { "epoch": 1.9783331834936617, "grad_norm": 1.4718774557113647, "learning_rate": 6.885266735037392e-06, "loss": 0.751, "step": 11001 }, { "epoch": 1.9785129910995236, "grad_norm": 1.5003232955932617, "learning_rate": 6.884727286477842e-06, "loss": 0.6932, "step": 11002 }, { "epoch": 1.9786927987053853, "grad_norm": 1.5746426582336426, "learning_rate": 6.884187812345016e-06, "loss": 0.7776, "step": 11003 }, { "epoch": 1.978872606311247, "grad_norm": 1.435928463935852, "learning_rate": 6.883648312646234e-06, "loss": 0.7359, "step": 11004 }, { "epoch": 1.9790524139171088, "grad_norm": 1.402667760848999, "learning_rate": 6.883108787388817e-06, "loss": 0.7234, "step": 11005 }, { "epoch": 1.9792322215229703, "grad_norm": 1.5582858324050903, "learning_rate": 6.882569236580083e-06, "loss": 0.7995, "step": 11006 }, { "epoch": 1.9794120291288322, "grad_norm": 1.4435086250305176, "learning_rate": 6.8820296602273554e-06, "loss": 0.7268, "step": 11007 }, { "epoch": 1.9795918367346939, "grad_norm": 1.3615394830703735, "learning_rate": 6.881490058337953e-06, "loss": 0.6896, "step": 11008 }, { "epoch": 1.9797716443405555, "grad_norm": 1.0543231964111328, "learning_rate": 6.8809504309192025e-06, "loss": 0.9422, "step": 11009 }, { "epoch": 1.9799514519464174, "grad_norm": 1.4429880380630493, "learning_rate": 6.8804107779784194e-06, "loss": 0.6929, "step": 11010 }, { "epoch": 1.980131259552279, "grad_norm": 1.4770362377166748, "learning_rate": 6.879871099522931e-06, "loss": 0.733, "step": 11011 }, { "epoch": 1.9803110671581408, "grad_norm": 1.4201842546463013, "learning_rate": 6.879331395560058e-06, "loss": 0.7188, "step": 11012 }, { "epoch": 1.9804908747640027, "grad_norm": 1.4393433332443237, "learning_rate": 6.878791666097124e-06, "loss": 0.6737, "step": 11013 }, { "epoch": 1.980670682369864, "grad_norm": 1.514168620109558, "learning_rate": 6.8782519111414515e-06, "loss": 0.7072, "step": 11014 }, { "epoch": 1.980850489975726, "grad_norm": 1.5061209201812744, "learning_rate": 6.877712130700367e-06, "loss": 0.7382, "step": 11015 }, { "epoch": 1.9810302975815877, "grad_norm": 1.4298030138015747, "learning_rate": 6.877172324781191e-06, "loss": 0.7662, "step": 11016 }, { "epoch": 1.9812101051874493, "grad_norm": 1.4768931865692139, "learning_rate": 6.876632493391251e-06, "loss": 0.7685, "step": 11017 }, { "epoch": 1.9813899127933112, "grad_norm": 1.489603042602539, "learning_rate": 6.87609263653787e-06, "loss": 0.7488, "step": 11018 }, { "epoch": 1.981569720399173, "grad_norm": 1.5525634288787842, "learning_rate": 6.875552754228374e-06, "loss": 0.7644, "step": 11019 }, { "epoch": 1.9817495280050346, "grad_norm": 1.4668464660644531, "learning_rate": 6.875012846470087e-06, "loss": 0.7036, "step": 11020 }, { "epoch": 1.9819293356108965, "grad_norm": 1.4462335109710693, "learning_rate": 6.874472913270338e-06, "loss": 0.7425, "step": 11021 }, { "epoch": 1.982109143216758, "grad_norm": 1.5008128881454468, "learning_rate": 6.873932954636449e-06, "loss": 0.7624, "step": 11022 }, { "epoch": 1.9822889508226198, "grad_norm": 1.5232932567596436, "learning_rate": 6.8733929705757484e-06, "loss": 0.7207, "step": 11023 }, { "epoch": 1.9824687584284815, "grad_norm": 1.6698201894760132, "learning_rate": 6.872852961095564e-06, "loss": 0.7227, "step": 11024 }, { "epoch": 1.9826485660343431, "grad_norm": 1.0891352891921997, "learning_rate": 6.872312926203223e-06, "loss": 0.9536, "step": 11025 }, { "epoch": 1.982828373640205, "grad_norm": 1.4844164848327637, "learning_rate": 6.871772865906053e-06, "loss": 0.7938, "step": 11026 }, { "epoch": 1.9830081812460667, "grad_norm": 1.4665498733520508, "learning_rate": 6.87123278021138e-06, "loss": 0.7355, "step": 11027 }, { "epoch": 1.9831879888519284, "grad_norm": 1.4898475408554077, "learning_rate": 6.870692669126533e-06, "loss": 0.7846, "step": 11028 }, { "epoch": 1.9833677964577903, "grad_norm": 1.488698124885559, "learning_rate": 6.870152532658843e-06, "loss": 0.7204, "step": 11029 }, { "epoch": 1.983547604063652, "grad_norm": 1.3803688287734985, "learning_rate": 6.869612370815635e-06, "loss": 0.6626, "step": 11030 }, { "epoch": 1.9837274116695136, "grad_norm": 1.5501610040664673, "learning_rate": 6.8690721836042416e-06, "loss": 0.7195, "step": 11031 }, { "epoch": 1.9839072192753755, "grad_norm": 1.6374578475952148, "learning_rate": 6.8685319710319895e-06, "loss": 0.6498, "step": 11032 }, { "epoch": 1.984087026881237, "grad_norm": 1.4749279022216797, "learning_rate": 6.867991733106212e-06, "loss": 0.7111, "step": 11033 }, { "epoch": 1.9842668344870988, "grad_norm": 1.463220477104187, "learning_rate": 6.867451469834237e-06, "loss": 0.7503, "step": 11034 }, { "epoch": 1.9844466420929605, "grad_norm": 1.4782958030700684, "learning_rate": 6.866911181223396e-06, "loss": 0.7175, "step": 11035 }, { "epoch": 1.9846264496988222, "grad_norm": 1.517030954360962, "learning_rate": 6.86637086728102e-06, "loss": 0.6983, "step": 11036 }, { "epoch": 1.984806257304684, "grad_norm": 1.7883166074752808, "learning_rate": 6.865830528014441e-06, "loss": 0.7863, "step": 11037 }, { "epoch": 1.9849860649105457, "grad_norm": 1.6212528944015503, "learning_rate": 6.865290163430989e-06, "loss": 0.6803, "step": 11038 }, { "epoch": 1.9851658725164074, "grad_norm": 0.9904396533966064, "learning_rate": 6.864749773537998e-06, "loss": 0.9122, "step": 11039 }, { "epoch": 1.9853456801222693, "grad_norm": 1.4290995597839355, "learning_rate": 6.864209358342797e-06, "loss": 0.7511, "step": 11040 }, { "epoch": 1.9855254877281308, "grad_norm": 1.5347466468811035, "learning_rate": 6.863668917852724e-06, "loss": 0.8035, "step": 11041 }, { "epoch": 1.9857052953339926, "grad_norm": 1.396988868713379, "learning_rate": 6.863128452075107e-06, "loss": 0.7702, "step": 11042 }, { "epoch": 1.9858851029398543, "grad_norm": 1.1086992025375366, "learning_rate": 6.862587961017283e-06, "loss": 0.8838, "step": 11043 }, { "epoch": 1.986064910545716, "grad_norm": 1.4933618307113647, "learning_rate": 6.862047444686584e-06, "loss": 0.7651, "step": 11044 }, { "epoch": 1.9862447181515779, "grad_norm": 1.489985466003418, "learning_rate": 6.861506903090343e-06, "loss": 0.7643, "step": 11045 }, { "epoch": 1.9864245257574396, "grad_norm": 1.3299927711486816, "learning_rate": 6.860966336235897e-06, "loss": 0.8938, "step": 11046 }, { "epoch": 1.9866043333633012, "grad_norm": 1.5123481750488281, "learning_rate": 6.860425744130581e-06, "loss": 0.6924, "step": 11047 }, { "epoch": 1.9867841409691631, "grad_norm": 1.2254880666732788, "learning_rate": 6.8598851267817265e-06, "loss": 0.8624, "step": 11048 }, { "epoch": 1.9869639485750246, "grad_norm": 1.4561922550201416, "learning_rate": 6.859344484196673e-06, "loss": 0.7308, "step": 11049 }, { "epoch": 1.9871437561808865, "grad_norm": 1.3909294605255127, "learning_rate": 6.858803816382753e-06, "loss": 0.6712, "step": 11050 }, { "epoch": 1.9873235637867481, "grad_norm": 1.5503604412078857, "learning_rate": 6.858263123347307e-06, "loss": 0.734, "step": 11051 }, { "epoch": 1.9875033713926098, "grad_norm": 1.6084636449813843, "learning_rate": 6.857722405097666e-06, "loss": 0.7799, "step": 11052 }, { "epoch": 1.9876831789984717, "grad_norm": 1.5121548175811768, "learning_rate": 6.8571816616411705e-06, "loss": 0.7087, "step": 11053 }, { "epoch": 1.9878629866043334, "grad_norm": 1.4435967206954956, "learning_rate": 6.8566408929851555e-06, "loss": 0.706, "step": 11054 }, { "epoch": 1.988042794210195, "grad_norm": 1.5026301145553589, "learning_rate": 6.856100099136962e-06, "loss": 0.7075, "step": 11055 }, { "epoch": 1.988222601816057, "grad_norm": 1.472647786140442, "learning_rate": 6.855559280103923e-06, "loss": 0.7053, "step": 11056 }, { "epoch": 1.9884024094219186, "grad_norm": 1.5707565546035767, "learning_rate": 6.855018435893381e-06, "loss": 0.7393, "step": 11057 }, { "epoch": 1.9885822170277803, "grad_norm": 1.5976258516311646, "learning_rate": 6.854477566512673e-06, "loss": 0.7776, "step": 11058 }, { "epoch": 1.9887620246336422, "grad_norm": 1.0568852424621582, "learning_rate": 6.853936671969138e-06, "loss": 0.9043, "step": 11059 }, { "epoch": 1.9889418322395036, "grad_norm": 1.4650336503982544, "learning_rate": 6.853395752270113e-06, "loss": 0.7886, "step": 11060 }, { "epoch": 1.9891216398453655, "grad_norm": 1.416238784790039, "learning_rate": 6.8528548074229415e-06, "loss": 0.6746, "step": 11061 }, { "epoch": 1.9893014474512272, "grad_norm": 1.4760841131210327, "learning_rate": 6.8523138374349604e-06, "loss": 0.669, "step": 11062 }, { "epoch": 1.9894812550570888, "grad_norm": 1.4729411602020264, "learning_rate": 6.851772842313513e-06, "loss": 0.7581, "step": 11063 }, { "epoch": 1.9896610626629507, "grad_norm": 0.9932333827018738, "learning_rate": 6.851231822065936e-06, "loss": 0.9043, "step": 11064 }, { "epoch": 1.9898408702688124, "grad_norm": 1.402089238166809, "learning_rate": 6.850690776699574e-06, "loss": 0.6747, "step": 11065 }, { "epoch": 1.990020677874674, "grad_norm": 1.6586774587631226, "learning_rate": 6.850149706221764e-06, "loss": 0.7227, "step": 11066 }, { "epoch": 1.990200485480536, "grad_norm": 1.5754318237304688, "learning_rate": 6.8496086106398505e-06, "loss": 0.741, "step": 11067 }, { "epoch": 1.9903802930863974, "grad_norm": 1.6603835821151733, "learning_rate": 6.849067489961176e-06, "loss": 0.7479, "step": 11068 }, { "epoch": 1.9905601006922593, "grad_norm": 1.00852370262146, "learning_rate": 6.8485263441930824e-06, "loss": 0.9528, "step": 11069 }, { "epoch": 1.990739908298121, "grad_norm": 1.5621381998062134, "learning_rate": 6.84798517334291e-06, "loss": 0.7439, "step": 11070 }, { "epoch": 1.9909197159039826, "grad_norm": 1.5894030332565308, "learning_rate": 6.847443977418005e-06, "loss": 0.7384, "step": 11071 }, { "epoch": 1.9910995235098445, "grad_norm": 1.6320979595184326, "learning_rate": 6.846902756425709e-06, "loss": 0.7627, "step": 11072 }, { "epoch": 1.9912793311157062, "grad_norm": 1.5582773685455322, "learning_rate": 6.846361510373367e-06, "loss": 0.7081, "step": 11073 }, { "epoch": 1.9914591387215679, "grad_norm": 1.4437987804412842, "learning_rate": 6.845820239268321e-06, "loss": 0.7699, "step": 11074 }, { "epoch": 1.9916389463274298, "grad_norm": 1.459993600845337, "learning_rate": 6.845278943117917e-06, "loss": 0.7321, "step": 11075 }, { "epoch": 1.9918187539332912, "grad_norm": 1.5008655786514282, "learning_rate": 6.844737621929498e-06, "loss": 0.7579, "step": 11076 }, { "epoch": 1.9919985615391531, "grad_norm": 1.468339443206787, "learning_rate": 6.8441962757104105e-06, "loss": 0.7585, "step": 11077 }, { "epoch": 1.9921783691450148, "grad_norm": 1.5181119441986084, "learning_rate": 6.843654904467999e-06, "loss": 0.7385, "step": 11078 }, { "epoch": 1.9923581767508765, "grad_norm": 1.4802501201629639, "learning_rate": 6.84311350820961e-06, "loss": 0.7313, "step": 11079 }, { "epoch": 1.9925379843567383, "grad_norm": 1.5629535913467407, "learning_rate": 6.842572086942589e-06, "loss": 0.8002, "step": 11080 }, { "epoch": 1.9927177919626, "grad_norm": 1.4882181882858276, "learning_rate": 6.842030640674283e-06, "loss": 0.6934, "step": 11081 }, { "epoch": 1.9928975995684617, "grad_norm": 1.5105408430099487, "learning_rate": 6.841489169412036e-06, "loss": 0.6544, "step": 11082 }, { "epoch": 1.9930774071743236, "grad_norm": 1.515045404434204, "learning_rate": 6.840947673163201e-06, "loss": 0.6978, "step": 11083 }, { "epoch": 1.9932572147801852, "grad_norm": 1.425992488861084, "learning_rate": 6.84040615193512e-06, "loss": 0.7377, "step": 11084 }, { "epoch": 1.993437022386047, "grad_norm": 1.4342821836471558, "learning_rate": 6.839864605735141e-06, "loss": 0.6844, "step": 11085 }, { "epoch": 1.9936168299919088, "grad_norm": 1.4519871473312378, "learning_rate": 6.839323034570615e-06, "loss": 0.8167, "step": 11086 }, { "epoch": 1.9937966375977703, "grad_norm": 1.8551801443099976, "learning_rate": 6.838781438448888e-06, "loss": 0.7856, "step": 11087 }, { "epoch": 1.9939764452036322, "grad_norm": 1.4299736022949219, "learning_rate": 6.83823981737731e-06, "loss": 0.7015, "step": 11088 }, { "epoch": 1.9941562528094938, "grad_norm": 1.212161660194397, "learning_rate": 6.83769817136323e-06, "loss": 0.9214, "step": 11089 }, { "epoch": 1.9943360604153555, "grad_norm": 1.6847959756851196, "learning_rate": 6.837156500413995e-06, "loss": 0.7617, "step": 11090 }, { "epoch": 1.9945158680212174, "grad_norm": 1.4689406156539917, "learning_rate": 6.836614804536959e-06, "loss": 0.9507, "step": 11091 }, { "epoch": 1.994695675627079, "grad_norm": 1.501308560371399, "learning_rate": 6.8360730837394695e-06, "loss": 0.7368, "step": 11092 }, { "epoch": 1.9948754832329407, "grad_norm": 1.4350529909133911, "learning_rate": 6.835531338028879e-06, "loss": 0.7894, "step": 11093 }, { "epoch": 1.9950552908388026, "grad_norm": 1.4294536113739014, "learning_rate": 6.8349895674125344e-06, "loss": 0.7558, "step": 11094 }, { "epoch": 1.995235098444664, "grad_norm": 1.423492193222046, "learning_rate": 6.8344477718977905e-06, "loss": 0.7182, "step": 11095 }, { "epoch": 1.995414906050526, "grad_norm": 1.0645297765731812, "learning_rate": 6.833905951491997e-06, "loss": 0.9538, "step": 11096 }, { "epoch": 1.9955947136563876, "grad_norm": 1.5729373693466187, "learning_rate": 6.833364106202506e-06, "loss": 0.7794, "step": 11097 }, { "epoch": 1.9957745212622493, "grad_norm": 1.406935214996338, "learning_rate": 6.8328222360366696e-06, "loss": 0.7854, "step": 11098 }, { "epoch": 1.9959543288681112, "grad_norm": 1.4615659713745117, "learning_rate": 6.83228034100184e-06, "loss": 0.7831, "step": 11099 }, { "epoch": 1.9961341364739729, "grad_norm": 1.4578720331192017, "learning_rate": 6.8317384211053706e-06, "loss": 0.799, "step": 11100 }, { "epoch": 1.9963139440798345, "grad_norm": 1.6781046390533447, "learning_rate": 6.831196476354615e-06, "loss": 0.6831, "step": 11101 }, { "epoch": 1.9964937516856964, "grad_norm": 1.434567928314209, "learning_rate": 6.830654506756925e-06, "loss": 0.6591, "step": 11102 }, { "epoch": 1.9966735592915579, "grad_norm": 1.388749122619629, "learning_rate": 6.830112512319656e-06, "loss": 0.6918, "step": 11103 }, { "epoch": 1.9968533668974198, "grad_norm": 1.384181261062622, "learning_rate": 6.8295704930501615e-06, "loss": 0.7645, "step": 11104 }, { "epoch": 1.9970331745032814, "grad_norm": 1.435779333114624, "learning_rate": 6.829028448955795e-06, "loss": 0.7126, "step": 11105 }, { "epoch": 1.997212982109143, "grad_norm": 1.6994699239730835, "learning_rate": 6.828486380043915e-06, "loss": 0.7404, "step": 11106 }, { "epoch": 1.997392789715005, "grad_norm": 1.1082663536071777, "learning_rate": 6.827944286321871e-06, "loss": 0.9232, "step": 11107 }, { "epoch": 1.9975725973208667, "grad_norm": 1.5429316759109497, "learning_rate": 6.827402167797024e-06, "loss": 0.7278, "step": 11108 }, { "epoch": 1.9977524049267283, "grad_norm": 1.0560106039047241, "learning_rate": 6.826860024476726e-06, "loss": 0.9056, "step": 11109 }, { "epoch": 1.9979322125325902, "grad_norm": 1.6200038194656372, "learning_rate": 6.826317856368336e-06, "loss": 0.6991, "step": 11110 }, { "epoch": 1.998112020138452, "grad_norm": 1.56877601146698, "learning_rate": 6.8257756634792075e-06, "loss": 0.7291, "step": 11111 }, { "epoch": 1.9982918277443136, "grad_norm": 1.4987815618515015, "learning_rate": 6.825233445816699e-06, "loss": 0.7102, "step": 11112 }, { "epoch": 1.9984716353501755, "grad_norm": 1.2543370723724365, "learning_rate": 6.824691203388168e-06, "loss": 0.9521, "step": 11113 }, { "epoch": 1.998651442956037, "grad_norm": 1.1094207763671875, "learning_rate": 6.824148936200971e-06, "loss": 0.9549, "step": 11114 }, { "epoch": 1.9988312505618988, "grad_norm": 1.4640706777572632, "learning_rate": 6.823606644262467e-06, "loss": 0.7054, "step": 11115 }, { "epoch": 1.9990110581677605, "grad_norm": 0.9786985516548157, "learning_rate": 6.823064327580015e-06, "loss": 0.9198, "step": 11116 }, { "epoch": 1.9991908657736221, "grad_norm": 1.5877877473831177, "learning_rate": 6.82252198616097e-06, "loss": 0.7294, "step": 11117 }, { "epoch": 1.999370673379484, "grad_norm": 1.5562161207199097, "learning_rate": 6.821979620012696e-06, "loss": 0.7358, "step": 11118 }, { "epoch": 1.9995504809853457, "grad_norm": 1.481863021850586, "learning_rate": 6.821437229142545e-06, "loss": 0.7168, "step": 11119 }, { "epoch": 1.9997302885912074, "grad_norm": 1.1110146045684814, "learning_rate": 6.820894813557885e-06, "loss": 0.8721, "step": 11120 }, { "epoch": 2.000179807605862, "grad_norm": 1.3735594749450684, "learning_rate": 6.820352373266068e-06, "loss": 0.6739, "step": 11121 }, { "epoch": 2.0003596152117233, "grad_norm": 1.460617184638977, "learning_rate": 6.819809908274459e-06, "loss": 0.6772, "step": 11122 }, { "epoch": 2.0005394228175852, "grad_norm": 1.4847930669784546, "learning_rate": 6.819267418590419e-06, "loss": 0.6714, "step": 11123 }, { "epoch": 2.000719230423447, "grad_norm": 1.3662217855453491, "learning_rate": 6.818724904221305e-06, "loss": 0.6675, "step": 11124 }, { "epoch": 2.0008990380293086, "grad_norm": 1.3843417167663574, "learning_rate": 6.818182365174482e-06, "loss": 0.646, "step": 11125 }, { "epoch": 2.0010788456351705, "grad_norm": 1.6789045333862305, "learning_rate": 6.817639801457311e-06, "loss": 0.6764, "step": 11126 }, { "epoch": 2.001258653241032, "grad_norm": 1.0392532348632812, "learning_rate": 6.817097213077151e-06, "loss": 0.8828, "step": 11127 }, { "epoch": 2.001438460846894, "grad_norm": 1.4508414268493652, "learning_rate": 6.816554600041367e-06, "loss": 0.7327, "step": 11128 }, { "epoch": 2.0016182684527557, "grad_norm": 1.3185889720916748, "learning_rate": 6.81601196235732e-06, "loss": 0.6935, "step": 11129 }, { "epoch": 2.001798076058617, "grad_norm": 1.3626306056976318, "learning_rate": 6.815469300032374e-06, "loss": 0.6568, "step": 11130 }, { "epoch": 2.001977883664479, "grad_norm": 1.497225046157837, "learning_rate": 6.814926613073891e-06, "loss": 0.6157, "step": 11131 }, { "epoch": 2.002157691270341, "grad_norm": 1.4123618602752686, "learning_rate": 6.8143839014892355e-06, "loss": 0.6792, "step": 11132 }, { "epoch": 2.0023374988762024, "grad_norm": 1.45771062374115, "learning_rate": 6.81384116528577e-06, "loss": 0.6576, "step": 11133 }, { "epoch": 2.0025173064820643, "grad_norm": 1.4248605966567993, "learning_rate": 6.813298404470862e-06, "loss": 0.5986, "step": 11134 }, { "epoch": 2.0026971140879257, "grad_norm": 1.3361332416534424, "learning_rate": 6.812755619051874e-06, "loss": 0.6353, "step": 11135 }, { "epoch": 2.0028769216937876, "grad_norm": 1.4830224514007568, "learning_rate": 6.812212809036171e-06, "loss": 0.6411, "step": 11136 }, { "epoch": 2.0030567292996495, "grad_norm": 1.5054196119308472, "learning_rate": 6.811669974431117e-06, "loss": 0.6798, "step": 11137 }, { "epoch": 2.003236536905511, "grad_norm": 1.2021291255950928, "learning_rate": 6.8111271152440786e-06, "loss": 0.8817, "step": 11138 }, { "epoch": 2.003416344511373, "grad_norm": 1.4268760681152344, "learning_rate": 6.810584231482422e-06, "loss": 0.6253, "step": 11139 }, { "epoch": 2.0035961521172347, "grad_norm": 1.3426034450531006, "learning_rate": 6.810041323153514e-06, "loss": 0.6211, "step": 11140 }, { "epoch": 2.003775959723096, "grad_norm": 1.4226889610290527, "learning_rate": 6.809498390264718e-06, "loss": 0.6731, "step": 11141 }, { "epoch": 2.003955767328958, "grad_norm": 1.4765530824661255, "learning_rate": 6.8089554328234054e-06, "loss": 0.6413, "step": 11142 }, { "epoch": 2.0041355749348195, "grad_norm": 1.2493613958358765, "learning_rate": 6.80841245083694e-06, "loss": 0.8847, "step": 11143 }, { "epoch": 2.0043153825406814, "grad_norm": 1.594893455505371, "learning_rate": 6.80786944431269e-06, "loss": 0.6617, "step": 11144 }, { "epoch": 2.0044951901465433, "grad_norm": 1.1073604822158813, "learning_rate": 6.807326413258024e-06, "loss": 0.9041, "step": 11145 }, { "epoch": 2.0046749977524048, "grad_norm": 1.486133337020874, "learning_rate": 6.806783357680311e-06, "loss": 0.6773, "step": 11146 }, { "epoch": 2.0048548053582667, "grad_norm": 1.4714484214782715, "learning_rate": 6.806240277586919e-06, "loss": 0.6585, "step": 11147 }, { "epoch": 2.0050346129641285, "grad_norm": 1.5997474193572998, "learning_rate": 6.805697172985215e-06, "loss": 0.7097, "step": 11148 }, { "epoch": 2.00521442056999, "grad_norm": 1.3531832695007324, "learning_rate": 6.80515404388257e-06, "loss": 0.7065, "step": 11149 }, { "epoch": 2.005394228175852, "grad_norm": 1.0054067373275757, "learning_rate": 6.804610890286354e-06, "loss": 0.8694, "step": 11150 }, { "epoch": 2.005574035781714, "grad_norm": 1.3979018926620483, "learning_rate": 6.8040677122039354e-06, "loss": 0.6732, "step": 11151 }, { "epoch": 2.0057538433875752, "grad_norm": 1.0351120233535767, "learning_rate": 6.803524509642686e-06, "loss": 0.8931, "step": 11152 }, { "epoch": 2.005933650993437, "grad_norm": 1.4178146123886108, "learning_rate": 6.802981282609975e-06, "loss": 0.6561, "step": 11153 }, { "epoch": 2.0061134585992986, "grad_norm": 1.5008522272109985, "learning_rate": 6.802438031113174e-06, "loss": 0.6933, "step": 11154 }, { "epoch": 2.0062932662051605, "grad_norm": 1.5078436136245728, "learning_rate": 6.801894755159653e-06, "loss": 0.6266, "step": 11155 }, { "epoch": 2.0064730738110224, "grad_norm": 1.553615927696228, "learning_rate": 6.801351454756785e-06, "loss": 0.6497, "step": 11156 }, { "epoch": 2.006652881416884, "grad_norm": 1.890998363494873, "learning_rate": 6.800808129911941e-06, "loss": 0.7233, "step": 11157 }, { "epoch": 2.0068326890227457, "grad_norm": 1.0194453001022339, "learning_rate": 6.800264780632495e-06, "loss": 0.8592, "step": 11158 }, { "epoch": 2.0070124966286076, "grad_norm": 1.1677387952804565, "learning_rate": 6.7997214069258166e-06, "loss": 0.8501, "step": 11159 }, { "epoch": 2.007192304234469, "grad_norm": 1.1204097270965576, "learning_rate": 6.7991780087992805e-06, "loss": 0.9038, "step": 11160 }, { "epoch": 2.007372111840331, "grad_norm": 1.5024118423461914, "learning_rate": 6.79863458626026e-06, "loss": 0.7181, "step": 11161 }, { "epoch": 2.0075519194461924, "grad_norm": 2.2083446979522705, "learning_rate": 6.798091139316128e-06, "loss": 0.6425, "step": 11162 }, { "epoch": 2.0077317270520543, "grad_norm": 1.4852099418640137, "learning_rate": 6.797547667974259e-06, "loss": 0.6146, "step": 11163 }, { "epoch": 2.007911534657916, "grad_norm": 1.4741142988204956, "learning_rate": 6.797004172242028e-06, "loss": 0.6449, "step": 11164 }, { "epoch": 2.0080913422637776, "grad_norm": 1.0433241128921509, "learning_rate": 6.796460652126805e-06, "loss": 0.8665, "step": 11165 }, { "epoch": 2.0082711498696395, "grad_norm": 1.4758973121643066, "learning_rate": 6.79591710763597e-06, "loss": 0.7557, "step": 11166 }, { "epoch": 2.0084509574755014, "grad_norm": 1.1608506441116333, "learning_rate": 6.795373538776896e-06, "loss": 0.8418, "step": 11167 }, { "epoch": 2.008630765081363, "grad_norm": 1.479368805885315, "learning_rate": 6.79482994555696e-06, "loss": 0.6985, "step": 11168 }, { "epoch": 2.0088105726872247, "grad_norm": 1.4298570156097412, "learning_rate": 6.794286327983534e-06, "loss": 0.6654, "step": 11169 }, { "epoch": 2.008990380293086, "grad_norm": 1.0638601779937744, "learning_rate": 6.793742686064e-06, "loss": 0.8576, "step": 11170 }, { "epoch": 2.009170187898948, "grad_norm": 1.472361445426941, "learning_rate": 6.7931990198057295e-06, "loss": 0.6511, "step": 11171 }, { "epoch": 2.00934999550481, "grad_norm": 1.595489501953125, "learning_rate": 6.792655329216102e-06, "loss": 0.6522, "step": 11172 }, { "epoch": 2.0095298031106714, "grad_norm": 1.4847033023834229, "learning_rate": 6.792111614302494e-06, "loss": 0.6751, "step": 11173 }, { "epoch": 2.0097096107165333, "grad_norm": 1.4499584436416626, "learning_rate": 6.791567875072282e-06, "loss": 0.724, "step": 11174 }, { "epoch": 2.009889418322395, "grad_norm": 1.0709140300750732, "learning_rate": 6.791024111532845e-06, "loss": 0.8575, "step": 11175 }, { "epoch": 2.0100692259282567, "grad_norm": 1.5583983659744263, "learning_rate": 6.790480323691562e-06, "loss": 0.7695, "step": 11176 }, { "epoch": 2.0102490335341185, "grad_norm": 1.5726982355117798, "learning_rate": 6.789936511555808e-06, "loss": 0.7978, "step": 11177 }, { "epoch": 2.0104288411399804, "grad_norm": 1.4897992610931396, "learning_rate": 6.789392675132967e-06, "loss": 0.715, "step": 11178 }, { "epoch": 2.010608648745842, "grad_norm": 1.519355058670044, "learning_rate": 6.788848814430413e-06, "loss": 0.7069, "step": 11179 }, { "epoch": 2.010788456351704, "grad_norm": 1.426159143447876, "learning_rate": 6.7883049294555295e-06, "loss": 0.6859, "step": 11180 }, { "epoch": 2.0109682639575652, "grad_norm": 1.5480470657348633, "learning_rate": 6.787761020215693e-06, "loss": 0.6496, "step": 11181 }, { "epoch": 2.011148071563427, "grad_norm": 1.5300309658050537, "learning_rate": 6.787217086718288e-06, "loss": 0.7027, "step": 11182 }, { "epoch": 2.011327879169289, "grad_norm": 1.4806829690933228, "learning_rate": 6.786673128970689e-06, "loss": 0.6481, "step": 11183 }, { "epoch": 2.0115076867751505, "grad_norm": 1.656105399131775, "learning_rate": 6.786129146980283e-06, "loss": 0.6911, "step": 11184 }, { "epoch": 2.0116874943810124, "grad_norm": 1.5413206815719604, "learning_rate": 6.785585140754445e-06, "loss": 0.7204, "step": 11185 }, { "epoch": 2.0118673019868742, "grad_norm": 1.5337406396865845, "learning_rate": 6.785041110300561e-06, "loss": 0.6377, "step": 11186 }, { "epoch": 2.0120471095927357, "grad_norm": 1.5394010543823242, "learning_rate": 6.784497055626012e-06, "loss": 0.6422, "step": 11187 }, { "epoch": 2.0122269171985976, "grad_norm": 1.589060664176941, "learning_rate": 6.7839529767381785e-06, "loss": 0.6743, "step": 11188 }, { "epoch": 2.012406724804459, "grad_norm": 1.4695395231246948, "learning_rate": 6.7834088736444435e-06, "loss": 0.6245, "step": 11189 }, { "epoch": 2.012586532410321, "grad_norm": 1.5764150619506836, "learning_rate": 6.782864746352191e-06, "loss": 0.6925, "step": 11190 }, { "epoch": 2.012766340016183, "grad_norm": 1.5025781393051147, "learning_rate": 6.782320594868803e-06, "loss": 0.7332, "step": 11191 }, { "epoch": 2.0129461476220443, "grad_norm": 1.422508955001831, "learning_rate": 6.781776419201664e-06, "loss": 0.6105, "step": 11192 }, { "epoch": 2.013125955227906, "grad_norm": 1.4786896705627441, "learning_rate": 6.781232219358156e-06, "loss": 0.6656, "step": 11193 }, { "epoch": 2.013305762833768, "grad_norm": 1.490911602973938, "learning_rate": 6.780687995345665e-06, "loss": 0.7252, "step": 11194 }, { "epoch": 2.0134855704396295, "grad_norm": 1.410660743713379, "learning_rate": 6.780143747171573e-06, "loss": 0.635, "step": 11195 }, { "epoch": 2.0136653780454914, "grad_norm": 1.4672868251800537, "learning_rate": 6.779599474843268e-06, "loss": 0.6635, "step": 11196 }, { "epoch": 2.013845185651353, "grad_norm": 1.6096899509429932, "learning_rate": 6.779055178368131e-06, "loss": 0.6091, "step": 11197 }, { "epoch": 2.0140249932572147, "grad_norm": 1.4116500616073608, "learning_rate": 6.7785108577535505e-06, "loss": 0.6639, "step": 11198 }, { "epoch": 2.0142048008630766, "grad_norm": 1.3886526823043823, "learning_rate": 6.77796651300691e-06, "loss": 0.6467, "step": 11199 }, { "epoch": 2.014384608468938, "grad_norm": 1.4444162845611572, "learning_rate": 6.7774221441356e-06, "loss": 0.624, "step": 11200 }, { "epoch": 2.0145644160748, "grad_norm": 1.557566523551941, "learning_rate": 6.7768777511470014e-06, "loss": 0.7158, "step": 11201 }, { "epoch": 2.014744223680662, "grad_norm": 1.415872573852539, "learning_rate": 6.776333334048505e-06, "loss": 0.7154, "step": 11202 }, { "epoch": 2.0149240312865233, "grad_norm": 1.4761513471603394, "learning_rate": 6.775788892847495e-06, "loss": 0.6823, "step": 11203 }, { "epoch": 2.015103838892385, "grad_norm": 1.4779343605041504, "learning_rate": 6.7752444275513594e-06, "loss": 0.6513, "step": 11204 }, { "epoch": 2.015283646498247, "grad_norm": 1.4665294885635376, "learning_rate": 6.7746999381674865e-06, "loss": 0.677, "step": 11205 }, { "epoch": 2.0154634541041085, "grad_norm": 1.3735407590866089, "learning_rate": 6.774155424703264e-06, "loss": 0.6649, "step": 11206 }, { "epoch": 2.0156432617099704, "grad_norm": 1.4569036960601807, "learning_rate": 6.77361088716608e-06, "loss": 0.6804, "step": 11207 }, { "epoch": 2.015823069315832, "grad_norm": 1.4498140811920166, "learning_rate": 6.7730663255633245e-06, "loss": 0.6429, "step": 11208 }, { "epoch": 2.0160028769216938, "grad_norm": 1.159297227859497, "learning_rate": 6.772521739902385e-06, "loss": 0.8717, "step": 11209 }, { "epoch": 2.0161826845275557, "grad_norm": 1.480883002281189, "learning_rate": 6.77197713019065e-06, "loss": 0.6425, "step": 11210 }, { "epoch": 2.016362492133417, "grad_norm": 1.0241131782531738, "learning_rate": 6.7714324964355115e-06, "loss": 0.8942, "step": 11211 }, { "epoch": 2.016542299739279, "grad_norm": 1.1157186031341553, "learning_rate": 6.77088783864436e-06, "loss": 0.8877, "step": 11212 }, { "epoch": 2.016722107345141, "grad_norm": 1.1706187725067139, "learning_rate": 6.770343156824581e-06, "loss": 0.8197, "step": 11213 }, { "epoch": 2.0169019149510024, "grad_norm": 1.4623003005981445, "learning_rate": 6.769798450983571e-06, "loss": 0.6401, "step": 11214 }, { "epoch": 2.0170817225568642, "grad_norm": 1.574325680732727, "learning_rate": 6.769253721128717e-06, "loss": 0.7084, "step": 11215 }, { "epoch": 2.0172615301627257, "grad_norm": 1.5580421686172485, "learning_rate": 6.768708967267412e-06, "loss": 0.6326, "step": 11216 }, { "epoch": 2.0174413377685876, "grad_norm": 1.4924962520599365, "learning_rate": 6.768164189407047e-06, "loss": 0.6569, "step": 11217 }, { "epoch": 2.0176211453744495, "grad_norm": 1.5082305669784546, "learning_rate": 6.7676193875550145e-06, "loss": 0.7085, "step": 11218 }, { "epoch": 2.017800952980311, "grad_norm": 2.825094699859619, "learning_rate": 6.767074561718705e-06, "loss": 0.6413, "step": 11219 }, { "epoch": 2.017980760586173, "grad_norm": 1.5192947387695312, "learning_rate": 6.766529711905513e-06, "loss": 0.6724, "step": 11220 }, { "epoch": 2.0181605681920347, "grad_norm": 1.1316393613815308, "learning_rate": 6.76598483812283e-06, "loss": 0.8337, "step": 11221 }, { "epoch": 2.018340375797896, "grad_norm": 1.4410630464553833, "learning_rate": 6.765439940378051e-06, "loss": 0.6696, "step": 11222 }, { "epoch": 2.018520183403758, "grad_norm": 1.4512193202972412, "learning_rate": 6.764895018678568e-06, "loss": 0.667, "step": 11223 }, { "epoch": 2.0186999910096195, "grad_norm": 1.208032488822937, "learning_rate": 6.764350073031776e-06, "loss": 0.8757, "step": 11224 }, { "epoch": 2.0188797986154814, "grad_norm": 1.442264437675476, "learning_rate": 6.763805103445067e-06, "loss": 0.7324, "step": 11225 }, { "epoch": 2.0190596062213433, "grad_norm": 1.5045846700668335, "learning_rate": 6.76326010992584e-06, "loss": 0.6046, "step": 11226 }, { "epoch": 2.0192394138272047, "grad_norm": 1.5186269283294678, "learning_rate": 6.762715092481485e-06, "loss": 0.7384, "step": 11227 }, { "epoch": 2.0194192214330666, "grad_norm": 1.6045628786087036, "learning_rate": 6.762170051119398e-06, "loss": 0.6504, "step": 11228 }, { "epoch": 2.0195990290389285, "grad_norm": 1.0891059637069702, "learning_rate": 6.761624985846977e-06, "loss": 0.8515, "step": 11229 }, { "epoch": 2.01977883664479, "grad_norm": 1.5736500024795532, "learning_rate": 6.761079896671616e-06, "loss": 0.7047, "step": 11230 }, { "epoch": 2.019958644250652, "grad_norm": 1.480804681777954, "learning_rate": 6.760534783600712e-06, "loss": 0.6535, "step": 11231 }, { "epoch": 2.0201384518565138, "grad_norm": 1.4465874433517456, "learning_rate": 6.75998964664166e-06, "loss": 0.6387, "step": 11232 }, { "epoch": 2.020318259462375, "grad_norm": 1.614319086074829, "learning_rate": 6.75944448580186e-06, "loss": 0.6967, "step": 11233 }, { "epoch": 2.020498067068237, "grad_norm": 1.5007290840148926, "learning_rate": 6.758899301088705e-06, "loss": 0.7497, "step": 11234 }, { "epoch": 2.0206778746740985, "grad_norm": 1.1367864608764648, "learning_rate": 6.758354092509596e-06, "loss": 0.8671, "step": 11235 }, { "epoch": 2.0208576822799604, "grad_norm": 1.4461030960083008, "learning_rate": 6.757808860071929e-06, "loss": 0.6749, "step": 11236 }, { "epoch": 2.0210374898858223, "grad_norm": 1.666469693183899, "learning_rate": 6.7572636037831005e-06, "loss": 0.7152, "step": 11237 }, { "epoch": 2.0212172974916838, "grad_norm": 1.3517168760299683, "learning_rate": 6.756718323650512e-06, "loss": 0.6937, "step": 11238 }, { "epoch": 2.0213971050975457, "grad_norm": 1.6806309223175049, "learning_rate": 6.756173019681561e-06, "loss": 0.6051, "step": 11239 }, { "epoch": 2.0215769127034076, "grad_norm": 1.0983816385269165, "learning_rate": 6.755627691883646e-06, "loss": 0.8879, "step": 11240 }, { "epoch": 2.021756720309269, "grad_norm": 1.1063865423202515, "learning_rate": 6.755082340264167e-06, "loss": 0.8728, "step": 11241 }, { "epoch": 2.021936527915131, "grad_norm": 1.5178519487380981, "learning_rate": 6.7545369648305236e-06, "loss": 0.6469, "step": 11242 }, { "epoch": 2.0221163355209923, "grad_norm": 1.4295111894607544, "learning_rate": 6.753991565590114e-06, "loss": 0.7087, "step": 11243 }, { "epoch": 2.0222961431268542, "grad_norm": 1.439882516860962, "learning_rate": 6.753446142550343e-06, "loss": 0.6245, "step": 11244 }, { "epoch": 2.022475950732716, "grad_norm": 1.5743327140808105, "learning_rate": 6.752900695718607e-06, "loss": 0.6661, "step": 11245 }, { "epoch": 2.0226557583385776, "grad_norm": 1.3956791162490845, "learning_rate": 6.752355225102309e-06, "loss": 0.6132, "step": 11246 }, { "epoch": 2.0228355659444395, "grad_norm": 1.9065366983413696, "learning_rate": 6.751809730708851e-06, "loss": 0.7015, "step": 11247 }, { "epoch": 2.0230153735503014, "grad_norm": 1.2705432176589966, "learning_rate": 6.751264212545633e-06, "loss": 0.895, "step": 11248 }, { "epoch": 2.023195181156163, "grad_norm": 1.4700231552124023, "learning_rate": 6.7507186706200575e-06, "loss": 0.677, "step": 11249 }, { "epoch": 2.0233749887620247, "grad_norm": 1.70515775680542, "learning_rate": 6.750173104939526e-06, "loss": 0.6532, "step": 11250 }, { "epoch": 2.023554796367886, "grad_norm": 1.6023353338241577, "learning_rate": 6.749627515511443e-06, "loss": 0.7054, "step": 11251 }, { "epoch": 2.023734603973748, "grad_norm": 1.5265029668807983, "learning_rate": 6.749081902343209e-06, "loss": 0.6625, "step": 11252 }, { "epoch": 2.02391441157961, "grad_norm": 1.4345612525939941, "learning_rate": 6.7485362654422296e-06, "loss": 0.6711, "step": 11253 }, { "epoch": 2.0240942191854714, "grad_norm": 1.5010390281677246, "learning_rate": 6.747990604815907e-06, "loss": 0.7409, "step": 11254 }, { "epoch": 2.0242740267913333, "grad_norm": 1.5463823080062866, "learning_rate": 6.747444920471646e-06, "loss": 0.7006, "step": 11255 }, { "epoch": 2.024453834397195, "grad_norm": 1.6226493120193481, "learning_rate": 6.74689921241685e-06, "loss": 0.6374, "step": 11256 }, { "epoch": 2.0246336420030566, "grad_norm": 1.56220281124115, "learning_rate": 6.746353480658925e-06, "loss": 0.7569, "step": 11257 }, { "epoch": 2.0248134496089185, "grad_norm": 1.5095603466033936, "learning_rate": 6.745807725205273e-06, "loss": 0.6887, "step": 11258 }, { "epoch": 2.0249932572147804, "grad_norm": 1.5027978420257568, "learning_rate": 6.745261946063302e-06, "loss": 0.6691, "step": 11259 }, { "epoch": 2.025173064820642, "grad_norm": 1.4240041971206665, "learning_rate": 6.744716143240415e-06, "loss": 0.6517, "step": 11260 }, { "epoch": 2.0253528724265037, "grad_norm": 1.1833081245422363, "learning_rate": 6.744170316744021e-06, "loss": 0.8329, "step": 11261 }, { "epoch": 2.025532680032365, "grad_norm": 1.0643457174301147, "learning_rate": 6.743624466581524e-06, "loss": 0.8686, "step": 11262 }, { "epoch": 2.025712487638227, "grad_norm": 1.4703820943832397, "learning_rate": 6.743078592760329e-06, "loss": 0.647, "step": 11263 }, { "epoch": 2.025892295244089, "grad_norm": 1.6422213315963745, "learning_rate": 6.742532695287848e-06, "loss": 0.6874, "step": 11264 }, { "epoch": 2.0260721028499504, "grad_norm": 1.5876144170761108, "learning_rate": 6.7419867741714815e-06, "loss": 0.7154, "step": 11265 }, { "epoch": 2.0262519104558123, "grad_norm": 1.6499587297439575, "learning_rate": 6.741440829418642e-06, "loss": 0.6941, "step": 11266 }, { "epoch": 2.026431718061674, "grad_norm": 1.4702917337417603, "learning_rate": 6.740894861036735e-06, "loss": 0.6177, "step": 11267 }, { "epoch": 2.0266115256675357, "grad_norm": 1.6662846803665161, "learning_rate": 6.740348869033169e-06, "loss": 0.6468, "step": 11268 }, { "epoch": 2.0267913332733976, "grad_norm": 1.5603079795837402, "learning_rate": 6.739802853415354e-06, "loss": 0.7015, "step": 11269 }, { "epoch": 2.026971140879259, "grad_norm": 1.5146582126617432, "learning_rate": 6.7392568141906945e-06, "loss": 0.6887, "step": 11270 }, { "epoch": 2.027150948485121, "grad_norm": 1.5277433395385742, "learning_rate": 6.738710751366604e-06, "loss": 0.6406, "step": 11271 }, { "epoch": 2.027330756090983, "grad_norm": 1.5618793964385986, "learning_rate": 6.7381646649504886e-06, "loss": 0.6762, "step": 11272 }, { "epoch": 2.0275105636968442, "grad_norm": 1.2213772535324097, "learning_rate": 6.737618554949761e-06, "loss": 0.8755, "step": 11273 }, { "epoch": 2.027690371302706, "grad_norm": 1.5170283317565918, "learning_rate": 6.737072421371829e-06, "loss": 0.7222, "step": 11274 }, { "epoch": 2.027870178908568, "grad_norm": 1.4963327646255493, "learning_rate": 6.736526264224101e-06, "loss": 0.6233, "step": 11275 }, { "epoch": 2.0280499865144295, "grad_norm": 2.4423084259033203, "learning_rate": 6.735980083513993e-06, "loss": 0.6752, "step": 11276 }, { "epoch": 2.0282297941202914, "grad_norm": 1.5741382837295532, "learning_rate": 6.735433879248914e-06, "loss": 0.6757, "step": 11277 }, { "epoch": 2.028409601726153, "grad_norm": 1.5238569974899292, "learning_rate": 6.734887651436272e-06, "loss": 0.6831, "step": 11278 }, { "epoch": 2.0285894093320147, "grad_norm": 1.1369385719299316, "learning_rate": 6.734341400083481e-06, "loss": 0.8882, "step": 11279 }, { "epoch": 2.0287692169378766, "grad_norm": 1.4605865478515625, "learning_rate": 6.733795125197955e-06, "loss": 0.6793, "step": 11280 }, { "epoch": 2.028949024543738, "grad_norm": 1.574211835861206, "learning_rate": 6.733248826787103e-06, "loss": 0.7255, "step": 11281 }, { "epoch": 2.0291288321496, "grad_norm": 1.117437481880188, "learning_rate": 6.732702504858338e-06, "loss": 0.862, "step": 11282 }, { "epoch": 2.029308639755462, "grad_norm": 1.6287603378295898, "learning_rate": 6.732156159419074e-06, "loss": 0.6753, "step": 11283 }, { "epoch": 2.0294884473613233, "grad_norm": 1.4855279922485352, "learning_rate": 6.731609790476724e-06, "loss": 0.6779, "step": 11284 }, { "epoch": 2.029668254967185, "grad_norm": 1.4951972961425781, "learning_rate": 6.731063398038701e-06, "loss": 0.6494, "step": 11285 }, { "epoch": 2.0298480625730466, "grad_norm": 1.5738441944122314, "learning_rate": 6.730516982112418e-06, "loss": 0.6735, "step": 11286 }, { "epoch": 2.0300278701789085, "grad_norm": 1.1529287099838257, "learning_rate": 6.729970542705293e-06, "loss": 0.813, "step": 11287 }, { "epoch": 2.0302076777847704, "grad_norm": 1.6411772966384888, "learning_rate": 6.729424079824736e-06, "loss": 0.7218, "step": 11288 }, { "epoch": 2.030387485390632, "grad_norm": 1.537070393562317, "learning_rate": 6.728877593478163e-06, "loss": 0.6418, "step": 11289 }, { "epoch": 2.0305672929964937, "grad_norm": 1.5294771194458008, "learning_rate": 6.728331083672991e-06, "loss": 0.5922, "step": 11290 }, { "epoch": 2.0307471006023556, "grad_norm": 1.535576343536377, "learning_rate": 6.727784550416634e-06, "loss": 0.6758, "step": 11291 }, { "epoch": 2.030926908208217, "grad_norm": 1.485020637512207, "learning_rate": 6.727237993716507e-06, "loss": 0.6745, "step": 11292 }, { "epoch": 2.031106715814079, "grad_norm": 1.5024254322052002, "learning_rate": 6.7266914135800266e-06, "loss": 0.5818, "step": 11293 }, { "epoch": 2.031286523419941, "grad_norm": 1.4554100036621094, "learning_rate": 6.726144810014608e-06, "loss": 0.6692, "step": 11294 }, { "epoch": 2.0314663310258023, "grad_norm": 1.130881428718567, "learning_rate": 6.725598183027673e-06, "loss": 0.8273, "step": 11295 }, { "epoch": 2.031646138631664, "grad_norm": 1.3330522775650024, "learning_rate": 6.725051532626632e-06, "loss": 0.8808, "step": 11296 }, { "epoch": 2.0318259462375257, "grad_norm": 1.6804392337799072, "learning_rate": 6.724504858818906e-06, "loss": 0.6678, "step": 11297 }, { "epoch": 2.0320057538433876, "grad_norm": 1.5145810842514038, "learning_rate": 6.72395816161191e-06, "loss": 0.6511, "step": 11298 }, { "epoch": 2.0321855614492494, "grad_norm": 1.4966015815734863, "learning_rate": 6.7234114410130665e-06, "loss": 0.685, "step": 11299 }, { "epoch": 2.032365369055111, "grad_norm": 1.5082905292510986, "learning_rate": 6.722864697029789e-06, "loss": 0.6922, "step": 11300 }, { "epoch": 2.032545176660973, "grad_norm": 1.4613913297653198, "learning_rate": 6.722317929669501e-06, "loss": 0.6879, "step": 11301 }, { "epoch": 2.0327249842668347, "grad_norm": 1.5479991436004639, "learning_rate": 6.721771138939617e-06, "loss": 0.6801, "step": 11302 }, { "epoch": 2.032904791872696, "grad_norm": 1.5170350074768066, "learning_rate": 6.721224324847557e-06, "loss": 0.673, "step": 11303 }, { "epoch": 2.033084599478558, "grad_norm": 1.5240360498428345, "learning_rate": 6.7206774874007415e-06, "loss": 0.6933, "step": 11304 }, { "epoch": 2.0332644070844195, "grad_norm": 1.0654784440994263, "learning_rate": 6.720130626606593e-06, "loss": 0.8747, "step": 11305 }, { "epoch": 2.0334442146902814, "grad_norm": 1.4449665546417236, "learning_rate": 6.719583742472526e-06, "loss": 0.5943, "step": 11306 }, { "epoch": 2.0336240222961433, "grad_norm": 1.5641800165176392, "learning_rate": 6.719036835005964e-06, "loss": 0.687, "step": 11307 }, { "epoch": 2.0338038299020047, "grad_norm": 1.4418318271636963, "learning_rate": 6.718489904214328e-06, "loss": 0.6558, "step": 11308 }, { "epoch": 2.0339836375078666, "grad_norm": 1.5346640348434448, "learning_rate": 6.717942950105041e-06, "loss": 0.6909, "step": 11309 }, { "epoch": 2.0341634451137285, "grad_norm": 1.6798126697540283, "learning_rate": 6.7173959726855195e-06, "loss": 0.6903, "step": 11310 }, { "epoch": 2.03434325271959, "grad_norm": 1.4317690134048462, "learning_rate": 6.7168489719631905e-06, "loss": 0.7082, "step": 11311 }, { "epoch": 2.034523060325452, "grad_norm": 1.5212621688842773, "learning_rate": 6.716301947945472e-06, "loss": 0.5801, "step": 11312 }, { "epoch": 2.0347028679313137, "grad_norm": 1.56775963306427, "learning_rate": 6.715754900639789e-06, "loss": 0.7236, "step": 11313 }, { "epoch": 2.034882675537175, "grad_norm": 1.7832261323928833, "learning_rate": 6.7152078300535625e-06, "loss": 0.6968, "step": 11314 }, { "epoch": 2.035062483143037, "grad_norm": 1.1387358903884888, "learning_rate": 6.714660736194218e-06, "loss": 0.8403, "step": 11315 }, { "epoch": 2.0352422907488985, "grad_norm": 1.686815857887268, "learning_rate": 6.714113619069176e-06, "loss": 0.6802, "step": 11316 }, { "epoch": 2.0354220983547604, "grad_norm": 1.4027464389801025, "learning_rate": 6.713566478685861e-06, "loss": 0.7055, "step": 11317 }, { "epoch": 2.0356019059606223, "grad_norm": 1.6038998365402222, "learning_rate": 6.713019315051698e-06, "loss": 0.6975, "step": 11318 }, { "epoch": 2.0357817135664837, "grad_norm": 1.5085121393203735, "learning_rate": 6.71247212817411e-06, "loss": 0.6364, "step": 11319 }, { "epoch": 2.0359615211723456, "grad_norm": 1.541229009628296, "learning_rate": 6.711924918060521e-06, "loss": 0.6273, "step": 11320 }, { "epoch": 2.0361413287782075, "grad_norm": 1.0738025903701782, "learning_rate": 6.71137768471836e-06, "loss": 0.8487, "step": 11321 }, { "epoch": 2.036321136384069, "grad_norm": 1.4936074018478394, "learning_rate": 6.710830428155048e-06, "loss": 0.6365, "step": 11322 }, { "epoch": 2.036500943989931, "grad_norm": 1.4361374378204346, "learning_rate": 6.7102831483780115e-06, "loss": 0.6999, "step": 11323 }, { "epoch": 2.0366807515957923, "grad_norm": 1.5364128351211548, "learning_rate": 6.709735845394677e-06, "loss": 0.7295, "step": 11324 }, { "epoch": 2.036860559201654, "grad_norm": 1.546479344367981, "learning_rate": 6.709188519212472e-06, "loss": 0.6991, "step": 11325 }, { "epoch": 2.037040366807516, "grad_norm": 1.5551263093948364, "learning_rate": 6.7086411698388195e-06, "loss": 0.67, "step": 11326 }, { "epoch": 2.0372201744133775, "grad_norm": 1.0792810916900635, "learning_rate": 6.70809379728115e-06, "loss": 0.9125, "step": 11327 }, { "epoch": 2.0373999820192394, "grad_norm": 1.503182053565979, "learning_rate": 6.7075464015468875e-06, "loss": 0.6721, "step": 11328 }, { "epoch": 2.0375797896251013, "grad_norm": 1.6073849201202393, "learning_rate": 6.70699898264346e-06, "loss": 0.704, "step": 11329 }, { "epoch": 2.037759597230963, "grad_norm": 1.5054352283477783, "learning_rate": 6.706451540578298e-06, "loss": 0.7016, "step": 11330 }, { "epoch": 2.0379394048368247, "grad_norm": 1.5251551866531372, "learning_rate": 6.705904075358827e-06, "loss": 0.6642, "step": 11331 }, { "epoch": 2.038119212442686, "grad_norm": 1.5333831310272217, "learning_rate": 6.705356586992476e-06, "loss": 0.7137, "step": 11332 }, { "epoch": 2.038299020048548, "grad_norm": 1.575154185295105, "learning_rate": 6.704809075486674e-06, "loss": 0.641, "step": 11333 }, { "epoch": 2.03847882765441, "grad_norm": 1.5434033870697021, "learning_rate": 6.70426154084885e-06, "loss": 0.6662, "step": 11334 }, { "epoch": 2.0386586352602714, "grad_norm": 1.4640834331512451, "learning_rate": 6.703713983086433e-06, "loss": 0.7044, "step": 11335 }, { "epoch": 2.0388384428661332, "grad_norm": 1.5545735359191895, "learning_rate": 6.703166402206853e-06, "loss": 0.6383, "step": 11336 }, { "epoch": 2.039018250471995, "grad_norm": 1.5233575105667114, "learning_rate": 6.70261879821754e-06, "loss": 0.6805, "step": 11337 }, { "epoch": 2.0391980580778566, "grad_norm": 1.1208741664886475, "learning_rate": 6.702071171125922e-06, "loss": 0.8856, "step": 11338 }, { "epoch": 2.0393778656837185, "grad_norm": 1.5272268056869507, "learning_rate": 6.701523520939432e-06, "loss": 0.6929, "step": 11339 }, { "epoch": 2.03955767328958, "grad_norm": 1.087945580482483, "learning_rate": 6.700975847665502e-06, "loss": 0.8952, "step": 11340 }, { "epoch": 2.039737480895442, "grad_norm": 1.4067474603652954, "learning_rate": 6.700428151311562e-06, "loss": 0.6722, "step": 11341 }, { "epoch": 2.0399172885013037, "grad_norm": 1.354711890220642, "learning_rate": 6.699880431885042e-06, "loss": 0.6444, "step": 11342 }, { "epoch": 2.040097096107165, "grad_norm": 1.5971847772598267, "learning_rate": 6.6993326893933755e-06, "loss": 0.6343, "step": 11343 }, { "epoch": 2.040276903713027, "grad_norm": 1.5141935348510742, "learning_rate": 6.698784923843993e-06, "loss": 0.6486, "step": 11344 }, { "epoch": 2.040456711318889, "grad_norm": 1.422719955444336, "learning_rate": 6.698237135244329e-06, "loss": 0.6542, "step": 11345 }, { "epoch": 2.0406365189247504, "grad_norm": 1.6690688133239746, "learning_rate": 6.697689323601815e-06, "loss": 0.6241, "step": 11346 }, { "epoch": 2.0408163265306123, "grad_norm": 1.0924924612045288, "learning_rate": 6.697141488923886e-06, "loss": 0.8891, "step": 11347 }, { "epoch": 2.040996134136474, "grad_norm": 1.5482227802276611, "learning_rate": 6.696593631217973e-06, "loss": 0.6695, "step": 11348 }, { "epoch": 2.0411759417423356, "grad_norm": 1.491065263748169, "learning_rate": 6.69604575049151e-06, "loss": 0.6084, "step": 11349 }, { "epoch": 2.0413557493481975, "grad_norm": 1.6383765935897827, "learning_rate": 6.695497846751931e-06, "loss": 0.7165, "step": 11350 }, { "epoch": 2.041535556954059, "grad_norm": 1.0943918228149414, "learning_rate": 6.694949920006673e-06, "loss": 0.8916, "step": 11351 }, { "epoch": 2.041715364559921, "grad_norm": 1.563994288444519, "learning_rate": 6.6944019702631655e-06, "loss": 0.7301, "step": 11352 }, { "epoch": 2.0418951721657828, "grad_norm": 1.4619818925857544, "learning_rate": 6.693853997528849e-06, "loss": 0.6768, "step": 11353 }, { "epoch": 2.042074979771644, "grad_norm": 1.4542418718338013, "learning_rate": 6.693306001811156e-06, "loss": 0.6272, "step": 11354 }, { "epoch": 2.042254787377506, "grad_norm": 1.1183782815933228, "learning_rate": 6.692757983117522e-06, "loss": 0.8908, "step": 11355 }, { "epoch": 2.042434594983368, "grad_norm": 1.5100743770599365, "learning_rate": 6.692209941455384e-06, "loss": 0.6048, "step": 11356 }, { "epoch": 2.0426144025892294, "grad_norm": 1.5478609800338745, "learning_rate": 6.691661876832176e-06, "loss": 0.6812, "step": 11357 }, { "epoch": 2.0427942101950913, "grad_norm": 1.5216330289840698, "learning_rate": 6.691113789255338e-06, "loss": 0.6785, "step": 11358 }, { "epoch": 2.042974017800953, "grad_norm": 1.5756075382232666, "learning_rate": 6.690565678732303e-06, "loss": 0.7244, "step": 11359 }, { "epoch": 2.0431538254068147, "grad_norm": 1.6126203536987305, "learning_rate": 6.690017545270512e-06, "loss": 0.7083, "step": 11360 }, { "epoch": 2.0433336330126766, "grad_norm": 1.4739779233932495, "learning_rate": 6.689469388877399e-06, "loss": 0.6635, "step": 11361 }, { "epoch": 2.043513440618538, "grad_norm": 1.5585885047912598, "learning_rate": 6.6889212095604036e-06, "loss": 0.6422, "step": 11362 }, { "epoch": 2.0436932482244, "grad_norm": 1.4976071119308472, "learning_rate": 6.6883730073269626e-06, "loss": 0.6889, "step": 11363 }, { "epoch": 2.043873055830262, "grad_norm": 1.4748408794403076, "learning_rate": 6.687824782184517e-06, "loss": 0.708, "step": 11364 }, { "epoch": 2.0440528634361232, "grad_norm": 1.501523733139038, "learning_rate": 6.6872765341405026e-06, "loss": 0.6294, "step": 11365 }, { "epoch": 2.044232671041985, "grad_norm": 2.0403220653533936, "learning_rate": 6.68672826320236e-06, "loss": 0.6368, "step": 11366 }, { "epoch": 2.0444124786478466, "grad_norm": 1.56971275806427, "learning_rate": 6.686179969377528e-06, "loss": 0.6885, "step": 11367 }, { "epoch": 2.0445922862537085, "grad_norm": 1.727836012840271, "learning_rate": 6.685631652673446e-06, "loss": 0.705, "step": 11368 }, { "epoch": 2.0447720938595704, "grad_norm": 1.5397447347640991, "learning_rate": 6.685083313097554e-06, "loss": 0.7209, "step": 11369 }, { "epoch": 2.044951901465432, "grad_norm": 1.5276938676834106, "learning_rate": 6.684534950657294e-06, "loss": 0.6559, "step": 11370 }, { "epoch": 2.0451317090712937, "grad_norm": 1.4741184711456299, "learning_rate": 6.6839865653601035e-06, "loss": 0.7014, "step": 11371 }, { "epoch": 2.0453115166771556, "grad_norm": 1.0047398805618286, "learning_rate": 6.6834381572134265e-06, "loss": 0.8678, "step": 11372 }, { "epoch": 2.045491324283017, "grad_norm": 1.6268644332885742, "learning_rate": 6.6828897262247e-06, "loss": 0.7002, "step": 11373 }, { "epoch": 2.045671131888879, "grad_norm": 1.5984159708023071, "learning_rate": 6.68234127240137e-06, "loss": 0.7296, "step": 11374 }, { "epoch": 2.045850939494741, "grad_norm": 1.1617698669433594, "learning_rate": 6.681792795750876e-06, "loss": 0.8713, "step": 11375 }, { "epoch": 2.0460307471006023, "grad_norm": 1.5696743726730347, "learning_rate": 6.681244296280661e-06, "loss": 0.6889, "step": 11376 }, { "epoch": 2.046210554706464, "grad_norm": 1.4568455219268799, "learning_rate": 6.680695773998166e-06, "loss": 0.6769, "step": 11377 }, { "epoch": 2.0463903623123256, "grad_norm": 1.64006507396698, "learning_rate": 6.680147228910836e-06, "loss": 0.6812, "step": 11378 }, { "epoch": 2.0465701699181875, "grad_norm": 1.758424997329712, "learning_rate": 6.679598661026111e-06, "loss": 0.7907, "step": 11379 }, { "epoch": 2.0467499775240494, "grad_norm": 1.5414676666259766, "learning_rate": 6.679050070351438e-06, "loss": 0.6244, "step": 11380 }, { "epoch": 2.046929785129911, "grad_norm": 1.527406930923462, "learning_rate": 6.678501456894257e-06, "loss": 0.6142, "step": 11381 }, { "epoch": 2.0471095927357728, "grad_norm": 1.6530953645706177, "learning_rate": 6.6779528206620145e-06, "loss": 0.6253, "step": 11382 }, { "epoch": 2.0472894003416346, "grad_norm": 1.590136170387268, "learning_rate": 6.6774041616621536e-06, "loss": 0.6829, "step": 11383 }, { "epoch": 2.047469207947496, "grad_norm": 1.4105544090270996, "learning_rate": 6.6768554799021176e-06, "loss": 0.7076, "step": 11384 }, { "epoch": 2.047649015553358, "grad_norm": 1.5417901277542114, "learning_rate": 6.676306775389355e-06, "loss": 0.667, "step": 11385 }, { "epoch": 2.0478288231592194, "grad_norm": 1.5042126178741455, "learning_rate": 6.675758048131309e-06, "loss": 0.6557, "step": 11386 }, { "epoch": 2.0480086307650813, "grad_norm": 1.5485564470291138, "learning_rate": 6.675209298135424e-06, "loss": 0.7108, "step": 11387 }, { "epoch": 2.048188438370943, "grad_norm": 1.0387325286865234, "learning_rate": 6.674660525409149e-06, "loss": 0.8754, "step": 11388 }, { "epoch": 2.0483682459768047, "grad_norm": 1.0236706733703613, "learning_rate": 6.674111729959927e-06, "loss": 0.884, "step": 11389 }, { "epoch": 2.0485480535826666, "grad_norm": 1.3700149059295654, "learning_rate": 6.673562911795205e-06, "loss": 0.6763, "step": 11390 }, { "epoch": 2.0487278611885285, "grad_norm": 1.5397233963012695, "learning_rate": 6.67301407092243e-06, "loss": 0.655, "step": 11391 }, { "epoch": 2.04890766879439, "grad_norm": 1.6830298900604248, "learning_rate": 6.67246520734905e-06, "loss": 0.7097, "step": 11392 }, { "epoch": 2.049087476400252, "grad_norm": 1.6396536827087402, "learning_rate": 6.671916321082511e-06, "loss": 0.6567, "step": 11393 }, { "epoch": 2.0492672840061132, "grad_norm": 1.49663507938385, "learning_rate": 6.671367412130263e-06, "loss": 0.714, "step": 11394 }, { "epoch": 2.049447091611975, "grad_norm": 1.461517333984375, "learning_rate": 6.67081848049975e-06, "loss": 0.7227, "step": 11395 }, { "epoch": 2.049626899217837, "grad_norm": 1.486451506614685, "learning_rate": 6.670269526198423e-06, "loss": 0.6546, "step": 11396 }, { "epoch": 2.0498067068236985, "grad_norm": 1.5425729751586914, "learning_rate": 6.66972054923373e-06, "loss": 0.6926, "step": 11397 }, { "epoch": 2.0499865144295604, "grad_norm": 1.1536462306976318, "learning_rate": 6.669171549613122e-06, "loss": 0.8318, "step": 11398 }, { "epoch": 2.0501663220354223, "grad_norm": 1.6528078317642212, "learning_rate": 6.6686225273440445e-06, "loss": 0.7229, "step": 11399 }, { "epoch": 2.0503461296412837, "grad_norm": 1.4513434171676636, "learning_rate": 6.66807348243395e-06, "loss": 0.6385, "step": 11400 }, { "epoch": 2.0505259372471456, "grad_norm": 1.4579404592514038, "learning_rate": 6.667524414890285e-06, "loss": 0.6746, "step": 11401 }, { "epoch": 2.0507057448530075, "grad_norm": 1.5113093852996826, "learning_rate": 6.666975324720504e-06, "loss": 0.6807, "step": 11402 }, { "epoch": 2.050885552458869, "grad_norm": 1.5111790895462036, "learning_rate": 6.666426211932054e-06, "loss": 0.7201, "step": 11403 }, { "epoch": 2.051065360064731, "grad_norm": 1.4508661031723022, "learning_rate": 6.665877076532388e-06, "loss": 0.6806, "step": 11404 }, { "epoch": 2.0512451676705923, "grad_norm": 1.440661907196045, "learning_rate": 6.6653279185289545e-06, "loss": 0.6048, "step": 11405 }, { "epoch": 2.051424975276454, "grad_norm": 1.6714924573898315, "learning_rate": 6.6647787379292065e-06, "loss": 0.6813, "step": 11406 }, { "epoch": 2.051604782882316, "grad_norm": 1.6315635442733765, "learning_rate": 6.664229534740595e-06, "loss": 0.7208, "step": 11407 }, { "epoch": 2.0517845904881775, "grad_norm": 1.5237174034118652, "learning_rate": 6.663680308970574e-06, "loss": 0.6844, "step": 11408 }, { "epoch": 2.0519643980940394, "grad_norm": 1.0391393899917603, "learning_rate": 6.663131060626593e-06, "loss": 0.865, "step": 11409 }, { "epoch": 2.0521442056999013, "grad_norm": 1.5047799348831177, "learning_rate": 6.662581789716106e-06, "loss": 0.6537, "step": 11410 }, { "epoch": 2.0523240133057628, "grad_norm": 1.4573051929473877, "learning_rate": 6.662032496246565e-06, "loss": 0.7142, "step": 11411 }, { "epoch": 2.0525038209116246, "grad_norm": 1.4926117658615112, "learning_rate": 6.661483180225425e-06, "loss": 0.6647, "step": 11412 }, { "epoch": 2.052683628517486, "grad_norm": 1.5299818515777588, "learning_rate": 6.660933841660138e-06, "loss": 0.6724, "step": 11413 }, { "epoch": 2.052863436123348, "grad_norm": 1.5961661338806152, "learning_rate": 6.6603844805581585e-06, "loss": 0.7403, "step": 11414 }, { "epoch": 2.05304324372921, "grad_norm": 1.5519105195999146, "learning_rate": 6.65983509692694e-06, "loss": 0.7053, "step": 11415 }, { "epoch": 2.0532230513350713, "grad_norm": 1.5119155645370483, "learning_rate": 6.659285690773936e-06, "loss": 0.6914, "step": 11416 }, { "epoch": 2.053402858940933, "grad_norm": 1.200379490852356, "learning_rate": 6.658736262106603e-06, "loss": 0.8581, "step": 11417 }, { "epoch": 2.053582666546795, "grad_norm": 1.5328019857406616, "learning_rate": 6.658186810932396e-06, "loss": 0.6621, "step": 11418 }, { "epoch": 2.0537624741526566, "grad_norm": 1.505379557609558, "learning_rate": 6.657637337258769e-06, "loss": 0.6884, "step": 11419 }, { "epoch": 2.0539422817585185, "grad_norm": 1.8543565273284912, "learning_rate": 6.657087841093179e-06, "loss": 0.6461, "step": 11420 }, { "epoch": 2.05412208936438, "grad_norm": 1.4335256814956665, "learning_rate": 6.656538322443082e-06, "loss": 0.642, "step": 11421 }, { "epoch": 2.054301896970242, "grad_norm": 1.5719918012619019, "learning_rate": 6.655988781315933e-06, "loss": 0.6723, "step": 11422 }, { "epoch": 2.0544817045761037, "grad_norm": 1.0719634294509888, "learning_rate": 6.655439217719189e-06, "loss": 0.8457, "step": 11423 }, { "epoch": 2.054661512181965, "grad_norm": 1.5020445585250854, "learning_rate": 6.654889631660306e-06, "loss": 0.7051, "step": 11424 }, { "epoch": 2.054841319787827, "grad_norm": 1.0144870281219482, "learning_rate": 6.654340023146743e-06, "loss": 0.8623, "step": 11425 }, { "epoch": 2.055021127393689, "grad_norm": 1.5327997207641602, "learning_rate": 6.653790392185957e-06, "loss": 0.6789, "step": 11426 }, { "epoch": 2.0552009349995504, "grad_norm": 1.5551066398620605, "learning_rate": 6.653240738785405e-06, "loss": 0.6732, "step": 11427 }, { "epoch": 2.0553807426054123, "grad_norm": 1.6032333374023438, "learning_rate": 6.652691062952545e-06, "loss": 0.6936, "step": 11428 }, { "epoch": 2.055560550211274, "grad_norm": 1.6301912069320679, "learning_rate": 6.652141364694836e-06, "loss": 0.6632, "step": 11429 }, { "epoch": 2.0557403578171356, "grad_norm": 4.865167617797852, "learning_rate": 6.651591644019737e-06, "loss": 0.6831, "step": 11430 }, { "epoch": 2.0559201654229975, "grad_norm": 1.641890048980713, "learning_rate": 6.651041900934706e-06, "loss": 0.6969, "step": 11431 }, { "epoch": 2.056099973028859, "grad_norm": 1.0800564289093018, "learning_rate": 6.650492135447204e-06, "loss": 0.9091, "step": 11432 }, { "epoch": 2.056279780634721, "grad_norm": 0.9712265133857727, "learning_rate": 6.649942347564688e-06, "loss": 0.8423, "step": 11433 }, { "epoch": 2.0564595882405827, "grad_norm": 1.4313803911209106, "learning_rate": 6.64939253729462e-06, "loss": 0.6768, "step": 11434 }, { "epoch": 2.056639395846444, "grad_norm": 1.1467355489730835, "learning_rate": 6.64884270464446e-06, "loss": 0.8647, "step": 11435 }, { "epoch": 2.056819203452306, "grad_norm": 2.9515984058380127, "learning_rate": 6.648292849621667e-06, "loss": 0.6729, "step": 11436 }, { "epoch": 2.056999011058168, "grad_norm": 1.485385537147522, "learning_rate": 6.647742972233703e-06, "loss": 0.6988, "step": 11437 }, { "epoch": 2.0571788186640294, "grad_norm": 1.4745124578475952, "learning_rate": 6.647193072488028e-06, "loss": 0.7337, "step": 11438 }, { "epoch": 2.0573586262698913, "grad_norm": 1.59126615524292, "learning_rate": 6.646643150392104e-06, "loss": 0.6812, "step": 11439 }, { "epoch": 2.0575384338757527, "grad_norm": 1.4973399639129639, "learning_rate": 6.646093205953397e-06, "loss": 0.6556, "step": 11440 }, { "epoch": 2.0577182414816146, "grad_norm": 1.1559327840805054, "learning_rate": 6.645543239179362e-06, "loss": 0.8993, "step": 11441 }, { "epoch": 2.0578980490874765, "grad_norm": 1.5366181135177612, "learning_rate": 6.644993250077465e-06, "loss": 0.6357, "step": 11442 }, { "epoch": 2.058077856693338, "grad_norm": 1.4222571849822998, "learning_rate": 6.644443238655167e-06, "loss": 0.6892, "step": 11443 }, { "epoch": 2.0582576642992, "grad_norm": 1.4784388542175293, "learning_rate": 6.643893204919933e-06, "loss": 0.673, "step": 11444 }, { "epoch": 2.0584374719050618, "grad_norm": 1.5995407104492188, "learning_rate": 6.643343148879225e-06, "loss": 0.612, "step": 11445 }, { "epoch": 2.058617279510923, "grad_norm": 1.5515732765197754, "learning_rate": 6.6427930705405085e-06, "loss": 0.7029, "step": 11446 }, { "epoch": 2.058797087116785, "grad_norm": 1.4925352334976196, "learning_rate": 6.642242969911243e-06, "loss": 0.6796, "step": 11447 }, { "epoch": 2.0589768947226466, "grad_norm": 1.4270075559616089, "learning_rate": 6.6416928469988974e-06, "loss": 0.6719, "step": 11448 }, { "epoch": 2.0591567023285084, "grad_norm": 1.4848394393920898, "learning_rate": 6.641142701810932e-06, "loss": 0.6467, "step": 11449 }, { "epoch": 2.0593365099343703, "grad_norm": 1.5381481647491455, "learning_rate": 6.640592534354815e-06, "loss": 0.6852, "step": 11450 }, { "epoch": 2.059516317540232, "grad_norm": 1.5955469608306885, "learning_rate": 6.640042344638009e-06, "loss": 0.6815, "step": 11451 }, { "epoch": 2.0596961251460937, "grad_norm": 1.528643012046814, "learning_rate": 6.639492132667981e-06, "loss": 0.7318, "step": 11452 }, { "epoch": 2.0598759327519556, "grad_norm": 1.7225704193115234, "learning_rate": 6.6389418984521956e-06, "loss": 0.7115, "step": 11453 }, { "epoch": 2.060055740357817, "grad_norm": 1.1799548864364624, "learning_rate": 6.638391641998119e-06, "loss": 0.8744, "step": 11454 }, { "epoch": 2.060235547963679, "grad_norm": 1.3737715482711792, "learning_rate": 6.637841363313218e-06, "loss": 0.6339, "step": 11455 }, { "epoch": 2.060415355569541, "grad_norm": 1.6489250659942627, "learning_rate": 6.637291062404959e-06, "loss": 0.7172, "step": 11456 }, { "epoch": 2.0605951631754023, "grad_norm": 1.6442584991455078, "learning_rate": 6.636740739280808e-06, "loss": 0.7179, "step": 11457 }, { "epoch": 2.060774970781264, "grad_norm": 1.4575444459915161, "learning_rate": 6.636190393948234e-06, "loss": 0.6545, "step": 11458 }, { "epoch": 2.0609547783871256, "grad_norm": 1.6670269966125488, "learning_rate": 6.635640026414703e-06, "loss": 0.682, "step": 11459 }, { "epoch": 2.0611345859929875, "grad_norm": 1.55397367477417, "learning_rate": 6.635089636687682e-06, "loss": 0.72, "step": 11460 }, { "epoch": 2.0613143935988494, "grad_norm": 1.5361686944961548, "learning_rate": 6.6345392247746385e-06, "loss": 0.7765, "step": 11461 }, { "epoch": 2.061494201204711, "grad_norm": 1.5389117002487183, "learning_rate": 6.633988790683045e-06, "loss": 0.6386, "step": 11462 }, { "epoch": 2.0616740088105727, "grad_norm": 1.5212767124176025, "learning_rate": 6.633438334420368e-06, "loss": 0.6788, "step": 11463 }, { "epoch": 2.0618538164164346, "grad_norm": 1.6768134832382202, "learning_rate": 6.632887855994075e-06, "loss": 0.6412, "step": 11464 }, { "epoch": 2.062033624022296, "grad_norm": 1.5780105590820312, "learning_rate": 6.632337355411637e-06, "loss": 0.634, "step": 11465 }, { "epoch": 2.062213431628158, "grad_norm": 1.5730308294296265, "learning_rate": 6.631786832680523e-06, "loss": 0.6851, "step": 11466 }, { "epoch": 2.0623932392340194, "grad_norm": 1.5848199129104614, "learning_rate": 6.631236287808202e-06, "loss": 0.6689, "step": 11467 }, { "epoch": 2.0625730468398813, "grad_norm": 1.55526864528656, "learning_rate": 6.630685720802146e-06, "loss": 0.6618, "step": 11468 }, { "epoch": 2.062752854445743, "grad_norm": 1.4697740077972412, "learning_rate": 6.6301351316698226e-06, "loss": 0.707, "step": 11469 }, { "epoch": 2.0629326620516046, "grad_norm": 1.5598807334899902, "learning_rate": 6.629584520418705e-06, "loss": 0.6561, "step": 11470 }, { "epoch": 2.0631124696574665, "grad_norm": 1.4073450565338135, "learning_rate": 6.629033887056265e-06, "loss": 0.6398, "step": 11471 }, { "epoch": 2.0632922772633284, "grad_norm": 1.5629786252975464, "learning_rate": 6.628483231589972e-06, "loss": 0.611, "step": 11472 }, { "epoch": 2.06347208486919, "grad_norm": 1.5092905759811401, "learning_rate": 6.627932554027298e-06, "loss": 0.6721, "step": 11473 }, { "epoch": 2.0636518924750518, "grad_norm": 1.4669023752212524, "learning_rate": 6.627381854375715e-06, "loss": 0.6498, "step": 11474 }, { "epoch": 2.063831700080913, "grad_norm": 1.5050346851348877, "learning_rate": 6.626831132642696e-06, "loss": 0.6525, "step": 11475 }, { "epoch": 2.064011507686775, "grad_norm": 1.533119797706604, "learning_rate": 6.626280388835713e-06, "loss": 0.6506, "step": 11476 }, { "epoch": 2.064191315292637, "grad_norm": 1.148364782333374, "learning_rate": 6.6257296229622405e-06, "loss": 0.8913, "step": 11477 }, { "epoch": 2.0643711228984984, "grad_norm": 1.4359242916107178, "learning_rate": 6.625178835029749e-06, "loss": 0.6946, "step": 11478 }, { "epoch": 2.0645509305043603, "grad_norm": 1.4823565483093262, "learning_rate": 6.624628025045713e-06, "loss": 0.6231, "step": 11479 }, { "epoch": 2.0647307381102222, "grad_norm": 1.5343393087387085, "learning_rate": 6.624077193017606e-06, "loss": 0.6932, "step": 11480 }, { "epoch": 2.0649105457160837, "grad_norm": 0.968015193939209, "learning_rate": 6.623526338952903e-06, "loss": 0.8372, "step": 11481 }, { "epoch": 2.0650903533219456, "grad_norm": 1.4782867431640625, "learning_rate": 6.622975462859078e-06, "loss": 0.6966, "step": 11482 }, { "epoch": 2.065270160927807, "grad_norm": 0.9893025755882263, "learning_rate": 6.622424564743606e-06, "loss": 0.912, "step": 11483 }, { "epoch": 2.065449968533669, "grad_norm": 1.1065269708633423, "learning_rate": 6.621873644613961e-06, "loss": 0.8558, "step": 11484 }, { "epoch": 2.065629776139531, "grad_norm": 1.5834941864013672, "learning_rate": 6.621322702477618e-06, "loss": 0.7058, "step": 11485 }, { "epoch": 2.0658095837453923, "grad_norm": 1.378332257270813, "learning_rate": 6.620771738342055e-06, "loss": 0.6489, "step": 11486 }, { "epoch": 2.065989391351254, "grad_norm": 1.0130531787872314, "learning_rate": 6.620220752214745e-06, "loss": 0.8689, "step": 11487 }, { "epoch": 2.066169198957116, "grad_norm": 1.3614356517791748, "learning_rate": 6.619669744103165e-06, "loss": 0.5697, "step": 11488 }, { "epoch": 2.0663490065629775, "grad_norm": 1.494321346282959, "learning_rate": 6.619118714014794e-06, "loss": 0.6651, "step": 11489 }, { "epoch": 2.0665288141688394, "grad_norm": 1.54887056350708, "learning_rate": 6.618567661957104e-06, "loss": 0.7613, "step": 11490 }, { "epoch": 2.0667086217747013, "grad_norm": 1.4490348100662231, "learning_rate": 6.618016587937577e-06, "loss": 0.6846, "step": 11491 }, { "epoch": 2.0668884293805627, "grad_norm": 1.5982085466384888, "learning_rate": 6.617465491963686e-06, "loss": 0.6611, "step": 11492 }, { "epoch": 2.0670682369864246, "grad_norm": 1.5872479677200317, "learning_rate": 6.61691437404291e-06, "loss": 0.6769, "step": 11493 }, { "epoch": 2.067248044592286, "grad_norm": 1.4384583234786987, "learning_rate": 6.616363234182729e-06, "loss": 0.6426, "step": 11494 }, { "epoch": 2.067427852198148, "grad_norm": 1.565241813659668, "learning_rate": 6.615812072390619e-06, "loss": 0.7662, "step": 11495 }, { "epoch": 2.06760765980401, "grad_norm": 1.5937930345535278, "learning_rate": 6.61526088867406e-06, "loss": 0.637, "step": 11496 }, { "epoch": 2.0677874674098713, "grad_norm": 1.520307183265686, "learning_rate": 6.614709683040531e-06, "loss": 0.7075, "step": 11497 }, { "epoch": 2.067967275015733, "grad_norm": 1.1854157447814941, "learning_rate": 6.614158455497509e-06, "loss": 0.819, "step": 11498 }, { "epoch": 2.068147082621595, "grad_norm": 1.160326361656189, "learning_rate": 6.613607206052476e-06, "loss": 0.868, "step": 11499 }, { "epoch": 2.0683268902274565, "grad_norm": 1.6428252458572388, "learning_rate": 6.6130559347129085e-06, "loss": 0.6527, "step": 11500 }, { "epoch": 2.0683268902274565, "eval_loss": 0.7895369529724121, "eval_runtime": 150.5121, "eval_samples_per_second": 95.554, "eval_steps_per_second": 1.495, "step": 11500 }, { "epoch": 2.0685066978333184, "grad_norm": 1.5503431558609009, "learning_rate": 6.61250464148629e-06, "loss": 0.611, "step": 11501 }, { "epoch": 2.06868650543918, "grad_norm": 1.5300010442733765, "learning_rate": 6.611953326380099e-06, "loss": 0.6668, "step": 11502 }, { "epoch": 2.0688663130450418, "grad_norm": 1.5032094717025757, "learning_rate": 6.6114019894018174e-06, "loss": 0.757, "step": 11503 }, { "epoch": 2.0690461206509037, "grad_norm": 1.5245059728622437, "learning_rate": 6.6108506305589235e-06, "loss": 0.6104, "step": 11504 }, { "epoch": 2.069225928256765, "grad_norm": 1.630609393119812, "learning_rate": 6.6102992498589e-06, "loss": 0.6675, "step": 11505 }, { "epoch": 2.069405735862627, "grad_norm": 1.1911375522613525, "learning_rate": 6.609747847309229e-06, "loss": 0.8814, "step": 11506 }, { "epoch": 2.069585543468489, "grad_norm": 1.3800300359725952, "learning_rate": 6.609196422917394e-06, "loss": 0.6535, "step": 11507 }, { "epoch": 2.0697653510743503, "grad_norm": 1.4463742971420288, "learning_rate": 6.6086449766908725e-06, "loss": 0.6433, "step": 11508 }, { "epoch": 2.0699451586802122, "grad_norm": 1.5512638092041016, "learning_rate": 6.608093508637151e-06, "loss": 0.6437, "step": 11509 }, { "epoch": 2.070124966286074, "grad_norm": 1.0846521854400635, "learning_rate": 6.60754201876371e-06, "loss": 0.8653, "step": 11510 }, { "epoch": 2.0703047738919356, "grad_norm": 1.4725170135498047, "learning_rate": 6.606990507078034e-06, "loss": 0.697, "step": 11511 }, { "epoch": 2.0704845814977975, "grad_norm": 1.4794384241104126, "learning_rate": 6.6064389735876035e-06, "loss": 0.6525, "step": 11512 }, { "epoch": 2.070664389103659, "grad_norm": 1.4997179508209229, "learning_rate": 6.605887418299905e-06, "loss": 0.6596, "step": 11513 }, { "epoch": 2.070844196709521, "grad_norm": 1.635703444480896, "learning_rate": 6.605335841222422e-06, "loss": 0.6757, "step": 11514 }, { "epoch": 2.0710240043153827, "grad_norm": 1.5800074338912964, "learning_rate": 6.604784242362638e-06, "loss": 0.6673, "step": 11515 }, { "epoch": 2.071203811921244, "grad_norm": 1.5520350933074951, "learning_rate": 6.6042326217280365e-06, "loss": 0.6626, "step": 11516 }, { "epoch": 2.071383619527106, "grad_norm": 1.5635517835617065, "learning_rate": 6.603680979326104e-06, "loss": 0.7074, "step": 11517 }, { "epoch": 2.071563427132968, "grad_norm": 1.0509836673736572, "learning_rate": 6.603129315164324e-06, "loss": 0.9005, "step": 11518 }, { "epoch": 2.0717432347388294, "grad_norm": 1.5701894760131836, "learning_rate": 6.602577629250184e-06, "loss": 0.6789, "step": 11519 }, { "epoch": 2.0719230423446913, "grad_norm": 1.5274405479431152, "learning_rate": 6.602025921591167e-06, "loss": 0.7028, "step": 11520 }, { "epoch": 2.0721028499505527, "grad_norm": 1.4244211912155151, "learning_rate": 6.601474192194762e-06, "loss": 0.6382, "step": 11521 }, { "epoch": 2.0722826575564146, "grad_norm": 1.5340255498886108, "learning_rate": 6.600922441068452e-06, "loss": 0.6793, "step": 11522 }, { "epoch": 2.0724624651622765, "grad_norm": 1.5781737565994263, "learning_rate": 6.6003706682197265e-06, "loss": 0.7057, "step": 11523 }, { "epoch": 2.072642272768138, "grad_norm": 1.0945582389831543, "learning_rate": 6.5998188736560694e-06, "loss": 0.811, "step": 11524 }, { "epoch": 2.072822080374, "grad_norm": 1.5157541036605835, "learning_rate": 6.599267057384971e-06, "loss": 0.7026, "step": 11525 }, { "epoch": 2.0730018879798617, "grad_norm": 1.511104702949524, "learning_rate": 6.598715219413916e-06, "loss": 0.704, "step": 11526 }, { "epoch": 2.073181695585723, "grad_norm": 1.450274109840393, "learning_rate": 6.598163359750394e-06, "loss": 0.6523, "step": 11527 }, { "epoch": 2.073361503191585, "grad_norm": 1.7289668321609497, "learning_rate": 6.5976114784018905e-06, "loss": 0.6581, "step": 11528 }, { "epoch": 2.0735413107974465, "grad_norm": 1.9317469596862793, "learning_rate": 6.597059575375897e-06, "loss": 0.683, "step": 11529 }, { "epoch": 2.0737211184033084, "grad_norm": 1.0467116832733154, "learning_rate": 6.5965076506799e-06, "loss": 0.8402, "step": 11530 }, { "epoch": 2.0739009260091703, "grad_norm": 1.5098254680633545, "learning_rate": 6.595955704321391e-06, "loss": 0.6208, "step": 11531 }, { "epoch": 2.0740807336150318, "grad_norm": 1.458878993988037, "learning_rate": 6.5954037363078545e-06, "loss": 0.6657, "step": 11532 }, { "epoch": 2.0742605412208936, "grad_norm": 1.5184223651885986, "learning_rate": 6.5948517466467844e-06, "loss": 0.7053, "step": 11533 }, { "epoch": 2.0744403488267555, "grad_norm": 1.4986881017684937, "learning_rate": 6.5942997353456675e-06, "loss": 0.665, "step": 11534 }, { "epoch": 2.074620156432617, "grad_norm": 1.5169754028320312, "learning_rate": 6.5937477024119965e-06, "loss": 0.6706, "step": 11535 }, { "epoch": 2.074799964038479, "grad_norm": 1.4773504734039307, "learning_rate": 6.5931956478532585e-06, "loss": 0.6247, "step": 11536 }, { "epoch": 2.0749797716443403, "grad_norm": 1.808561086654663, "learning_rate": 6.592643571676946e-06, "loss": 0.712, "step": 11537 }, { "epoch": 2.0751595792502022, "grad_norm": 1.533112645149231, "learning_rate": 6.592091473890552e-06, "loss": 0.6933, "step": 11538 }, { "epoch": 2.075339386856064, "grad_norm": 1.5416347980499268, "learning_rate": 6.591539354501566e-06, "loss": 0.7, "step": 11539 }, { "epoch": 2.0755191944619256, "grad_norm": 1.1333450078964233, "learning_rate": 6.590987213517477e-06, "loss": 0.8672, "step": 11540 }, { "epoch": 2.0756990020677875, "grad_norm": 1.4965648651123047, "learning_rate": 6.59043505094578e-06, "loss": 0.6936, "step": 11541 }, { "epoch": 2.0758788096736494, "grad_norm": 1.045707106590271, "learning_rate": 6.589882866793968e-06, "loss": 0.8717, "step": 11542 }, { "epoch": 2.076058617279511, "grad_norm": 1.032339096069336, "learning_rate": 6.5893306610695294e-06, "loss": 0.855, "step": 11543 }, { "epoch": 2.0762384248853727, "grad_norm": 1.5030683279037476, "learning_rate": 6.58877843377996e-06, "loss": 0.6671, "step": 11544 }, { "epoch": 2.0764182324912346, "grad_norm": 1.4510306119918823, "learning_rate": 6.588226184932752e-06, "loss": 0.689, "step": 11545 }, { "epoch": 2.076598040097096, "grad_norm": 1.558578610420227, "learning_rate": 6.587673914535398e-06, "loss": 0.6856, "step": 11546 }, { "epoch": 2.076777847702958, "grad_norm": 1.7529577016830444, "learning_rate": 6.587121622595393e-06, "loss": 0.7134, "step": 11547 }, { "epoch": 2.0769576553088194, "grad_norm": 1.1274104118347168, "learning_rate": 6.58656930912023e-06, "loss": 0.864, "step": 11548 }, { "epoch": 2.0771374629146813, "grad_norm": 1.564758062362671, "learning_rate": 6.586016974117403e-06, "loss": 0.6979, "step": 11549 }, { "epoch": 2.077317270520543, "grad_norm": 1.597218632698059, "learning_rate": 6.585464617594406e-06, "loss": 0.6687, "step": 11550 }, { "epoch": 2.0774970781264046, "grad_norm": 1.643175721168518, "learning_rate": 6.584912239558736e-06, "loss": 0.667, "step": 11551 }, { "epoch": 2.0776768857322665, "grad_norm": 1.4657084941864014, "learning_rate": 6.584359840017885e-06, "loss": 0.6747, "step": 11552 }, { "epoch": 2.0778566933381284, "grad_norm": 1.534477710723877, "learning_rate": 6.583807418979352e-06, "loss": 0.7295, "step": 11553 }, { "epoch": 2.07803650094399, "grad_norm": 1.0329898595809937, "learning_rate": 6.583254976450628e-06, "loss": 0.87, "step": 11554 }, { "epoch": 2.0782163085498517, "grad_norm": 1.5551522970199585, "learning_rate": 6.582702512439214e-06, "loss": 0.6884, "step": 11555 }, { "epoch": 2.078396116155713, "grad_norm": 1.5712908506393433, "learning_rate": 6.582150026952602e-06, "loss": 0.6812, "step": 11556 }, { "epoch": 2.078575923761575, "grad_norm": 1.5080662965774536, "learning_rate": 6.581597519998291e-06, "loss": 0.6965, "step": 11557 }, { "epoch": 2.078755731367437, "grad_norm": 1.4458590745925903, "learning_rate": 6.5810449915837755e-06, "loss": 0.663, "step": 11558 }, { "epoch": 2.0789355389732984, "grad_norm": 1.4990190267562866, "learning_rate": 6.580492441716555e-06, "loss": 0.6962, "step": 11559 }, { "epoch": 2.0791153465791603, "grad_norm": 1.5127774477005005, "learning_rate": 6.579939870404125e-06, "loss": 0.7331, "step": 11560 }, { "epoch": 2.079295154185022, "grad_norm": 1.4446195363998413, "learning_rate": 6.579387277653986e-06, "loss": 0.6328, "step": 11561 }, { "epoch": 2.0794749617908836, "grad_norm": 1.5365071296691895, "learning_rate": 6.578834663473631e-06, "loss": 0.6073, "step": 11562 }, { "epoch": 2.0796547693967455, "grad_norm": 1.535901427268982, "learning_rate": 6.578282027870564e-06, "loss": 0.7084, "step": 11563 }, { "epoch": 2.0798345770026074, "grad_norm": 1.4563182592391968, "learning_rate": 6.57772937085228e-06, "loss": 0.6895, "step": 11564 }, { "epoch": 2.080014384608469, "grad_norm": 1.5566169023513794, "learning_rate": 6.5771766924262795e-06, "loss": 0.7475, "step": 11565 }, { "epoch": 2.0801941922143308, "grad_norm": 1.4668859243392944, "learning_rate": 6.576623992600059e-06, "loss": 0.6316, "step": 11566 }, { "epoch": 2.080373999820192, "grad_norm": 1.0068978071212769, "learning_rate": 6.57607127138112e-06, "loss": 0.8537, "step": 11567 }, { "epoch": 2.080553807426054, "grad_norm": 1.5283328294754028, "learning_rate": 6.5755185287769616e-06, "loss": 0.7215, "step": 11568 }, { "epoch": 2.080733615031916, "grad_norm": 1.5069165229797363, "learning_rate": 6.574965764795085e-06, "loss": 0.7548, "step": 11569 }, { "epoch": 2.0809134226377775, "grad_norm": 1.1139651536941528, "learning_rate": 6.574412979442989e-06, "loss": 0.8537, "step": 11570 }, { "epoch": 2.0810932302436393, "grad_norm": 1.0204105377197266, "learning_rate": 6.5738601727281745e-06, "loss": 0.905, "step": 11571 }, { "epoch": 2.0812730378495012, "grad_norm": 1.480085849761963, "learning_rate": 6.573307344658144e-06, "loss": 0.7075, "step": 11572 }, { "epoch": 2.0814528454553627, "grad_norm": 0.8975569605827332, "learning_rate": 6.572754495240396e-06, "loss": 0.8891, "step": 11573 }, { "epoch": 2.0816326530612246, "grad_norm": 1.558517575263977, "learning_rate": 6.572201624482433e-06, "loss": 0.6871, "step": 11574 }, { "epoch": 2.081812460667086, "grad_norm": 1.6404482126235962, "learning_rate": 6.571648732391758e-06, "loss": 0.6421, "step": 11575 }, { "epoch": 2.081992268272948, "grad_norm": 1.5906696319580078, "learning_rate": 6.571095818975871e-06, "loss": 0.6072, "step": 11576 }, { "epoch": 2.08217207587881, "grad_norm": 1.6497834920883179, "learning_rate": 6.570542884242277e-06, "loss": 0.6881, "step": 11577 }, { "epoch": 2.0823518834846713, "grad_norm": 1.506490707397461, "learning_rate": 6.569989928198475e-06, "loss": 0.7246, "step": 11578 }, { "epoch": 2.082531691090533, "grad_norm": 1.7028694152832031, "learning_rate": 6.569436950851969e-06, "loss": 0.6635, "step": 11579 }, { "epoch": 2.082711498696395, "grad_norm": 1.1230213642120361, "learning_rate": 6.568883952210264e-06, "loss": 0.8684, "step": 11580 }, { "epoch": 2.0828913063022565, "grad_norm": 1.6369491815567017, "learning_rate": 6.568330932280862e-06, "loss": 0.6397, "step": 11581 }, { "epoch": 2.0830711139081184, "grad_norm": 1.0975383520126343, "learning_rate": 6.567777891071267e-06, "loss": 0.8683, "step": 11582 }, { "epoch": 2.08325092151398, "grad_norm": 1.5583053827285767, "learning_rate": 6.567224828588984e-06, "loss": 0.6796, "step": 11583 }, { "epoch": 2.0834307291198417, "grad_norm": 1.18943452835083, "learning_rate": 6.566671744841516e-06, "loss": 0.8887, "step": 11584 }, { "epoch": 2.0836105367257036, "grad_norm": 1.5282820463180542, "learning_rate": 6.566118639836369e-06, "loss": 0.6934, "step": 11585 }, { "epoch": 2.083790344331565, "grad_norm": 1.9097141027450562, "learning_rate": 6.565565513581045e-06, "loss": 0.7374, "step": 11586 }, { "epoch": 2.083970151937427, "grad_norm": 1.587113857269287, "learning_rate": 6.565012366083053e-06, "loss": 0.6909, "step": 11587 }, { "epoch": 2.084149959543289, "grad_norm": 1.52427339553833, "learning_rate": 6.564459197349896e-06, "loss": 0.6875, "step": 11588 }, { "epoch": 2.0843297671491503, "grad_norm": 1.4902405738830566, "learning_rate": 6.5639060073890814e-06, "loss": 0.6918, "step": 11589 }, { "epoch": 2.084509574755012, "grad_norm": 1.4434376955032349, "learning_rate": 6.5633527962081135e-06, "loss": 0.6392, "step": 11590 }, { "epoch": 2.0846893823608736, "grad_norm": 1.521706461906433, "learning_rate": 6.562799563814498e-06, "loss": 0.7091, "step": 11591 }, { "epoch": 2.0848691899667355, "grad_norm": 1.4394330978393555, "learning_rate": 6.562246310215745e-06, "loss": 0.6321, "step": 11592 }, { "epoch": 2.0850489975725974, "grad_norm": 1.1186256408691406, "learning_rate": 6.561693035419359e-06, "loss": 0.8971, "step": 11593 }, { "epoch": 2.085228805178459, "grad_norm": 1.6031461954116821, "learning_rate": 6.5611397394328465e-06, "loss": 0.6855, "step": 11594 }, { "epoch": 2.0854086127843208, "grad_norm": 1.500598430633545, "learning_rate": 6.560586422263719e-06, "loss": 0.6819, "step": 11595 }, { "epoch": 2.0855884203901827, "grad_norm": 1.4403154850006104, "learning_rate": 6.560033083919479e-06, "loss": 0.6645, "step": 11596 }, { "epoch": 2.085768227996044, "grad_norm": 1.610793113708496, "learning_rate": 6.559479724407638e-06, "loss": 0.6781, "step": 11597 }, { "epoch": 2.085948035601906, "grad_norm": 1.053251028060913, "learning_rate": 6.5589263437357035e-06, "loss": 0.8409, "step": 11598 }, { "epoch": 2.086127843207768, "grad_norm": 1.4573733806610107, "learning_rate": 6.558372941911183e-06, "loss": 0.6696, "step": 11599 }, { "epoch": 2.0863076508136293, "grad_norm": 1.571022391319275, "learning_rate": 6.557819518941588e-06, "loss": 0.6695, "step": 11600 }, { "epoch": 2.0864874584194912, "grad_norm": 1.5372309684753418, "learning_rate": 6.557266074834425e-06, "loss": 0.6512, "step": 11601 }, { "epoch": 2.0866672660253527, "grad_norm": 1.5346747636795044, "learning_rate": 6.556712609597205e-06, "loss": 0.6637, "step": 11602 }, { "epoch": 2.0868470736312146, "grad_norm": 1.6375945806503296, "learning_rate": 6.556159123237438e-06, "loss": 0.7336, "step": 11603 }, { "epoch": 2.0870268812370765, "grad_norm": 1.5217047929763794, "learning_rate": 6.555605615762632e-06, "loss": 0.6826, "step": 11604 }, { "epoch": 2.087206688842938, "grad_norm": 1.522826075553894, "learning_rate": 6.5550520871803e-06, "loss": 0.6887, "step": 11605 }, { "epoch": 2.0873864964488, "grad_norm": 1.0193089246749878, "learning_rate": 6.554498537497953e-06, "loss": 0.8852, "step": 11606 }, { "epoch": 2.0875663040546617, "grad_norm": 1.0840544700622559, "learning_rate": 6.553944966723098e-06, "loss": 0.8336, "step": 11607 }, { "epoch": 2.087746111660523, "grad_norm": 1.5045440196990967, "learning_rate": 6.553391374863252e-06, "loss": 0.667, "step": 11608 }, { "epoch": 2.087925919266385, "grad_norm": 1.5044770240783691, "learning_rate": 6.55283776192592e-06, "loss": 0.6644, "step": 11609 }, { "epoch": 2.0881057268722465, "grad_norm": 1.5771725177764893, "learning_rate": 6.552284127918619e-06, "loss": 0.6369, "step": 11610 }, { "epoch": 2.0882855344781084, "grad_norm": 1.5583513975143433, "learning_rate": 6.551730472848858e-06, "loss": 0.6202, "step": 11611 }, { "epoch": 2.0884653420839703, "grad_norm": 1.6274607181549072, "learning_rate": 6.551176796724152e-06, "loss": 0.6663, "step": 11612 }, { "epoch": 2.0886451496898317, "grad_norm": 1.4494199752807617, "learning_rate": 6.550623099552012e-06, "loss": 0.6326, "step": 11613 }, { "epoch": 2.0888249572956936, "grad_norm": 1.6655468940734863, "learning_rate": 6.55006938133995e-06, "loss": 0.7405, "step": 11614 }, { "epoch": 2.0890047649015555, "grad_norm": 1.5163404941558838, "learning_rate": 6.5495156420954804e-06, "loss": 0.6928, "step": 11615 }, { "epoch": 2.089184572507417, "grad_norm": 1.6428693532943726, "learning_rate": 6.5489618818261184e-06, "loss": 0.7116, "step": 11616 }, { "epoch": 2.089364380113279, "grad_norm": 1.5258162021636963, "learning_rate": 6.548408100539374e-06, "loss": 0.6916, "step": 11617 }, { "epoch": 2.0895441877191407, "grad_norm": 1.5629799365997314, "learning_rate": 6.547854298242766e-06, "loss": 0.7111, "step": 11618 }, { "epoch": 2.089723995325002, "grad_norm": 1.6010167598724365, "learning_rate": 6.547300474943804e-06, "loss": 0.7394, "step": 11619 }, { "epoch": 2.089903802930864, "grad_norm": 1.5000892877578735, "learning_rate": 6.546746630650006e-06, "loss": 0.6826, "step": 11620 }, { "epoch": 2.0900836105367255, "grad_norm": 1.7316091060638428, "learning_rate": 6.546192765368885e-06, "loss": 0.7131, "step": 11621 }, { "epoch": 2.0902634181425874, "grad_norm": 1.4701948165893555, "learning_rate": 6.5456388791079575e-06, "loss": 0.6338, "step": 11622 }, { "epoch": 2.0904432257484493, "grad_norm": 1.5430293083190918, "learning_rate": 6.545084971874738e-06, "loss": 0.6692, "step": 11623 }, { "epoch": 2.0906230333543108, "grad_norm": 1.8907005786895752, "learning_rate": 6.544531043676743e-06, "loss": 0.6318, "step": 11624 }, { "epoch": 2.0908028409601727, "grad_norm": 1.2063387632369995, "learning_rate": 6.543977094521489e-06, "loss": 0.9274, "step": 11625 }, { "epoch": 2.0909826485660346, "grad_norm": 1.0976659059524536, "learning_rate": 6.543423124416491e-06, "loss": 0.8353, "step": 11626 }, { "epoch": 2.091162456171896, "grad_norm": 1.450353741645813, "learning_rate": 6.542869133369265e-06, "loss": 0.7207, "step": 11627 }, { "epoch": 2.091342263777758, "grad_norm": 1.553428292274475, "learning_rate": 6.542315121387331e-06, "loss": 0.6376, "step": 11628 }, { "epoch": 2.0915220713836193, "grad_norm": 1.597406029701233, "learning_rate": 6.541761088478204e-06, "loss": 0.6685, "step": 11629 }, { "epoch": 2.0917018789894812, "grad_norm": 1.5782477855682373, "learning_rate": 6.541207034649404e-06, "loss": 0.7236, "step": 11630 }, { "epoch": 2.091881686595343, "grad_norm": 1.427843451499939, "learning_rate": 6.540652959908445e-06, "loss": 0.6053, "step": 11631 }, { "epoch": 2.0920614942012046, "grad_norm": 1.603793978691101, "learning_rate": 6.5400988642628474e-06, "loss": 0.709, "step": 11632 }, { "epoch": 2.0922413018070665, "grad_norm": 1.5096813440322876, "learning_rate": 6.5395447477201275e-06, "loss": 0.6452, "step": 11633 }, { "epoch": 2.0924211094129284, "grad_norm": 1.572696328163147, "learning_rate": 6.538990610287807e-06, "loss": 0.7311, "step": 11634 }, { "epoch": 2.09260091701879, "grad_norm": 1.5889086723327637, "learning_rate": 6.538436451973404e-06, "loss": 0.7024, "step": 11635 }, { "epoch": 2.0927807246246517, "grad_norm": 1.521946907043457, "learning_rate": 6.537882272784435e-06, "loss": 0.6807, "step": 11636 }, { "epoch": 2.092960532230513, "grad_norm": 1.4932564496994019, "learning_rate": 6.5373280727284215e-06, "loss": 0.6354, "step": 11637 }, { "epoch": 2.093140339836375, "grad_norm": 1.769871473312378, "learning_rate": 6.536773851812886e-06, "loss": 0.6829, "step": 11638 }, { "epoch": 2.093320147442237, "grad_norm": 1.6009267568588257, "learning_rate": 6.536219610045343e-06, "loss": 0.6752, "step": 11639 }, { "epoch": 2.0934999550480984, "grad_norm": 1.5521341562271118, "learning_rate": 6.535665347433317e-06, "loss": 0.7034, "step": 11640 }, { "epoch": 2.0936797626539603, "grad_norm": 1.568015217781067, "learning_rate": 6.535111063984327e-06, "loss": 0.6958, "step": 11641 }, { "epoch": 2.093859570259822, "grad_norm": 1.488222599029541, "learning_rate": 6.534556759705895e-06, "loss": 0.8807, "step": 11642 }, { "epoch": 2.0940393778656836, "grad_norm": 1.4645295143127441, "learning_rate": 6.534002434605539e-06, "loss": 0.6636, "step": 11643 }, { "epoch": 2.0942191854715455, "grad_norm": 1.678420066833496, "learning_rate": 6.533448088690785e-06, "loss": 0.6748, "step": 11644 }, { "epoch": 2.094398993077407, "grad_norm": 1.5419005155563354, "learning_rate": 6.5328937219691515e-06, "loss": 0.6837, "step": 11645 }, { "epoch": 2.094578800683269, "grad_norm": 1.4444714784622192, "learning_rate": 6.532339334448161e-06, "loss": 0.6692, "step": 11646 }, { "epoch": 2.0947586082891307, "grad_norm": 1.1690369844436646, "learning_rate": 6.531784926135336e-06, "loss": 0.8497, "step": 11647 }, { "epoch": 2.094938415894992, "grad_norm": 1.539286494255066, "learning_rate": 6.531230497038201e-06, "loss": 0.6782, "step": 11648 }, { "epoch": 2.095118223500854, "grad_norm": 1.4596235752105713, "learning_rate": 6.530676047164277e-06, "loss": 0.7476, "step": 11649 }, { "epoch": 2.095298031106716, "grad_norm": 1.4854037761688232, "learning_rate": 6.530121576521088e-06, "loss": 0.6979, "step": 11650 }, { "epoch": 2.0954778387125774, "grad_norm": 1.0857887268066406, "learning_rate": 6.529567085116155e-06, "loss": 0.8331, "step": 11651 }, { "epoch": 2.0956576463184393, "grad_norm": 1.4854365587234497, "learning_rate": 6.5290125729570066e-06, "loss": 0.6979, "step": 11652 }, { "epoch": 2.095837453924301, "grad_norm": 1.5348763465881348, "learning_rate": 6.528458040051161e-06, "loss": 0.6827, "step": 11653 }, { "epoch": 2.0960172615301627, "grad_norm": 1.5508978366851807, "learning_rate": 6.527903486406147e-06, "loss": 0.6446, "step": 11654 }, { "epoch": 2.0961970691360245, "grad_norm": 1.5532090663909912, "learning_rate": 6.5273489120294875e-06, "loss": 0.6571, "step": 11655 }, { "epoch": 2.096376876741886, "grad_norm": 1.5877000093460083, "learning_rate": 6.526794316928707e-06, "loss": 0.6864, "step": 11656 }, { "epoch": 2.096556684347748, "grad_norm": 1.5075819492340088, "learning_rate": 6.52623970111133e-06, "loss": 0.6451, "step": 11657 }, { "epoch": 2.09673649195361, "grad_norm": 1.6167372465133667, "learning_rate": 6.525685064584883e-06, "loss": 0.7135, "step": 11658 }, { "epoch": 2.0969162995594712, "grad_norm": 1.435248613357544, "learning_rate": 6.5251304073568925e-06, "loss": 0.6291, "step": 11659 }, { "epoch": 2.097096107165333, "grad_norm": 1.4736981391906738, "learning_rate": 6.524575729434884e-06, "loss": 0.6407, "step": 11660 }, { "epoch": 2.097275914771195, "grad_norm": 1.5705528259277344, "learning_rate": 6.524021030826381e-06, "loss": 0.7017, "step": 11661 }, { "epoch": 2.0974557223770565, "grad_norm": 1.5283093452453613, "learning_rate": 6.523466311538916e-06, "loss": 0.6494, "step": 11662 }, { "epoch": 2.0976355299829184, "grad_norm": 1.5341767072677612, "learning_rate": 6.52291157158001e-06, "loss": 0.6793, "step": 11663 }, { "epoch": 2.09781533758878, "grad_norm": 1.5984588861465454, "learning_rate": 6.522356810957193e-06, "loss": 0.7362, "step": 11664 }, { "epoch": 2.0979951451946417, "grad_norm": 1.4608769416809082, "learning_rate": 6.52180202967799e-06, "loss": 0.6682, "step": 11665 }, { "epoch": 2.0981749528005036, "grad_norm": 1.441142201423645, "learning_rate": 6.521247227749933e-06, "loss": 0.6914, "step": 11666 }, { "epoch": 2.098354760406365, "grad_norm": 1.5828436613082886, "learning_rate": 6.520692405180545e-06, "loss": 0.7188, "step": 11667 }, { "epoch": 2.098534568012227, "grad_norm": 1.0852694511413574, "learning_rate": 6.5201375619773556e-06, "loss": 0.8858, "step": 11668 }, { "epoch": 2.098714375618089, "grad_norm": 1.5368379354476929, "learning_rate": 6.519582698147895e-06, "loss": 0.7121, "step": 11669 }, { "epoch": 2.0988941832239503, "grad_norm": 1.5434731245040894, "learning_rate": 6.519027813699692e-06, "loss": 0.7364, "step": 11670 }, { "epoch": 2.099073990829812, "grad_norm": 4.054597854614258, "learning_rate": 6.518472908640275e-06, "loss": 0.6511, "step": 11671 }, { "epoch": 2.099253798435674, "grad_norm": 1.5871485471725464, "learning_rate": 6.517917982977172e-06, "loss": 0.6994, "step": 11672 }, { "epoch": 2.0994336060415355, "grad_norm": 1.0990440845489502, "learning_rate": 6.5173630367179144e-06, "loss": 0.8807, "step": 11673 }, { "epoch": 2.0996134136473974, "grad_norm": 1.4686015844345093, "learning_rate": 6.516808069870031e-06, "loss": 0.6628, "step": 11674 }, { "epoch": 2.099793221253259, "grad_norm": 1.1331721544265747, "learning_rate": 6.516253082441052e-06, "loss": 0.8497, "step": 11675 }, { "epoch": 2.0999730288591207, "grad_norm": 1.501611351966858, "learning_rate": 6.515698074438509e-06, "loss": 0.6718, "step": 11676 }, { "epoch": 2.1001528364649826, "grad_norm": 1.067779541015625, "learning_rate": 6.5151430458699315e-06, "loss": 0.8972, "step": 11677 }, { "epoch": 2.100332644070844, "grad_norm": 1.6639957427978516, "learning_rate": 6.514587996742852e-06, "loss": 0.735, "step": 11678 }, { "epoch": 2.100512451676706, "grad_norm": 1.5959749221801758, "learning_rate": 6.514032927064798e-06, "loss": 0.683, "step": 11679 }, { "epoch": 2.100692259282568, "grad_norm": 1.045625925064087, "learning_rate": 6.513477836843305e-06, "loss": 0.9036, "step": 11680 }, { "epoch": 2.1008720668884293, "grad_norm": 1.621772050857544, "learning_rate": 6.512922726085904e-06, "loss": 0.7034, "step": 11681 }, { "epoch": 2.101051874494291, "grad_norm": 1.6851557493209839, "learning_rate": 6.512367594800127e-06, "loss": 0.668, "step": 11682 }, { "epoch": 2.1012316821001527, "grad_norm": 1.7496027946472168, "learning_rate": 6.511812442993506e-06, "loss": 0.651, "step": 11683 }, { "epoch": 2.1014114897060145, "grad_norm": 1.537941813468933, "learning_rate": 6.511257270673574e-06, "loss": 0.7149, "step": 11684 }, { "epoch": 2.1015912973118764, "grad_norm": 1.6587876081466675, "learning_rate": 6.510702077847864e-06, "loss": 0.7505, "step": 11685 }, { "epoch": 2.101771104917738, "grad_norm": 1.388322114944458, "learning_rate": 6.51014686452391e-06, "loss": 0.6521, "step": 11686 }, { "epoch": 2.1019509125236, "grad_norm": 1.0809903144836426, "learning_rate": 6.5095916307092425e-06, "loss": 0.8659, "step": 11687 }, { "epoch": 2.1021307201294617, "grad_norm": 1.6102161407470703, "learning_rate": 6.5090363764113985e-06, "loss": 0.7148, "step": 11688 }, { "epoch": 2.102310527735323, "grad_norm": 1.4328707456588745, "learning_rate": 6.50848110163791e-06, "loss": 0.6807, "step": 11689 }, { "epoch": 2.102490335341185, "grad_norm": 1.4244683980941772, "learning_rate": 6.507925806396314e-06, "loss": 0.6263, "step": 11690 }, { "epoch": 2.1026701429470465, "grad_norm": 1.4896775484085083, "learning_rate": 6.50737049069414e-06, "loss": 0.6518, "step": 11691 }, { "epoch": 2.1028499505529084, "grad_norm": 1.632053017616272, "learning_rate": 6.5068151545389305e-06, "loss": 0.6277, "step": 11692 }, { "epoch": 2.1030297581587702, "grad_norm": 1.581986904144287, "learning_rate": 6.506259797938214e-06, "loss": 0.6992, "step": 11693 }, { "epoch": 2.1032095657646317, "grad_norm": 1.7066570520401, "learning_rate": 6.50570442089953e-06, "loss": 0.6495, "step": 11694 }, { "epoch": 2.1033893733704936, "grad_norm": 1.0911237001419067, "learning_rate": 6.505149023430411e-06, "loss": 0.8249, "step": 11695 }, { "epoch": 2.1035691809763555, "grad_norm": 1.5000156164169312, "learning_rate": 6.504593605538396e-06, "loss": 0.6729, "step": 11696 }, { "epoch": 2.103748988582217, "grad_norm": 1.516714096069336, "learning_rate": 6.50403816723102e-06, "loss": 0.6787, "step": 11697 }, { "epoch": 2.103928796188079, "grad_norm": 1.4771385192871094, "learning_rate": 6.503482708515818e-06, "loss": 0.6385, "step": 11698 }, { "epoch": 2.1041086037939403, "grad_norm": 1.033920407295227, "learning_rate": 6.50292722940033e-06, "loss": 0.8818, "step": 11699 }, { "epoch": 2.104288411399802, "grad_norm": 1.546500325202942, "learning_rate": 6.502371729892091e-06, "loss": 0.6887, "step": 11700 }, { "epoch": 2.104468219005664, "grad_norm": 1.447968602180481, "learning_rate": 6.501816209998638e-06, "loss": 0.677, "step": 11701 }, { "epoch": 2.1046480266115255, "grad_norm": 1.5321940183639526, "learning_rate": 6.501260669727512e-06, "loss": 0.697, "step": 11702 }, { "epoch": 2.1048278342173874, "grad_norm": 1.6412286758422852, "learning_rate": 6.500705109086246e-06, "loss": 0.7084, "step": 11703 }, { "epoch": 2.1050076418232493, "grad_norm": 1.6179797649383545, "learning_rate": 6.500149528082382e-06, "loss": 0.7049, "step": 11704 }, { "epoch": 2.1051874494291107, "grad_norm": 1.4697974920272827, "learning_rate": 6.499593926723457e-06, "loss": 0.6519, "step": 11705 }, { "epoch": 2.1053672570349726, "grad_norm": 1.4962886571884155, "learning_rate": 6.499038305017011e-06, "loss": 0.7226, "step": 11706 }, { "epoch": 2.1055470646408345, "grad_norm": 1.5512886047363281, "learning_rate": 6.498482662970581e-06, "loss": 0.6655, "step": 11707 }, { "epoch": 2.105726872246696, "grad_norm": 1.5419403314590454, "learning_rate": 6.497927000591709e-06, "loss": 0.7076, "step": 11708 }, { "epoch": 2.105906679852558, "grad_norm": 1.289978265762329, "learning_rate": 6.497371317887932e-06, "loss": 0.8397, "step": 11709 }, { "epoch": 2.1060864874584193, "grad_norm": 1.4125680923461914, "learning_rate": 6.496815614866792e-06, "loss": 0.6321, "step": 11710 }, { "epoch": 2.106266295064281, "grad_norm": 1.5142287015914917, "learning_rate": 6.496259891535826e-06, "loss": 0.7033, "step": 11711 }, { "epoch": 2.106446102670143, "grad_norm": 1.1582027673721313, "learning_rate": 6.495704147902577e-06, "loss": 0.8302, "step": 11712 }, { "epoch": 2.1066259102760045, "grad_norm": 1.4998016357421875, "learning_rate": 6.495148383974586e-06, "loss": 0.6353, "step": 11713 }, { "epoch": 2.1068057178818664, "grad_norm": 1.549370288848877, "learning_rate": 6.494592599759394e-06, "loss": 0.6898, "step": 11714 }, { "epoch": 2.1069855254877283, "grad_norm": 1.7406708002090454, "learning_rate": 6.49403679526454e-06, "loss": 0.7636, "step": 11715 }, { "epoch": 2.1071653330935898, "grad_norm": 1.6602832078933716, "learning_rate": 6.493480970497569e-06, "loss": 0.6836, "step": 11716 }, { "epoch": 2.1073451406994517, "grad_norm": 1.6160238981246948, "learning_rate": 6.4929251254660186e-06, "loss": 0.7243, "step": 11717 }, { "epoch": 2.107524948305313, "grad_norm": 1.5098778009414673, "learning_rate": 6.492369260177435e-06, "loss": 0.7194, "step": 11718 }, { "epoch": 2.107704755911175, "grad_norm": 1.4964230060577393, "learning_rate": 6.491813374639359e-06, "loss": 0.6254, "step": 11719 }, { "epoch": 2.107884563517037, "grad_norm": 1.4914575815200806, "learning_rate": 6.491257468859332e-06, "loss": 0.6647, "step": 11720 }, { "epoch": 2.1080643711228984, "grad_norm": 1.5689420700073242, "learning_rate": 6.490701542844897e-06, "loss": 0.6845, "step": 11721 }, { "epoch": 2.1082441787287602, "grad_norm": 1.6156867742538452, "learning_rate": 6.490145596603599e-06, "loss": 0.6417, "step": 11722 }, { "epoch": 2.108423986334622, "grad_norm": 1.5682846307754517, "learning_rate": 6.48958963014298e-06, "loss": 0.7046, "step": 11723 }, { "epoch": 2.1086037939404836, "grad_norm": 1.6015710830688477, "learning_rate": 6.489033643470585e-06, "loss": 0.7382, "step": 11724 }, { "epoch": 2.1087836015463455, "grad_norm": 1.5530471801757812, "learning_rate": 6.488477636593957e-06, "loss": 0.7211, "step": 11725 }, { "epoch": 2.108963409152207, "grad_norm": 1.6079951524734497, "learning_rate": 6.48792160952064e-06, "loss": 0.6864, "step": 11726 }, { "epoch": 2.109143216758069, "grad_norm": 1.4747568368911743, "learning_rate": 6.487365562258181e-06, "loss": 0.6217, "step": 11727 }, { "epoch": 2.1093230243639307, "grad_norm": 1.6389294862747192, "learning_rate": 6.486809494814122e-06, "loss": 0.6559, "step": 11728 }, { "epoch": 2.109502831969792, "grad_norm": 1.6343703269958496, "learning_rate": 6.486253407196008e-06, "loss": 0.7318, "step": 11729 }, { "epoch": 2.109682639575654, "grad_norm": 1.5894017219543457, "learning_rate": 6.485697299411386e-06, "loss": 0.6424, "step": 11730 }, { "epoch": 2.109862447181516, "grad_norm": 1.466966986656189, "learning_rate": 6.485141171467801e-06, "loss": 0.6465, "step": 11731 }, { "epoch": 2.1100422547873774, "grad_norm": 1.4929786920547485, "learning_rate": 6.4845850233728005e-06, "loss": 0.7059, "step": 11732 }, { "epoch": 2.1102220623932393, "grad_norm": 1.0934836864471436, "learning_rate": 6.484028855133928e-06, "loss": 0.9056, "step": 11733 }, { "epoch": 2.110401869999101, "grad_norm": 1.426094651222229, "learning_rate": 6.48347266675873e-06, "loss": 0.5837, "step": 11734 }, { "epoch": 2.1105816776049626, "grad_norm": 1.6463319063186646, "learning_rate": 6.482916458254756e-06, "loss": 0.7136, "step": 11735 }, { "epoch": 2.1107614852108245, "grad_norm": 1.5557348728179932, "learning_rate": 6.482360229629551e-06, "loss": 0.6284, "step": 11736 }, { "epoch": 2.110941292816686, "grad_norm": 1.4987314939498901, "learning_rate": 6.481803980890663e-06, "loss": 0.6936, "step": 11737 }, { "epoch": 2.111121100422548, "grad_norm": 1.570436716079712, "learning_rate": 6.481247712045638e-06, "loss": 0.6718, "step": 11738 }, { "epoch": 2.1113009080284098, "grad_norm": 1.499825358390808, "learning_rate": 6.480691423102028e-06, "loss": 0.7002, "step": 11739 }, { "epoch": 2.111480715634271, "grad_norm": 1.5280261039733887, "learning_rate": 6.480135114067375e-06, "loss": 0.6299, "step": 11740 }, { "epoch": 2.111660523240133, "grad_norm": 1.5592654943466187, "learning_rate": 6.479578784949233e-06, "loss": 0.6263, "step": 11741 }, { "epoch": 2.111840330845995, "grad_norm": 1.1103206872940063, "learning_rate": 6.479022435755147e-06, "loss": 0.909, "step": 11742 }, { "epoch": 2.1120201384518564, "grad_norm": 1.445557713508606, "learning_rate": 6.478466066492668e-06, "loss": 0.6893, "step": 11743 }, { "epoch": 2.1121999460577183, "grad_norm": 1.6246238946914673, "learning_rate": 6.477909677169344e-06, "loss": 0.7389, "step": 11744 }, { "epoch": 2.1123797536635798, "grad_norm": 1.5287638902664185, "learning_rate": 6.477353267792725e-06, "loss": 0.6876, "step": 11745 }, { "epoch": 2.1125595612694417, "grad_norm": 1.546004295349121, "learning_rate": 6.476796838370359e-06, "loss": 0.695, "step": 11746 }, { "epoch": 2.1127393688753036, "grad_norm": 1.5727185010910034, "learning_rate": 6.4762403889098e-06, "loss": 0.6082, "step": 11747 }, { "epoch": 2.112919176481165, "grad_norm": 1.1358883380889893, "learning_rate": 6.475683919418596e-06, "loss": 0.8599, "step": 11748 }, { "epoch": 2.113098984087027, "grad_norm": 1.6117956638336182, "learning_rate": 6.475127429904297e-06, "loss": 0.6636, "step": 11749 }, { "epoch": 2.113278791692889, "grad_norm": 1.177088737487793, "learning_rate": 6.474570920374453e-06, "loss": 0.8848, "step": 11750 }, { "epoch": 2.1134585992987502, "grad_norm": 1.4475483894348145, "learning_rate": 6.474014390836618e-06, "loss": 0.6686, "step": 11751 }, { "epoch": 2.113638406904612, "grad_norm": 1.5729477405548096, "learning_rate": 6.473457841298342e-06, "loss": 0.7395, "step": 11752 }, { "epoch": 2.1138182145104736, "grad_norm": 1.683881402015686, "learning_rate": 6.472901271767176e-06, "loss": 0.7299, "step": 11753 }, { "epoch": 2.1139980221163355, "grad_norm": 1.5072851181030273, "learning_rate": 6.472344682250672e-06, "loss": 0.7447, "step": 11754 }, { "epoch": 2.1141778297221974, "grad_norm": 1.4821346998214722, "learning_rate": 6.471788072756383e-06, "loss": 0.6804, "step": 11755 }, { "epoch": 2.114357637328059, "grad_norm": 1.658779263496399, "learning_rate": 6.471231443291861e-06, "loss": 0.7066, "step": 11756 }, { "epoch": 2.1145374449339207, "grad_norm": 1.5844640731811523, "learning_rate": 6.470674793864657e-06, "loss": 0.7118, "step": 11757 }, { "epoch": 2.1147172525397826, "grad_norm": 1.4903901815414429, "learning_rate": 6.470118124482328e-06, "loss": 0.7133, "step": 11758 }, { "epoch": 2.114897060145644, "grad_norm": 1.15358567237854, "learning_rate": 6.469561435152425e-06, "loss": 0.8334, "step": 11759 }, { "epoch": 2.115076867751506, "grad_norm": 1.5597655773162842, "learning_rate": 6.4690047258825e-06, "loss": 0.7752, "step": 11760 }, { "epoch": 2.115256675357368, "grad_norm": 1.5906544923782349, "learning_rate": 6.4684479966801105e-06, "loss": 0.7075, "step": 11761 }, { "epoch": 2.1154364829632293, "grad_norm": 1.5586471557617188, "learning_rate": 6.467891247552806e-06, "loss": 0.6491, "step": 11762 }, { "epoch": 2.115616290569091, "grad_norm": 1.486968994140625, "learning_rate": 6.467334478508147e-06, "loss": 0.6436, "step": 11763 }, { "epoch": 2.1157960981749526, "grad_norm": 1.5809963941574097, "learning_rate": 6.466777689553681e-06, "loss": 0.7693, "step": 11764 }, { "epoch": 2.1159759057808145, "grad_norm": 1.1417325735092163, "learning_rate": 6.466220880696969e-06, "loss": 0.8572, "step": 11765 }, { "epoch": 2.1161557133866764, "grad_norm": 1.1069105863571167, "learning_rate": 6.4656640519455614e-06, "loss": 0.9121, "step": 11766 }, { "epoch": 2.116335520992538, "grad_norm": 1.4290125370025635, "learning_rate": 6.4651072033070165e-06, "loss": 0.625, "step": 11767 }, { "epoch": 2.1165153285983997, "grad_norm": 1.740692138671875, "learning_rate": 6.464550334788888e-06, "loss": 0.6938, "step": 11768 }, { "epoch": 2.1166951362042616, "grad_norm": 1.578474760055542, "learning_rate": 6.463993446398735e-06, "loss": 0.701, "step": 11769 }, { "epoch": 2.116874943810123, "grad_norm": 1.520422101020813, "learning_rate": 6.463436538144111e-06, "loss": 0.749, "step": 11770 }, { "epoch": 2.117054751415985, "grad_norm": 1.585430383682251, "learning_rate": 6.462879610032575e-06, "loss": 0.6931, "step": 11771 }, { "epoch": 2.1172345590218464, "grad_norm": 1.0527116060256958, "learning_rate": 6.46232266207168e-06, "loss": 0.8555, "step": 11772 }, { "epoch": 2.1174143666277083, "grad_norm": 1.4630717039108276, "learning_rate": 6.461765694268986e-06, "loss": 0.6689, "step": 11773 }, { "epoch": 2.11759417423357, "grad_norm": 1.5913333892822266, "learning_rate": 6.46120870663205e-06, "loss": 0.6757, "step": 11774 }, { "epoch": 2.1177739818394317, "grad_norm": 1.4849772453308105, "learning_rate": 6.46065169916843e-06, "loss": 0.6958, "step": 11775 }, { "epoch": 2.1179537894452936, "grad_norm": 1.4595720767974854, "learning_rate": 6.460094671885681e-06, "loss": 0.6194, "step": 11776 }, { "epoch": 2.1181335970511554, "grad_norm": 1.5210517644882202, "learning_rate": 6.459537624791363e-06, "loss": 0.6816, "step": 11777 }, { "epoch": 2.118313404657017, "grad_norm": 1.5103884935379028, "learning_rate": 6.458980557893036e-06, "loss": 0.7267, "step": 11778 }, { "epoch": 2.118493212262879, "grad_norm": 1.6157276630401611, "learning_rate": 6.458423471198257e-06, "loss": 0.7033, "step": 11779 }, { "epoch": 2.1186730198687402, "grad_norm": 1.4544892311096191, "learning_rate": 6.457866364714584e-06, "loss": 0.6157, "step": 11780 }, { "epoch": 2.118852827474602, "grad_norm": 1.5615766048431396, "learning_rate": 6.45730923844958e-06, "loss": 0.7201, "step": 11781 }, { "epoch": 2.119032635080464, "grad_norm": 1.5980448722839355, "learning_rate": 6.4567520924108e-06, "loss": 0.7118, "step": 11782 }, { "epoch": 2.1192124426863255, "grad_norm": 1.0177639722824097, "learning_rate": 6.456194926605805e-06, "loss": 0.8813, "step": 11783 }, { "epoch": 2.1193922502921874, "grad_norm": 1.4480130672454834, "learning_rate": 6.455637741042157e-06, "loss": 0.6609, "step": 11784 }, { "epoch": 2.1195720578980493, "grad_norm": 1.5601195096969604, "learning_rate": 6.455080535727415e-06, "loss": 0.6312, "step": 11785 }, { "epoch": 2.1197518655039107, "grad_norm": 1.5523377656936646, "learning_rate": 6.454523310669137e-06, "loss": 0.6322, "step": 11786 }, { "epoch": 2.1199316731097726, "grad_norm": 1.5242178440093994, "learning_rate": 6.453966065874889e-06, "loss": 0.6446, "step": 11787 }, { "epoch": 2.120111480715634, "grad_norm": 1.7328578233718872, "learning_rate": 6.453408801352228e-06, "loss": 0.7419, "step": 11788 }, { "epoch": 2.120291288321496, "grad_norm": 1.5474216938018799, "learning_rate": 6.452851517108716e-06, "loss": 0.7064, "step": 11789 }, { "epoch": 2.120471095927358, "grad_norm": 1.4574803113937378, "learning_rate": 6.4522942131519155e-06, "loss": 0.6978, "step": 11790 }, { "epoch": 2.1206509035332193, "grad_norm": 2.6288323402404785, "learning_rate": 6.451736889489388e-06, "loss": 0.6977, "step": 11791 }, { "epoch": 2.120830711139081, "grad_norm": 1.6080378293991089, "learning_rate": 6.451179546128696e-06, "loss": 0.6727, "step": 11792 }, { "epoch": 2.121010518744943, "grad_norm": 1.5103591680526733, "learning_rate": 6.450622183077403e-06, "loss": 0.7107, "step": 11793 }, { "epoch": 2.1211903263508045, "grad_norm": 1.7388416528701782, "learning_rate": 6.45006480034307e-06, "loss": 0.633, "step": 11794 }, { "epoch": 2.1213701339566664, "grad_norm": 1.52278470993042, "learning_rate": 6.449507397933259e-06, "loss": 0.6333, "step": 11795 }, { "epoch": 2.1215499415625283, "grad_norm": 1.4925817251205444, "learning_rate": 6.448949975855535e-06, "loss": 0.6274, "step": 11796 }, { "epoch": 2.1217297491683897, "grad_norm": 1.0342128276824951, "learning_rate": 6.4483925341174625e-06, "loss": 0.905, "step": 11797 }, { "epoch": 2.1219095567742516, "grad_norm": 1.5753710269927979, "learning_rate": 6.447835072726602e-06, "loss": 0.6696, "step": 11798 }, { "epoch": 2.122089364380113, "grad_norm": 1.6323046684265137, "learning_rate": 6.44727759169052e-06, "loss": 0.6466, "step": 11799 }, { "epoch": 2.122269171985975, "grad_norm": 1.6276865005493164, "learning_rate": 6.4467200910167795e-06, "loss": 0.6634, "step": 11800 }, { "epoch": 2.122448979591837, "grad_norm": 1.5024545192718506, "learning_rate": 6.446162570712947e-06, "loss": 0.6263, "step": 11801 }, { "epoch": 2.1226287871976983, "grad_norm": 1.4633429050445557, "learning_rate": 6.445605030786585e-06, "loss": 0.6005, "step": 11802 }, { "epoch": 2.12280859480356, "grad_norm": 1.5836035013198853, "learning_rate": 6.44504747124526e-06, "loss": 0.6316, "step": 11803 }, { "epoch": 2.122988402409422, "grad_norm": 1.5351461172103882, "learning_rate": 6.4444898920965356e-06, "loss": 0.7038, "step": 11804 }, { "epoch": 2.1231682100152836, "grad_norm": 1.5059611797332764, "learning_rate": 6.443932293347981e-06, "loss": 0.6526, "step": 11805 }, { "epoch": 2.1233480176211454, "grad_norm": 1.5299490690231323, "learning_rate": 6.443374675007158e-06, "loss": 0.7202, "step": 11806 }, { "epoch": 2.123527825227007, "grad_norm": 1.5498448610305786, "learning_rate": 6.4428170370816364e-06, "loss": 0.6408, "step": 11807 }, { "epoch": 2.123707632832869, "grad_norm": 1.4259940385818481, "learning_rate": 6.442259379578979e-06, "loss": 0.6718, "step": 11808 }, { "epoch": 2.1238874404387307, "grad_norm": 1.5290021896362305, "learning_rate": 6.441701702506755e-06, "loss": 0.6929, "step": 11809 }, { "epoch": 2.124067248044592, "grad_norm": 1.5554146766662598, "learning_rate": 6.441144005872531e-06, "loss": 0.7542, "step": 11810 }, { "epoch": 2.124247055650454, "grad_norm": 1.5607304573059082, "learning_rate": 6.440586289683872e-06, "loss": 0.6557, "step": 11811 }, { "epoch": 2.124426863256316, "grad_norm": 1.0599298477172852, "learning_rate": 6.440028553948349e-06, "loss": 0.8868, "step": 11812 }, { "epoch": 2.1246066708621774, "grad_norm": 1.6024326086044312, "learning_rate": 6.439470798673527e-06, "loss": 0.6418, "step": 11813 }, { "epoch": 2.1247864784680393, "grad_norm": 1.5135002136230469, "learning_rate": 6.438913023866976e-06, "loss": 0.6849, "step": 11814 }, { "epoch": 2.124966286073901, "grad_norm": 1.5757726430892944, "learning_rate": 6.4383552295362635e-06, "loss": 0.6603, "step": 11815 }, { "epoch": 2.1251460936797626, "grad_norm": 1.4128612279891968, "learning_rate": 6.437797415688956e-06, "loss": 0.6725, "step": 11816 }, { "epoch": 2.1253259012856245, "grad_norm": 1.5184012651443481, "learning_rate": 6.437239582332627e-06, "loss": 0.6442, "step": 11817 }, { "epoch": 2.125505708891486, "grad_norm": 1.6942551136016846, "learning_rate": 6.4366817294748406e-06, "loss": 0.6783, "step": 11818 }, { "epoch": 2.125685516497348, "grad_norm": 1.5520466566085815, "learning_rate": 6.43612385712317e-06, "loss": 0.7217, "step": 11819 }, { "epoch": 2.1258653241032097, "grad_norm": 1.3826406002044678, "learning_rate": 6.435565965285181e-06, "loss": 0.6352, "step": 11820 }, { "epoch": 2.126045131709071, "grad_norm": 1.6778510808944702, "learning_rate": 6.4350080539684455e-06, "loss": 0.7297, "step": 11821 }, { "epoch": 2.126224939314933, "grad_norm": 1.753050446510315, "learning_rate": 6.4344501231805345e-06, "loss": 0.6574, "step": 11822 }, { "epoch": 2.126404746920795, "grad_norm": 1.261266827583313, "learning_rate": 6.4338921729290184e-06, "loss": 0.8526, "step": 11823 }, { "epoch": 2.1265845545266564, "grad_norm": 1.4573582410812378, "learning_rate": 6.433334203221465e-06, "loss": 0.6582, "step": 11824 }, { "epoch": 2.1267643621325183, "grad_norm": 1.48486328125, "learning_rate": 6.432776214065449e-06, "loss": 0.7033, "step": 11825 }, { "epoch": 2.1269441697383797, "grad_norm": 1.4515321254730225, "learning_rate": 6.432218205468539e-06, "loss": 0.6996, "step": 11826 }, { "epoch": 2.1271239773442416, "grad_norm": 1.6406360864639282, "learning_rate": 6.431660177438308e-06, "loss": 0.624, "step": 11827 }, { "epoch": 2.1273037849501035, "grad_norm": 1.4991626739501953, "learning_rate": 6.431102129982326e-06, "loss": 0.6782, "step": 11828 }, { "epoch": 2.127483592555965, "grad_norm": 1.6785694360733032, "learning_rate": 6.430544063108166e-06, "loss": 0.68, "step": 11829 }, { "epoch": 2.127663400161827, "grad_norm": 1.4971610307693481, "learning_rate": 6.429985976823401e-06, "loss": 0.673, "step": 11830 }, { "epoch": 2.1278432077676888, "grad_norm": 1.5346614122390747, "learning_rate": 6.4294278711356004e-06, "loss": 0.628, "step": 11831 }, { "epoch": 2.12802301537355, "grad_norm": 1.1917608976364136, "learning_rate": 6.428869746052342e-06, "loss": 0.8579, "step": 11832 }, { "epoch": 2.128202822979412, "grad_norm": 1.5150569677352905, "learning_rate": 6.428311601581194e-06, "loss": 0.6106, "step": 11833 }, { "epoch": 2.1283826305852735, "grad_norm": 1.5017940998077393, "learning_rate": 6.4277534377297325e-06, "loss": 0.6179, "step": 11834 }, { "epoch": 2.1285624381911354, "grad_norm": 1.5590062141418457, "learning_rate": 6.4271952545055304e-06, "loss": 0.639, "step": 11835 }, { "epoch": 2.1287422457969973, "grad_norm": 1.164759635925293, "learning_rate": 6.426637051916161e-06, "loss": 0.8955, "step": 11836 }, { "epoch": 2.128922053402859, "grad_norm": 1.180466890335083, "learning_rate": 6.4260788299692e-06, "loss": 0.8806, "step": 11837 }, { "epoch": 2.1291018610087207, "grad_norm": 1.7032239437103271, "learning_rate": 6.425520588672218e-06, "loss": 0.6799, "step": 11838 }, { "epoch": 2.1292816686145826, "grad_norm": 2.0307092666625977, "learning_rate": 6.424962328032795e-06, "loss": 0.6753, "step": 11839 }, { "epoch": 2.129461476220444, "grad_norm": 1.7004475593566895, "learning_rate": 6.424404048058501e-06, "loss": 0.6784, "step": 11840 }, { "epoch": 2.129641283826306, "grad_norm": 1.5112851858139038, "learning_rate": 6.423845748756914e-06, "loss": 0.6626, "step": 11841 }, { "epoch": 2.1298210914321674, "grad_norm": 1.5907223224639893, "learning_rate": 6.423287430135608e-06, "loss": 0.7252, "step": 11842 }, { "epoch": 2.1300008990380292, "grad_norm": 1.0901395082473755, "learning_rate": 6.4227290922021576e-06, "loss": 0.8343, "step": 11843 }, { "epoch": 2.130180706643891, "grad_norm": 1.1719412803649902, "learning_rate": 6.422170734964141e-06, "loss": 0.9008, "step": 11844 }, { "epoch": 2.1303605142497526, "grad_norm": 1.5493674278259277, "learning_rate": 6.4216123584291355e-06, "loss": 0.6509, "step": 11845 }, { "epoch": 2.1305403218556145, "grad_norm": 1.4480255842208862, "learning_rate": 6.4210539626047145e-06, "loss": 0.667, "step": 11846 }, { "epoch": 2.1307201294614764, "grad_norm": 1.1155459880828857, "learning_rate": 6.420495547498455e-06, "loss": 0.8429, "step": 11847 }, { "epoch": 2.130899937067338, "grad_norm": 1.5714980363845825, "learning_rate": 6.419937113117937e-06, "loss": 0.7028, "step": 11848 }, { "epoch": 2.1310797446731997, "grad_norm": 1.531705379486084, "learning_rate": 6.419378659470733e-06, "loss": 0.6743, "step": 11849 }, { "epoch": 2.1312595522790616, "grad_norm": 1.6369951963424683, "learning_rate": 6.418820186564425e-06, "loss": 0.6594, "step": 11850 }, { "epoch": 2.131439359884923, "grad_norm": 1.4626046419143677, "learning_rate": 6.418261694406588e-06, "loss": 0.6932, "step": 11851 }, { "epoch": 2.131619167490785, "grad_norm": 1.433956503868103, "learning_rate": 6.417703183004801e-06, "loss": 0.6343, "step": 11852 }, { "epoch": 2.1317989750966464, "grad_norm": 1.1670175790786743, "learning_rate": 6.417144652366641e-06, "loss": 0.8887, "step": 11853 }, { "epoch": 2.1319787827025083, "grad_norm": 1.5086750984191895, "learning_rate": 6.416586102499688e-06, "loss": 0.6948, "step": 11854 }, { "epoch": 2.13215859030837, "grad_norm": 1.6314189434051514, "learning_rate": 6.41602753341152e-06, "loss": 0.7568, "step": 11855 }, { "epoch": 2.1323383979142316, "grad_norm": 1.6518265008926392, "learning_rate": 6.415468945109717e-06, "loss": 0.7379, "step": 11856 }, { "epoch": 2.1325182055200935, "grad_norm": 1.597000241279602, "learning_rate": 6.414910337601858e-06, "loss": 0.6916, "step": 11857 }, { "epoch": 2.1326980131259554, "grad_norm": 1.0686414241790771, "learning_rate": 6.414351710895523e-06, "loss": 0.8863, "step": 11858 }, { "epoch": 2.132877820731817, "grad_norm": 1.1438442468643188, "learning_rate": 6.413793064998289e-06, "loss": 0.8951, "step": 11859 }, { "epoch": 2.1330576283376788, "grad_norm": 0.9954789280891418, "learning_rate": 6.4132343999177405e-06, "loss": 0.8617, "step": 11860 }, { "epoch": 2.13323743594354, "grad_norm": 1.5610628128051758, "learning_rate": 6.412675715661454e-06, "loss": 0.6937, "step": 11861 }, { "epoch": 2.133417243549402, "grad_norm": 1.1711729764938354, "learning_rate": 6.412117012237013e-06, "loss": 0.8912, "step": 11862 }, { "epoch": 2.133597051155264, "grad_norm": 1.4184856414794922, "learning_rate": 6.411558289651995e-06, "loss": 0.6835, "step": 11863 }, { "epoch": 2.1337768587611254, "grad_norm": 1.1313098669052124, "learning_rate": 6.410999547913985e-06, "loss": 0.8505, "step": 11864 }, { "epoch": 2.1339566663669873, "grad_norm": 0.993800938129425, "learning_rate": 6.41044078703056e-06, "loss": 0.9127, "step": 11865 }, { "epoch": 2.1341364739728492, "grad_norm": 1.4903804063796997, "learning_rate": 6.409882007009307e-06, "loss": 0.709, "step": 11866 }, { "epoch": 2.1343162815787107, "grad_norm": 0.9858579039573669, "learning_rate": 6.409323207857803e-06, "loss": 0.8951, "step": 11867 }, { "epoch": 2.1344960891845726, "grad_norm": 1.5971277952194214, "learning_rate": 6.408764389583635e-06, "loss": 0.7014, "step": 11868 }, { "epoch": 2.1346758967904345, "grad_norm": 1.0617566108703613, "learning_rate": 6.408205552194379e-06, "loss": 0.8413, "step": 11869 }, { "epoch": 2.134855704396296, "grad_norm": 1.5662425756454468, "learning_rate": 6.407646695697625e-06, "loss": 0.7165, "step": 11870 }, { "epoch": 2.135035512002158, "grad_norm": 1.486695408821106, "learning_rate": 6.40708782010095e-06, "loss": 0.6179, "step": 11871 }, { "epoch": 2.1352153196080192, "grad_norm": 1.5329807996749878, "learning_rate": 6.406528925411941e-06, "loss": 0.7005, "step": 11872 }, { "epoch": 2.135395127213881, "grad_norm": 1.4909381866455078, "learning_rate": 6.40597001163818e-06, "loss": 0.6404, "step": 11873 }, { "epoch": 2.135574934819743, "grad_norm": 1.1334710121154785, "learning_rate": 6.405411078787251e-06, "loss": 0.8647, "step": 11874 }, { "epoch": 2.1357547424256045, "grad_norm": 1.481060266494751, "learning_rate": 6.404852126866736e-06, "loss": 0.6389, "step": 11875 }, { "epoch": 2.1359345500314664, "grad_norm": 0.9731302857398987, "learning_rate": 6.4042931558842224e-06, "loss": 0.914, "step": 11876 }, { "epoch": 2.1361143576373283, "grad_norm": 1.5458062887191772, "learning_rate": 6.403734165847292e-06, "loss": 0.6932, "step": 11877 }, { "epoch": 2.1362941652431897, "grad_norm": 1.7818186283111572, "learning_rate": 6.4031751567635325e-06, "loss": 0.6676, "step": 11878 }, { "epoch": 2.1364739728490516, "grad_norm": 1.5028395652770996, "learning_rate": 6.402616128640527e-06, "loss": 0.6748, "step": 11879 }, { "epoch": 2.136653780454913, "grad_norm": 1.447310209274292, "learning_rate": 6.40205708148586e-06, "loss": 0.6509, "step": 11880 }, { "epoch": 2.136833588060775, "grad_norm": 1.451502799987793, "learning_rate": 6.401498015307119e-06, "loss": 0.6602, "step": 11881 }, { "epoch": 2.137013395666637, "grad_norm": 1.1252241134643555, "learning_rate": 6.400938930111888e-06, "loss": 0.8844, "step": 11882 }, { "epoch": 2.1371932032724983, "grad_norm": 1.5481778383255005, "learning_rate": 6.400379825907754e-06, "loss": 0.671, "step": 11883 }, { "epoch": 2.13737301087836, "grad_norm": 1.5638236999511719, "learning_rate": 6.3998207027023056e-06, "loss": 0.6919, "step": 11884 }, { "epoch": 2.137552818484222, "grad_norm": 1.524825930595398, "learning_rate": 6.399261560503125e-06, "loss": 0.6532, "step": 11885 }, { "epoch": 2.1377326260900835, "grad_norm": 1.4879707098007202, "learning_rate": 6.398702399317802e-06, "loss": 0.6467, "step": 11886 }, { "epoch": 2.1379124336959454, "grad_norm": 1.0319973230361938, "learning_rate": 6.39814321915392e-06, "loss": 0.8906, "step": 11887 }, { "epoch": 2.138092241301807, "grad_norm": 1.1022777557373047, "learning_rate": 6.397584020019072e-06, "loss": 0.898, "step": 11888 }, { "epoch": 2.1382720489076688, "grad_norm": 0.9926478862762451, "learning_rate": 6.397024801920841e-06, "loss": 0.857, "step": 11889 }, { "epoch": 2.1384518565135306, "grad_norm": 1.5129036903381348, "learning_rate": 6.3964655648668185e-06, "loss": 0.6812, "step": 11890 }, { "epoch": 2.138631664119392, "grad_norm": 1.55315101146698, "learning_rate": 6.395906308864588e-06, "loss": 0.6515, "step": 11891 }, { "epoch": 2.138811471725254, "grad_norm": 1.4359110593795776, "learning_rate": 6.395347033921742e-06, "loss": 0.6497, "step": 11892 }, { "epoch": 2.138991279331116, "grad_norm": 1.4771479368209839, "learning_rate": 6.394787740045868e-06, "loss": 0.7702, "step": 11893 }, { "epoch": 2.1391710869369773, "grad_norm": 1.613619089126587, "learning_rate": 6.394228427244556e-06, "loss": 0.6551, "step": 11894 }, { "epoch": 2.139350894542839, "grad_norm": 1.4591751098632812, "learning_rate": 6.39366909552539e-06, "loss": 0.656, "step": 11895 }, { "epoch": 2.1395307021487007, "grad_norm": 1.5432573556900024, "learning_rate": 6.393109744895966e-06, "loss": 0.6173, "step": 11896 }, { "epoch": 2.1397105097545626, "grad_norm": 1.5638916492462158, "learning_rate": 6.392550375363868e-06, "loss": 0.6603, "step": 11897 }, { "epoch": 2.1398903173604245, "grad_norm": 1.1901015043258667, "learning_rate": 6.391990986936691e-06, "loss": 0.8837, "step": 11898 }, { "epoch": 2.140070124966286, "grad_norm": 1.5571540594100952, "learning_rate": 6.39143157962202e-06, "loss": 0.6761, "step": 11899 }, { "epoch": 2.140249932572148, "grad_norm": 1.506057620048523, "learning_rate": 6.390872153427452e-06, "loss": 0.6527, "step": 11900 }, { "epoch": 2.1404297401780097, "grad_norm": 1.7074882984161377, "learning_rate": 6.390312708360571e-06, "loss": 0.748, "step": 11901 }, { "epoch": 2.140609547783871, "grad_norm": 1.4985322952270508, "learning_rate": 6.389753244428973e-06, "loss": 0.7105, "step": 11902 }, { "epoch": 2.140789355389733, "grad_norm": 1.6177645921707153, "learning_rate": 6.3891937616402446e-06, "loss": 0.6553, "step": 11903 }, { "epoch": 2.140969162995595, "grad_norm": 1.547130823135376, "learning_rate": 6.388634260001982e-06, "loss": 0.661, "step": 11904 }, { "epoch": 2.1411489706014564, "grad_norm": 1.5167841911315918, "learning_rate": 6.388074739521772e-06, "loss": 0.6279, "step": 11905 }, { "epoch": 2.1413287782073183, "grad_norm": 1.0650280714035034, "learning_rate": 6.3875152002072125e-06, "loss": 0.8713, "step": 11906 }, { "epoch": 2.1415085858131797, "grad_norm": 1.0405707359313965, "learning_rate": 6.38695564206589e-06, "loss": 0.8539, "step": 11907 }, { "epoch": 2.1416883934190416, "grad_norm": 1.65547776222229, "learning_rate": 6.386396065105399e-06, "loss": 0.6696, "step": 11908 }, { "epoch": 2.1418682010249035, "grad_norm": 1.1523876190185547, "learning_rate": 6.3858364693333345e-06, "loss": 0.8697, "step": 11909 }, { "epoch": 2.142048008630765, "grad_norm": 1.5618252754211426, "learning_rate": 6.385276854757285e-06, "loss": 0.6795, "step": 11910 }, { "epoch": 2.142227816236627, "grad_norm": 1.5602707862854004, "learning_rate": 6.3847172213848475e-06, "loss": 0.6325, "step": 11911 }, { "epoch": 2.1424076238424887, "grad_norm": 1.0544500350952148, "learning_rate": 6.3841575692236145e-06, "loss": 0.8829, "step": 11912 }, { "epoch": 2.14258743144835, "grad_norm": 1.6120755672454834, "learning_rate": 6.383597898281179e-06, "loss": 0.6962, "step": 11913 }, { "epoch": 2.142767239054212, "grad_norm": 1.642067551612854, "learning_rate": 6.383038208565136e-06, "loss": 0.6614, "step": 11914 }, { "epoch": 2.1429470466600735, "grad_norm": 1.4914562702178955, "learning_rate": 6.382478500083079e-06, "loss": 0.6812, "step": 11915 }, { "epoch": 2.1431268542659354, "grad_norm": 1.4469271898269653, "learning_rate": 6.3819187728426036e-06, "loss": 0.6891, "step": 11916 }, { "epoch": 2.1433066618717973, "grad_norm": 1.490054726600647, "learning_rate": 6.381359026851303e-06, "loss": 0.7347, "step": 11917 }, { "epoch": 2.1434864694776588, "grad_norm": 1.8214188814163208, "learning_rate": 6.380799262116774e-06, "loss": 0.6865, "step": 11918 }, { "epoch": 2.1436662770835206, "grad_norm": 1.5212409496307373, "learning_rate": 6.380239478646609e-06, "loss": 0.6603, "step": 11919 }, { "epoch": 2.1438460846893825, "grad_norm": 1.6141070127487183, "learning_rate": 6.3796796764484045e-06, "loss": 0.6418, "step": 11920 }, { "epoch": 2.144025892295244, "grad_norm": 1.644128441810608, "learning_rate": 6.379119855529758e-06, "loss": 0.711, "step": 11921 }, { "epoch": 2.144205699901106, "grad_norm": 1.1751033067703247, "learning_rate": 6.378560015898266e-06, "loss": 0.8996, "step": 11922 }, { "epoch": 2.1443855075069678, "grad_norm": 1.4169209003448486, "learning_rate": 6.378000157561524e-06, "loss": 0.6843, "step": 11923 }, { "epoch": 2.144565315112829, "grad_norm": 1.5639777183532715, "learning_rate": 6.377440280527126e-06, "loss": 0.7459, "step": 11924 }, { "epoch": 2.144745122718691, "grad_norm": 0.9698696732521057, "learning_rate": 6.376880384802672e-06, "loss": 0.8509, "step": 11925 }, { "epoch": 2.1449249303245526, "grad_norm": 1.4461814165115356, "learning_rate": 6.376320470395757e-06, "loss": 0.6347, "step": 11926 }, { "epoch": 2.1451047379304145, "grad_norm": 1.5130994319915771, "learning_rate": 6.375760537313979e-06, "loss": 0.6607, "step": 11927 }, { "epoch": 2.1452845455362763, "grad_norm": 1.510752558708191, "learning_rate": 6.3752005855649365e-06, "loss": 0.677, "step": 11928 }, { "epoch": 2.145464353142138, "grad_norm": 1.1606016159057617, "learning_rate": 6.374640615156227e-06, "loss": 0.8326, "step": 11929 }, { "epoch": 2.1456441607479997, "grad_norm": 1.6004189252853394, "learning_rate": 6.3740806260954465e-06, "loss": 0.7033, "step": 11930 }, { "epoch": 2.145823968353861, "grad_norm": 1.4454981088638306, "learning_rate": 6.373520618390194e-06, "loss": 0.6817, "step": 11931 }, { "epoch": 2.146003775959723, "grad_norm": 1.4382766485214233, "learning_rate": 6.372960592048072e-06, "loss": 0.662, "step": 11932 }, { "epoch": 2.146183583565585, "grad_norm": 1.5562546253204346, "learning_rate": 6.372400547076675e-06, "loss": 0.7102, "step": 11933 }, { "epoch": 2.1463633911714464, "grad_norm": 1.584141492843628, "learning_rate": 6.3718404834836034e-06, "loss": 0.7102, "step": 11934 }, { "epoch": 2.1465431987773083, "grad_norm": 1.247259497642517, "learning_rate": 6.371280401276456e-06, "loss": 0.8795, "step": 11935 }, { "epoch": 2.14672300638317, "grad_norm": 1.8279805183410645, "learning_rate": 6.370720300462833e-06, "loss": 0.6471, "step": 11936 }, { "epoch": 2.1469028139890316, "grad_norm": 1.125186562538147, "learning_rate": 6.370160181050335e-06, "loss": 0.8227, "step": 11937 }, { "epoch": 2.1470826215948935, "grad_norm": 1.712113857269287, "learning_rate": 6.36960004304656e-06, "loss": 0.6756, "step": 11938 }, { "epoch": 2.1472624292007554, "grad_norm": 1.6145176887512207, "learning_rate": 6.36903988645911e-06, "loss": 0.6446, "step": 11939 }, { "epoch": 2.147442236806617, "grad_norm": 1.4489223957061768, "learning_rate": 6.3684797112955856e-06, "loss": 0.6104, "step": 11940 }, { "epoch": 2.1476220444124787, "grad_norm": 1.5071167945861816, "learning_rate": 6.367919517563587e-06, "loss": 0.6317, "step": 11941 }, { "epoch": 2.14780185201834, "grad_norm": 1.526929497718811, "learning_rate": 6.367359305270714e-06, "loss": 0.7187, "step": 11942 }, { "epoch": 2.147981659624202, "grad_norm": 1.0618681907653809, "learning_rate": 6.36679907442457e-06, "loss": 0.8425, "step": 11943 }, { "epoch": 2.148161467230064, "grad_norm": 1.6872806549072266, "learning_rate": 6.366238825032756e-06, "loss": 0.7143, "step": 11944 }, { "epoch": 2.1483412748359254, "grad_norm": 1.6491459608078003, "learning_rate": 6.365678557102875e-06, "loss": 0.7195, "step": 11945 }, { "epoch": 2.1485210824417873, "grad_norm": 1.5473676919937134, "learning_rate": 6.365118270642528e-06, "loss": 0.7203, "step": 11946 }, { "epoch": 2.148700890047649, "grad_norm": 1.6671072244644165, "learning_rate": 6.364557965659316e-06, "loss": 0.7565, "step": 11947 }, { "epoch": 2.1488806976535106, "grad_norm": 1.4761189222335815, "learning_rate": 6.363997642160844e-06, "loss": 0.7222, "step": 11948 }, { "epoch": 2.1490605052593725, "grad_norm": 1.4752799272537231, "learning_rate": 6.363437300154712e-06, "loss": 0.7127, "step": 11949 }, { "epoch": 2.149240312865234, "grad_norm": 1.5484281778335571, "learning_rate": 6.3628769396485265e-06, "loss": 0.6715, "step": 11950 }, { "epoch": 2.149420120471096, "grad_norm": 1.6199231147766113, "learning_rate": 6.3623165606498886e-06, "loss": 0.6784, "step": 11951 }, { "epoch": 2.1495999280769578, "grad_norm": 1.651742935180664, "learning_rate": 6.3617561631664015e-06, "loss": 0.6923, "step": 11952 }, { "epoch": 2.149779735682819, "grad_norm": 1.0737886428833008, "learning_rate": 6.3611957472056716e-06, "loss": 0.895, "step": 11953 }, { "epoch": 2.149959543288681, "grad_norm": 1.5956040620803833, "learning_rate": 6.360635312775302e-06, "loss": 0.7456, "step": 11954 }, { "epoch": 2.150139350894543, "grad_norm": 1.56465482711792, "learning_rate": 6.3600748598828945e-06, "loss": 0.666, "step": 11955 }, { "epoch": 2.1503191585004044, "grad_norm": 1.42697274684906, "learning_rate": 6.3595143885360575e-06, "loss": 0.7481, "step": 11956 }, { "epoch": 2.1504989661062663, "grad_norm": 0.9995969533920288, "learning_rate": 6.358953898742393e-06, "loss": 0.8815, "step": 11957 }, { "epoch": 2.1506787737121282, "grad_norm": 0.9687403440475464, "learning_rate": 6.358393390509509e-06, "loss": 0.8552, "step": 11958 }, { "epoch": 2.1508585813179897, "grad_norm": 1.3773455619812012, "learning_rate": 6.3578328638450075e-06, "loss": 0.5728, "step": 11959 }, { "epoch": 2.1510383889238516, "grad_norm": 1.6785776615142822, "learning_rate": 6.357272318756495e-06, "loss": 0.6264, "step": 11960 }, { "epoch": 2.151218196529713, "grad_norm": 1.4956111907958984, "learning_rate": 6.35671175525158e-06, "loss": 0.7232, "step": 11961 }, { "epoch": 2.151398004135575, "grad_norm": 1.4660565853118896, "learning_rate": 6.356151173337865e-06, "loss": 0.7187, "step": 11962 }, { "epoch": 2.151577811741437, "grad_norm": 1.4694112539291382, "learning_rate": 6.35559057302296e-06, "loss": 0.6103, "step": 11963 }, { "epoch": 2.1517576193472983, "grad_norm": 1.653381586074829, "learning_rate": 6.355029954314468e-06, "loss": 0.6431, "step": 11964 }, { "epoch": 2.15193742695316, "grad_norm": 1.074655532836914, "learning_rate": 6.354469317219997e-06, "loss": 0.8818, "step": 11965 }, { "epoch": 2.152117234559022, "grad_norm": 1.550134539604187, "learning_rate": 6.353908661747155e-06, "loss": 0.6656, "step": 11966 }, { "epoch": 2.1522970421648835, "grad_norm": 1.6124151945114136, "learning_rate": 6.35334798790355e-06, "loss": 0.6892, "step": 11967 }, { "epoch": 2.1524768497707454, "grad_norm": 1.6162655353546143, "learning_rate": 6.3527872956967885e-06, "loss": 0.6653, "step": 11968 }, { "epoch": 2.152656657376607, "grad_norm": 1.544651985168457, "learning_rate": 6.352226585134478e-06, "loss": 0.6372, "step": 11969 }, { "epoch": 2.1528364649824687, "grad_norm": 1.5061352252960205, "learning_rate": 6.351665856224226e-06, "loss": 0.6957, "step": 11970 }, { "epoch": 2.1530162725883306, "grad_norm": 1.219308614730835, "learning_rate": 6.351105108973644e-06, "loss": 0.8849, "step": 11971 }, { "epoch": 2.153196080194192, "grad_norm": 1.5205193758010864, "learning_rate": 6.3505443433903365e-06, "loss": 0.6537, "step": 11972 }, { "epoch": 2.153375887800054, "grad_norm": 1.0259431600570679, "learning_rate": 6.349983559481917e-06, "loss": 0.906, "step": 11973 }, { "epoch": 2.153555695405916, "grad_norm": 1.511903166770935, "learning_rate": 6.3494227572559895e-06, "loss": 0.5992, "step": 11974 }, { "epoch": 2.1537355030117773, "grad_norm": 1.5349643230438232, "learning_rate": 6.348861936720166e-06, "loss": 0.7114, "step": 11975 }, { "epoch": 2.153915310617639, "grad_norm": 1.5704821348190308, "learning_rate": 6.348301097882056e-06, "loss": 0.7013, "step": 11976 }, { "epoch": 2.154095118223501, "grad_norm": 1.5270130634307861, "learning_rate": 6.347740240749271e-06, "loss": 0.6642, "step": 11977 }, { "epoch": 2.1542749258293625, "grad_norm": 1.4334913492202759, "learning_rate": 6.347179365329417e-06, "loss": 0.7367, "step": 11978 }, { "epoch": 2.1544547334352244, "grad_norm": 1.698441505432129, "learning_rate": 6.346618471630108e-06, "loss": 0.6996, "step": 11979 }, { "epoch": 2.154634541041086, "grad_norm": 1.5096794366836548, "learning_rate": 6.3460575596589535e-06, "loss": 0.687, "step": 11980 }, { "epoch": 2.1548143486469478, "grad_norm": 1.541967511177063, "learning_rate": 6.345496629423564e-06, "loss": 0.6541, "step": 11981 }, { "epoch": 2.1549941562528097, "grad_norm": 1.5896517038345337, "learning_rate": 6.34493568093155e-06, "loss": 0.6481, "step": 11982 }, { "epoch": 2.155173963858671, "grad_norm": 1.5671874284744263, "learning_rate": 6.344374714190524e-06, "loss": 0.6704, "step": 11983 }, { "epoch": 2.155353771464533, "grad_norm": 1.610787272453308, "learning_rate": 6.343813729208097e-06, "loss": 0.7442, "step": 11984 }, { "epoch": 2.1555335790703944, "grad_norm": 1.6011747121810913, "learning_rate": 6.343252725991882e-06, "loss": 0.6413, "step": 11985 }, { "epoch": 2.1557133866762563, "grad_norm": 1.0981041193008423, "learning_rate": 6.342691704549489e-06, "loss": 0.8517, "step": 11986 }, { "epoch": 2.1558931942821182, "grad_norm": 1.5654268264770508, "learning_rate": 6.342130664888531e-06, "loss": 0.694, "step": 11987 }, { "epoch": 2.1560730018879797, "grad_norm": 1.5434867143630981, "learning_rate": 6.341569607016621e-06, "loss": 0.6781, "step": 11988 }, { "epoch": 2.1562528094938416, "grad_norm": 1.5322072505950928, "learning_rate": 6.341008530941373e-06, "loss": 0.7243, "step": 11989 }, { "epoch": 2.1564326170997035, "grad_norm": 1.5108468532562256, "learning_rate": 6.340447436670397e-06, "loss": 0.6759, "step": 11990 }, { "epoch": 2.156612424705565, "grad_norm": 1.5362874269485474, "learning_rate": 6.339886324211311e-06, "loss": 0.7008, "step": 11991 }, { "epoch": 2.156792232311427, "grad_norm": 1.4686328172683716, "learning_rate": 6.3393251935717225e-06, "loss": 0.6856, "step": 11992 }, { "epoch": 2.1569720399172887, "grad_norm": 1.1133043766021729, "learning_rate": 6.3387640447592505e-06, "loss": 0.907, "step": 11993 }, { "epoch": 2.15715184752315, "grad_norm": 1.5375638008117676, "learning_rate": 6.338202877781506e-06, "loss": 0.669, "step": 11994 }, { "epoch": 2.157331655129012, "grad_norm": 1.5983002185821533, "learning_rate": 6.337641692646106e-06, "loss": 0.6404, "step": 11995 }, { "epoch": 2.1575114627348735, "grad_norm": 1.5961238145828247, "learning_rate": 6.33708048936066e-06, "loss": 0.6523, "step": 11996 }, { "epoch": 2.1576912703407354, "grad_norm": 1.5679004192352295, "learning_rate": 6.336519267932789e-06, "loss": 0.6869, "step": 11997 }, { "epoch": 2.1578710779465973, "grad_norm": 1.415302038192749, "learning_rate": 6.335958028370104e-06, "loss": 0.6287, "step": 11998 }, { "epoch": 2.1580508855524587, "grad_norm": 1.7844470739364624, "learning_rate": 6.335396770680222e-06, "loss": 0.7316, "step": 11999 }, { "epoch": 2.1582306931583206, "grad_norm": 1.5891673564910889, "learning_rate": 6.334835494870759e-06, "loss": 0.7361, "step": 12000 }, { "epoch": 2.1582306931583206, "eval_loss": 0.7876812219619751, "eval_runtime": 150.5924, "eval_samples_per_second": 95.503, "eval_steps_per_second": 1.494, "step": 12000 }, { "epoch": 2.1584105007641825, "grad_norm": 1.5580545663833618, "learning_rate": 6.334274200949328e-06, "loss": 0.7108, "step": 12001 }, { "epoch": 2.158590308370044, "grad_norm": 1.043488621711731, "learning_rate": 6.333712888923549e-06, "loss": 0.8266, "step": 12002 }, { "epoch": 2.158770115975906, "grad_norm": 1.5257357358932495, "learning_rate": 6.333151558801035e-06, "loss": 0.6653, "step": 12003 }, { "epoch": 2.1589499235817673, "grad_norm": 1.1452361345291138, "learning_rate": 6.332590210589404e-06, "loss": 0.8636, "step": 12004 }, { "epoch": 2.159129731187629, "grad_norm": 1.5110243558883667, "learning_rate": 6.3320288442962715e-06, "loss": 0.663, "step": 12005 }, { "epoch": 2.159309538793491, "grad_norm": 1.5311797857284546, "learning_rate": 6.331467459929256e-06, "loss": 0.6488, "step": 12006 }, { "epoch": 2.1594893463993525, "grad_norm": 1.5602091550827026, "learning_rate": 6.3309060574959734e-06, "loss": 0.675, "step": 12007 }, { "epoch": 2.1596691540052144, "grad_norm": 1.594590425491333, "learning_rate": 6.330344637004042e-06, "loss": 0.6303, "step": 12008 }, { "epoch": 2.1598489616110763, "grad_norm": 0.962834894657135, "learning_rate": 6.32978319846108e-06, "loss": 0.8911, "step": 12009 }, { "epoch": 2.1600287692169378, "grad_norm": 1.562026858329773, "learning_rate": 6.329221741874705e-06, "loss": 0.6324, "step": 12010 }, { "epoch": 2.1602085768227997, "grad_norm": 1.5669783353805542, "learning_rate": 6.328660267252535e-06, "loss": 0.7503, "step": 12011 }, { "epoch": 2.1603883844286615, "grad_norm": 1.5140461921691895, "learning_rate": 6.328098774602188e-06, "loss": 0.6492, "step": 12012 }, { "epoch": 2.160568192034523, "grad_norm": 1.5273176431655884, "learning_rate": 6.327537263931285e-06, "loss": 0.7218, "step": 12013 }, { "epoch": 2.160747999640385, "grad_norm": 1.4816946983337402, "learning_rate": 6.326975735247441e-06, "loss": 0.6313, "step": 12014 }, { "epoch": 2.1609278072462463, "grad_norm": 1.6194167137145996, "learning_rate": 6.326414188558279e-06, "loss": 0.6433, "step": 12015 }, { "epoch": 2.1611076148521082, "grad_norm": 1.5609670877456665, "learning_rate": 6.325852623871416e-06, "loss": 0.6624, "step": 12016 }, { "epoch": 2.16128742245797, "grad_norm": 1.491760015487671, "learning_rate": 6.325291041194473e-06, "loss": 0.66, "step": 12017 }, { "epoch": 2.1614672300638316, "grad_norm": 1.4473137855529785, "learning_rate": 6.324729440535069e-06, "loss": 0.7053, "step": 12018 }, { "epoch": 2.1616470376696935, "grad_norm": 1.6393494606018066, "learning_rate": 6.324167821900825e-06, "loss": 0.6913, "step": 12019 }, { "epoch": 2.1618268452755554, "grad_norm": 1.535306453704834, "learning_rate": 6.32360618529936e-06, "loss": 0.7016, "step": 12020 }, { "epoch": 2.162006652881417, "grad_norm": 1.5114402770996094, "learning_rate": 6.323044530738298e-06, "loss": 0.7121, "step": 12021 }, { "epoch": 2.1621864604872787, "grad_norm": 1.459810495376587, "learning_rate": 6.322482858225256e-06, "loss": 0.683, "step": 12022 }, { "epoch": 2.16236626809314, "grad_norm": 1.48292875289917, "learning_rate": 6.32192116776786e-06, "loss": 0.6836, "step": 12023 }, { "epoch": 2.162546075699002, "grad_norm": 1.537438154220581, "learning_rate": 6.321359459373725e-06, "loss": 0.7158, "step": 12024 }, { "epoch": 2.162725883304864, "grad_norm": 1.0375553369522095, "learning_rate": 6.320797733050476e-06, "loss": 0.8204, "step": 12025 }, { "epoch": 2.1629056909107254, "grad_norm": 1.545062780380249, "learning_rate": 6.3202359888057365e-06, "loss": 0.6079, "step": 12026 }, { "epoch": 2.1630854985165873, "grad_norm": 1.5888481140136719, "learning_rate": 6.3196742266471265e-06, "loss": 0.6589, "step": 12027 }, { "epoch": 2.163265306122449, "grad_norm": 1.6565444469451904, "learning_rate": 6.319112446582268e-06, "loss": 0.6679, "step": 12028 }, { "epoch": 2.1634451137283106, "grad_norm": 1.5247421264648438, "learning_rate": 6.318550648618785e-06, "loss": 0.6927, "step": 12029 }, { "epoch": 2.1636249213341725, "grad_norm": 1.028867244720459, "learning_rate": 6.3179888327642995e-06, "loss": 0.8565, "step": 12030 }, { "epoch": 2.1638047289400344, "grad_norm": 1.636013150215149, "learning_rate": 6.317426999026436e-06, "loss": 0.7154, "step": 12031 }, { "epoch": 2.163984536545896, "grad_norm": 1.44709050655365, "learning_rate": 6.316865147412816e-06, "loss": 0.7064, "step": 12032 }, { "epoch": 2.1641643441517577, "grad_norm": 1.574623703956604, "learning_rate": 6.316303277931064e-06, "loss": 0.6995, "step": 12033 }, { "epoch": 2.164344151757619, "grad_norm": 1.066413164138794, "learning_rate": 6.315741390588803e-06, "loss": 0.8476, "step": 12034 }, { "epoch": 2.164523959363481, "grad_norm": 1.5602643489837646, "learning_rate": 6.315179485393659e-06, "loss": 0.714, "step": 12035 }, { "epoch": 2.164703766969343, "grad_norm": 1.4274498224258423, "learning_rate": 6.314617562353254e-06, "loss": 0.6424, "step": 12036 }, { "epoch": 2.1648835745752044, "grad_norm": 1.6498280763626099, "learning_rate": 6.314055621475214e-06, "loss": 0.7038, "step": 12037 }, { "epoch": 2.1650633821810663, "grad_norm": 1.569247841835022, "learning_rate": 6.3134936627671635e-06, "loss": 0.6614, "step": 12038 }, { "epoch": 2.1652431897869278, "grad_norm": 1.0151339769363403, "learning_rate": 6.312931686236729e-06, "loss": 0.9069, "step": 12039 }, { "epoch": 2.1654229973927897, "grad_norm": 1.8055471181869507, "learning_rate": 6.312369691891532e-06, "loss": 0.7194, "step": 12040 }, { "epoch": 2.1656028049986515, "grad_norm": 1.8940855264663696, "learning_rate": 6.3118076797392004e-06, "loss": 0.7123, "step": 12041 }, { "epoch": 2.165782612604513, "grad_norm": 1.7254102230072021, "learning_rate": 6.31124564978736e-06, "loss": 0.73, "step": 12042 }, { "epoch": 2.165962420210375, "grad_norm": 1.508174180984497, "learning_rate": 6.310683602043638e-06, "loss": 0.6667, "step": 12043 }, { "epoch": 2.1661422278162368, "grad_norm": 1.4868680238723755, "learning_rate": 6.310121536515658e-06, "loss": 0.6308, "step": 12044 }, { "epoch": 2.1663220354220982, "grad_norm": 1.5910488367080688, "learning_rate": 6.309559453211049e-06, "loss": 0.7462, "step": 12045 }, { "epoch": 2.16650184302796, "grad_norm": 1.63920259475708, "learning_rate": 6.308997352137435e-06, "loss": 0.6815, "step": 12046 }, { "epoch": 2.166681650633822, "grad_norm": 1.5039281845092773, "learning_rate": 6.308435233302446e-06, "loss": 0.6505, "step": 12047 }, { "epoch": 2.1668614582396835, "grad_norm": 1.5543878078460693, "learning_rate": 6.307873096713707e-06, "loss": 0.6437, "step": 12048 }, { "epoch": 2.1670412658455454, "grad_norm": 1.6179149150848389, "learning_rate": 6.307310942378847e-06, "loss": 0.75, "step": 12049 }, { "epoch": 2.167221073451407, "grad_norm": 1.5104535818099976, "learning_rate": 6.306748770305491e-06, "loss": 0.6761, "step": 12050 }, { "epoch": 2.1674008810572687, "grad_norm": 1.6268113851547241, "learning_rate": 6.30618658050127e-06, "loss": 0.6774, "step": 12051 }, { "epoch": 2.1675806886631306, "grad_norm": 1.565356731414795, "learning_rate": 6.305624372973811e-06, "loss": 0.71, "step": 12052 }, { "epoch": 2.167760496268992, "grad_norm": 1.5329300165176392, "learning_rate": 6.305062147730743e-06, "loss": 0.6226, "step": 12053 }, { "epoch": 2.167940303874854, "grad_norm": 1.492556095123291, "learning_rate": 6.304499904779693e-06, "loss": 0.6339, "step": 12054 }, { "epoch": 2.168120111480716, "grad_norm": 1.4778201580047607, "learning_rate": 6.303937644128292e-06, "loss": 0.6823, "step": 12055 }, { "epoch": 2.1682999190865773, "grad_norm": 1.4624885320663452, "learning_rate": 6.303375365784167e-06, "loss": 0.7013, "step": 12056 }, { "epoch": 2.168479726692439, "grad_norm": 1.4942878484725952, "learning_rate": 6.302813069754949e-06, "loss": 0.6925, "step": 12057 }, { "epoch": 2.1686595342983006, "grad_norm": 1.654660701751709, "learning_rate": 6.302250756048267e-06, "loss": 0.6271, "step": 12058 }, { "epoch": 2.1688393419041625, "grad_norm": 1.4932103157043457, "learning_rate": 6.301688424671751e-06, "loss": 0.6644, "step": 12059 }, { "epoch": 2.1690191495100244, "grad_norm": 1.2071928977966309, "learning_rate": 6.3011260756330304e-06, "loss": 0.8566, "step": 12060 }, { "epoch": 2.169198957115886, "grad_norm": 1.6373839378356934, "learning_rate": 6.300563708939738e-06, "loss": 0.71, "step": 12061 }, { "epoch": 2.1693787647217477, "grad_norm": 1.4687997102737427, "learning_rate": 6.3000013245995e-06, "loss": 0.6386, "step": 12062 }, { "epoch": 2.1695585723276096, "grad_norm": 1.4847750663757324, "learning_rate": 6.2994389226199525e-06, "loss": 0.6715, "step": 12063 }, { "epoch": 2.169738379933471, "grad_norm": 1.5495414733886719, "learning_rate": 6.298876503008722e-06, "loss": 0.6673, "step": 12064 }, { "epoch": 2.169918187539333, "grad_norm": 1.4734928607940674, "learning_rate": 6.2983140657734436e-06, "loss": 0.6233, "step": 12065 }, { "epoch": 2.170097995145195, "grad_norm": 1.5451815128326416, "learning_rate": 6.297751610921745e-06, "loss": 0.7586, "step": 12066 }, { "epoch": 2.1702778027510563, "grad_norm": 1.5536342859268188, "learning_rate": 6.297189138461262e-06, "loss": 0.6497, "step": 12067 }, { "epoch": 2.170457610356918, "grad_norm": 1.4777554273605347, "learning_rate": 6.296626648399622e-06, "loss": 0.62, "step": 12068 }, { "epoch": 2.1706374179627796, "grad_norm": 1.5265107154846191, "learning_rate": 6.296064140744461e-06, "loss": 0.669, "step": 12069 }, { "epoch": 2.1708172255686415, "grad_norm": 1.5150353908538818, "learning_rate": 6.29550161550341e-06, "loss": 0.7147, "step": 12070 }, { "epoch": 2.1709970331745034, "grad_norm": 1.5397489070892334, "learning_rate": 6.294939072684102e-06, "loss": 0.6963, "step": 12071 }, { "epoch": 2.171176840780365, "grad_norm": 1.4411333799362183, "learning_rate": 6.294376512294169e-06, "loss": 0.6409, "step": 12072 }, { "epoch": 2.1713566483862268, "grad_norm": 1.5376968383789062, "learning_rate": 6.293813934341246e-06, "loss": 0.6927, "step": 12073 }, { "epoch": 2.1715364559920887, "grad_norm": 1.4969751834869385, "learning_rate": 6.293251338832965e-06, "loss": 0.6871, "step": 12074 }, { "epoch": 2.17171626359795, "grad_norm": 1.5635671615600586, "learning_rate": 6.292688725776962e-06, "loss": 0.7063, "step": 12075 }, { "epoch": 2.171896071203812, "grad_norm": 1.1276479959487915, "learning_rate": 6.2921260951808676e-06, "loss": 0.8692, "step": 12076 }, { "epoch": 2.1720758788096735, "grad_norm": 1.4927948713302612, "learning_rate": 6.291563447052318e-06, "loss": 0.679, "step": 12077 }, { "epoch": 2.1722556864155353, "grad_norm": 1.463565707206726, "learning_rate": 6.291000781398947e-06, "loss": 0.6887, "step": 12078 }, { "epoch": 2.1724354940213972, "grad_norm": 1.5447865724563599, "learning_rate": 6.29043809822839e-06, "loss": 0.6601, "step": 12079 }, { "epoch": 2.1726153016272587, "grad_norm": 1.143075942993164, "learning_rate": 6.2898753975482795e-06, "loss": 0.8392, "step": 12080 }, { "epoch": 2.1727951092331206, "grad_norm": 1.4350684881210327, "learning_rate": 6.289312679366255e-06, "loss": 0.6038, "step": 12081 }, { "epoch": 2.1729749168389825, "grad_norm": 1.5532135963439941, "learning_rate": 6.2887499436899465e-06, "loss": 0.651, "step": 12082 }, { "epoch": 2.173154724444844, "grad_norm": 1.5910965204238892, "learning_rate": 6.288187190526993e-06, "loss": 0.6584, "step": 12083 }, { "epoch": 2.173334532050706, "grad_norm": 1.6263582706451416, "learning_rate": 6.28762441988503e-06, "loss": 0.7601, "step": 12084 }, { "epoch": 2.1735143396565677, "grad_norm": 1.0666803121566772, "learning_rate": 6.287061631771693e-06, "loss": 0.8939, "step": 12085 }, { "epoch": 2.173694147262429, "grad_norm": 1.6007033586502075, "learning_rate": 6.286498826194619e-06, "loss": 0.6854, "step": 12086 }, { "epoch": 2.173873954868291, "grad_norm": 1.5595624446868896, "learning_rate": 6.285936003161445e-06, "loss": 0.6738, "step": 12087 }, { "epoch": 2.1740537624741525, "grad_norm": 1.4602575302124023, "learning_rate": 6.285373162679804e-06, "loss": 0.6081, "step": 12088 }, { "epoch": 2.1742335700800144, "grad_norm": 1.6882050037384033, "learning_rate": 6.2848103047573386e-06, "loss": 0.7126, "step": 12089 }, { "epoch": 2.1744133776858763, "grad_norm": 1.5298577547073364, "learning_rate": 6.2842474294016816e-06, "loss": 0.6726, "step": 12090 }, { "epoch": 2.1745931852917377, "grad_norm": 1.42350435256958, "learning_rate": 6.283684536620472e-06, "loss": 0.6447, "step": 12091 }, { "epoch": 2.1747729928975996, "grad_norm": 1.548391342163086, "learning_rate": 6.2831216264213476e-06, "loss": 0.686, "step": 12092 }, { "epoch": 2.174952800503461, "grad_norm": 1.526084065437317, "learning_rate": 6.282558698811948e-06, "loss": 0.683, "step": 12093 }, { "epoch": 2.175132608109323, "grad_norm": 1.1280121803283691, "learning_rate": 6.281995753799908e-06, "loss": 0.8827, "step": 12094 }, { "epoch": 2.175312415715185, "grad_norm": 1.6842701435089111, "learning_rate": 6.281432791392867e-06, "loss": 0.7712, "step": 12095 }, { "epoch": 2.1754922233210463, "grad_norm": 1.1298761367797852, "learning_rate": 6.280869811598465e-06, "loss": 0.9057, "step": 12096 }, { "epoch": 2.175672030926908, "grad_norm": 1.4985167980194092, "learning_rate": 6.280306814424342e-06, "loss": 0.7407, "step": 12097 }, { "epoch": 2.17585183853277, "grad_norm": 1.539901852607727, "learning_rate": 6.2797437998781355e-06, "loss": 0.6768, "step": 12098 }, { "epoch": 2.1760316461386315, "grad_norm": 1.0568723678588867, "learning_rate": 6.279180767967482e-06, "loss": 0.8478, "step": 12099 }, { "epoch": 2.1762114537444934, "grad_norm": 1.5838881731033325, "learning_rate": 6.278617718700027e-06, "loss": 0.6844, "step": 12100 }, { "epoch": 2.1763912613503553, "grad_norm": 1.553257703781128, "learning_rate": 6.278054652083405e-06, "loss": 0.6567, "step": 12101 }, { "epoch": 2.1765710689562168, "grad_norm": 1.0536348819732666, "learning_rate": 6.2774915681252604e-06, "loss": 0.9226, "step": 12102 }, { "epoch": 2.1767508765620787, "grad_norm": 2.0578670501708984, "learning_rate": 6.276928466833229e-06, "loss": 0.7139, "step": 12103 }, { "epoch": 2.17693068416794, "grad_norm": 1.6958316564559937, "learning_rate": 6.2763653482149565e-06, "loss": 0.6934, "step": 12104 }, { "epoch": 2.177110491773802, "grad_norm": 1.6213948726654053, "learning_rate": 6.275802212278079e-06, "loss": 0.7141, "step": 12105 }, { "epoch": 2.177290299379664, "grad_norm": 1.5020744800567627, "learning_rate": 6.27523905903024e-06, "loss": 0.618, "step": 12106 }, { "epoch": 2.1774701069855253, "grad_norm": 1.6448436975479126, "learning_rate": 6.27467588847908e-06, "loss": 0.6541, "step": 12107 }, { "epoch": 2.1776499145913872, "grad_norm": 1.5129406452178955, "learning_rate": 6.274112700632242e-06, "loss": 0.7351, "step": 12108 }, { "epoch": 2.177829722197249, "grad_norm": 1.5292704105377197, "learning_rate": 6.273549495497365e-06, "loss": 0.7549, "step": 12109 }, { "epoch": 2.1780095298031106, "grad_norm": 1.4777835607528687, "learning_rate": 6.272986273082095e-06, "loss": 0.6716, "step": 12110 }, { "epoch": 2.1781893374089725, "grad_norm": 1.6005189418792725, "learning_rate": 6.272423033394068e-06, "loss": 0.7123, "step": 12111 }, { "epoch": 2.178369145014834, "grad_norm": 1.6062836647033691, "learning_rate": 6.271859776440933e-06, "loss": 0.7547, "step": 12112 }, { "epoch": 2.178548952620696, "grad_norm": 1.5496046543121338, "learning_rate": 6.2712965022303275e-06, "loss": 0.658, "step": 12113 }, { "epoch": 2.1787287602265577, "grad_norm": 1.5669448375701904, "learning_rate": 6.270733210769898e-06, "loss": 0.7175, "step": 12114 }, { "epoch": 2.178908567832419, "grad_norm": 1.6139366626739502, "learning_rate": 6.270169902067286e-06, "loss": 0.7114, "step": 12115 }, { "epoch": 2.179088375438281, "grad_norm": 1.6178734302520752, "learning_rate": 6.269606576130135e-06, "loss": 0.6392, "step": 12116 }, { "epoch": 2.179268183044143, "grad_norm": 1.5322911739349365, "learning_rate": 6.269043232966087e-06, "loss": 0.665, "step": 12117 }, { "epoch": 2.1794479906500044, "grad_norm": 1.5497430562973022, "learning_rate": 6.268479872582789e-06, "loss": 0.6225, "step": 12118 }, { "epoch": 2.1796277982558663, "grad_norm": 1.5370391607284546, "learning_rate": 6.267916494987883e-06, "loss": 0.6632, "step": 12119 }, { "epoch": 2.179807605861728, "grad_norm": 1.6303009986877441, "learning_rate": 6.2673531001890154e-06, "loss": 0.73, "step": 12120 }, { "epoch": 2.1799874134675896, "grad_norm": 1.5235437154769897, "learning_rate": 6.266789688193828e-06, "loss": 0.6564, "step": 12121 }, { "epoch": 2.1801672210734515, "grad_norm": 1.5180824995040894, "learning_rate": 6.266226259009967e-06, "loss": 0.6145, "step": 12122 }, { "epoch": 2.180347028679313, "grad_norm": 1.1832424402236938, "learning_rate": 6.265662812645077e-06, "loss": 0.8798, "step": 12123 }, { "epoch": 2.180526836285175, "grad_norm": 1.48867928981781, "learning_rate": 6.265099349106804e-06, "loss": 0.6588, "step": 12124 }, { "epoch": 2.1807066438910367, "grad_norm": 1.4400441646575928, "learning_rate": 6.264535868402791e-06, "loss": 0.6522, "step": 12125 }, { "epoch": 2.180886451496898, "grad_norm": 1.5380260944366455, "learning_rate": 6.263972370540687e-06, "loss": 0.657, "step": 12126 }, { "epoch": 2.18106625910276, "grad_norm": 1.4775726795196533, "learning_rate": 6.263408855528136e-06, "loss": 0.6569, "step": 12127 }, { "epoch": 2.181246066708622, "grad_norm": 1.1965574026107788, "learning_rate": 6.262845323372784e-06, "loss": 0.833, "step": 12128 }, { "epoch": 2.1814258743144834, "grad_norm": 1.8421835899353027, "learning_rate": 6.2622817740822786e-06, "loss": 0.7231, "step": 12129 }, { "epoch": 2.1816056819203453, "grad_norm": 1.506753921508789, "learning_rate": 6.261718207664267e-06, "loss": 0.692, "step": 12130 }, { "epoch": 2.1817854895262068, "grad_norm": 1.6190375089645386, "learning_rate": 6.2611546241263934e-06, "loss": 0.6725, "step": 12131 }, { "epoch": 2.1819652971320687, "grad_norm": 1.521877408027649, "learning_rate": 6.260591023476307e-06, "loss": 0.6897, "step": 12132 }, { "epoch": 2.1821451047379306, "grad_norm": 1.5659205913543701, "learning_rate": 6.260027405721654e-06, "loss": 0.6936, "step": 12133 }, { "epoch": 2.182324912343792, "grad_norm": 1.127922534942627, "learning_rate": 6.259463770870082e-06, "loss": 0.8583, "step": 12134 }, { "epoch": 2.182504719949654, "grad_norm": 1.513753056526184, "learning_rate": 6.25890011892924e-06, "loss": 0.6721, "step": 12135 }, { "epoch": 2.182684527555516, "grad_norm": 1.407103419303894, "learning_rate": 6.258336449906775e-06, "loss": 0.6531, "step": 12136 }, { "epoch": 2.1828643351613772, "grad_norm": 1.6014317274093628, "learning_rate": 6.257772763810336e-06, "loss": 0.706, "step": 12137 }, { "epoch": 2.183044142767239, "grad_norm": 1.4878522157669067, "learning_rate": 6.25720906064757e-06, "loss": 0.6627, "step": 12138 }, { "epoch": 2.1832239503731006, "grad_norm": 1.5750641822814941, "learning_rate": 6.256645340426126e-06, "loss": 0.6922, "step": 12139 }, { "epoch": 2.1834037579789625, "grad_norm": 1.1909621953964233, "learning_rate": 6.256081603153656e-06, "loss": 0.862, "step": 12140 }, { "epoch": 2.1835835655848244, "grad_norm": 1.5564274787902832, "learning_rate": 6.2555178488378045e-06, "loss": 0.6666, "step": 12141 }, { "epoch": 2.183763373190686, "grad_norm": 1.6005984544754028, "learning_rate": 6.254954077486226e-06, "loss": 0.6512, "step": 12142 }, { "epoch": 2.1839431807965477, "grad_norm": 1.4591845273971558, "learning_rate": 6.254390289106565e-06, "loss": 0.5867, "step": 12143 }, { "epoch": 2.1841229884024096, "grad_norm": 1.4356646537780762, "learning_rate": 6.253826483706474e-06, "loss": 0.6227, "step": 12144 }, { "epoch": 2.184302796008271, "grad_norm": 1.5230770111083984, "learning_rate": 6.2532626612936035e-06, "loss": 0.6352, "step": 12145 }, { "epoch": 2.184482603614133, "grad_norm": 1.5383633375167847, "learning_rate": 6.2526988218756035e-06, "loss": 0.6818, "step": 12146 }, { "epoch": 2.1846624112199944, "grad_norm": 1.5639231204986572, "learning_rate": 6.252134965460123e-06, "loss": 0.648, "step": 12147 }, { "epoch": 2.1848422188258563, "grad_norm": 1.6877689361572266, "learning_rate": 6.251571092054814e-06, "loss": 0.6787, "step": 12148 }, { "epoch": 2.185022026431718, "grad_norm": 1.6149497032165527, "learning_rate": 6.251007201667328e-06, "loss": 0.6785, "step": 12149 }, { "epoch": 2.1852018340375796, "grad_norm": 1.1910138130187988, "learning_rate": 6.250443294305315e-06, "loss": 0.8798, "step": 12150 }, { "epoch": 2.1853816416434415, "grad_norm": 1.4640684127807617, "learning_rate": 6.249879369976428e-06, "loss": 0.7148, "step": 12151 }, { "epoch": 2.1855614492493034, "grad_norm": 1.6059802770614624, "learning_rate": 6.2493154286883186e-06, "loss": 0.6809, "step": 12152 }, { "epoch": 2.185741256855165, "grad_norm": 1.5387985706329346, "learning_rate": 6.2487514704486375e-06, "loss": 0.6461, "step": 12153 }, { "epoch": 2.1859210644610267, "grad_norm": 1.1533774137496948, "learning_rate": 6.248187495265038e-06, "loss": 0.8367, "step": 12154 }, { "epoch": 2.1861008720668886, "grad_norm": 1.591949224472046, "learning_rate": 6.247623503145171e-06, "loss": 0.7569, "step": 12155 }, { "epoch": 2.18628067967275, "grad_norm": 1.5568077564239502, "learning_rate": 6.247059494096691e-06, "loss": 0.7063, "step": 12156 }, { "epoch": 2.186460487278612, "grad_norm": 1.6011394262313843, "learning_rate": 6.246495468127249e-06, "loss": 0.713, "step": 12157 }, { "epoch": 2.1866402948844734, "grad_norm": 1.0689350366592407, "learning_rate": 6.2459314252445e-06, "loss": 0.8929, "step": 12158 }, { "epoch": 2.1868201024903353, "grad_norm": 1.542122721672058, "learning_rate": 6.2453673654560955e-06, "loss": 0.7141, "step": 12159 }, { "epoch": 2.186999910096197, "grad_norm": 1.5543006658554077, "learning_rate": 6.2448032887696895e-06, "loss": 0.7165, "step": 12160 }, { "epoch": 2.1871797177020587, "grad_norm": 1.5294833183288574, "learning_rate": 6.2442391951929374e-06, "loss": 0.6567, "step": 12161 }, { "epoch": 2.1873595253079205, "grad_norm": 0.9480369687080383, "learning_rate": 6.243675084733492e-06, "loss": 0.9141, "step": 12162 }, { "epoch": 2.1875393329137824, "grad_norm": 1.501763939857483, "learning_rate": 6.243110957399008e-06, "loss": 0.6285, "step": 12163 }, { "epoch": 2.187719140519644, "grad_norm": 1.5855475664138794, "learning_rate": 6.242546813197139e-06, "loss": 0.6176, "step": 12164 }, { "epoch": 2.187898948125506, "grad_norm": 1.5929633378982544, "learning_rate": 6.2419826521355395e-06, "loss": 0.6748, "step": 12165 }, { "epoch": 2.1880787557313672, "grad_norm": 1.0262166261672974, "learning_rate": 6.241418474221865e-06, "loss": 0.8816, "step": 12166 }, { "epoch": 2.188258563337229, "grad_norm": 0.9460545778274536, "learning_rate": 6.240854279463771e-06, "loss": 0.8915, "step": 12167 }, { "epoch": 2.188438370943091, "grad_norm": 1.03887939453125, "learning_rate": 6.240290067868913e-06, "loss": 0.8809, "step": 12168 }, { "epoch": 2.1886181785489525, "grad_norm": 1.4538824558258057, "learning_rate": 6.239725839444946e-06, "loss": 0.6098, "step": 12169 }, { "epoch": 2.1887979861548144, "grad_norm": 1.83066725730896, "learning_rate": 6.239161594199528e-06, "loss": 0.6418, "step": 12170 }, { "epoch": 2.1889777937606762, "grad_norm": 1.5228806734085083, "learning_rate": 6.23859733214031e-06, "loss": 0.6666, "step": 12171 }, { "epoch": 2.1891576013665377, "grad_norm": 1.5999895334243774, "learning_rate": 6.238033053274953e-06, "loss": 0.6808, "step": 12172 }, { "epoch": 2.1893374089723996, "grad_norm": 1.0747640132904053, "learning_rate": 6.237468757611111e-06, "loss": 0.9038, "step": 12173 }, { "epoch": 2.1895172165782615, "grad_norm": 1.1043622493743896, "learning_rate": 6.236904445156442e-06, "loss": 0.8574, "step": 12174 }, { "epoch": 2.189697024184123, "grad_norm": 1.6097629070281982, "learning_rate": 6.236340115918602e-06, "loss": 0.6377, "step": 12175 }, { "epoch": 2.189876831789985, "grad_norm": 1.5090893507003784, "learning_rate": 6.235775769905251e-06, "loss": 0.6625, "step": 12176 }, { "epoch": 2.1900566393958463, "grad_norm": 1.4454474449157715, "learning_rate": 6.2352114071240425e-06, "loss": 0.6301, "step": 12177 }, { "epoch": 2.190236447001708, "grad_norm": 1.4593122005462646, "learning_rate": 6.2346470275826376e-06, "loss": 0.6107, "step": 12178 }, { "epoch": 2.19041625460757, "grad_norm": 1.669030785560608, "learning_rate": 6.23408263128869e-06, "loss": 0.6601, "step": 12179 }, { "epoch": 2.1905960622134315, "grad_norm": 1.5088330507278442, "learning_rate": 6.233518218249863e-06, "loss": 0.6876, "step": 12180 }, { "epoch": 2.1907758698192934, "grad_norm": 1.5663880109786987, "learning_rate": 6.2329537884738115e-06, "loss": 0.6909, "step": 12181 }, { "epoch": 2.1909556774251553, "grad_norm": 1.4716607332229614, "learning_rate": 6.232389341968193e-06, "loss": 0.6484, "step": 12182 }, { "epoch": 2.1911354850310167, "grad_norm": 1.4817341566085815, "learning_rate": 6.23182487874067e-06, "loss": 0.6997, "step": 12183 }, { "epoch": 2.1913152926368786, "grad_norm": 1.6011416912078857, "learning_rate": 6.2312603987989e-06, "loss": 0.7637, "step": 12184 }, { "epoch": 2.19149510024274, "grad_norm": 1.5687958002090454, "learning_rate": 6.230695902150541e-06, "loss": 0.7077, "step": 12185 }, { "epoch": 2.191674907848602, "grad_norm": 1.1679341793060303, "learning_rate": 6.230131388803255e-06, "loss": 0.8313, "step": 12186 }, { "epoch": 2.191854715454464, "grad_norm": 1.473321557044983, "learning_rate": 6.229566858764698e-06, "loss": 0.6311, "step": 12187 }, { "epoch": 2.1920345230603253, "grad_norm": 1.5702449083328247, "learning_rate": 6.229002312042534e-06, "loss": 0.6979, "step": 12188 }, { "epoch": 2.192214330666187, "grad_norm": 1.5129365921020508, "learning_rate": 6.228437748644421e-06, "loss": 0.6429, "step": 12189 }, { "epoch": 2.192394138272049, "grad_norm": 1.6830637454986572, "learning_rate": 6.227873168578018e-06, "loss": 0.7254, "step": 12190 }, { "epoch": 2.1925739458779105, "grad_norm": 1.7367966175079346, "learning_rate": 6.227308571850988e-06, "loss": 0.7163, "step": 12191 }, { "epoch": 2.1927537534837724, "grad_norm": 1.5699567794799805, "learning_rate": 6.226743958470991e-06, "loss": 0.6532, "step": 12192 }, { "epoch": 2.192933561089634, "grad_norm": 1.5896438360214233, "learning_rate": 6.2261793284456894e-06, "loss": 0.7384, "step": 12193 }, { "epoch": 2.193113368695496, "grad_norm": 1.5420787334442139, "learning_rate": 6.225614681782743e-06, "loss": 0.664, "step": 12194 }, { "epoch": 2.1932931763013577, "grad_norm": 1.4721544981002808, "learning_rate": 6.225050018489811e-06, "loss": 0.6745, "step": 12195 }, { "epoch": 2.193472983907219, "grad_norm": 1.227504849433899, "learning_rate": 6.2244853385745605e-06, "loss": 0.866, "step": 12196 }, { "epoch": 2.193652791513081, "grad_norm": 1.6409146785736084, "learning_rate": 6.22392064204465e-06, "loss": 0.7639, "step": 12197 }, { "epoch": 2.193832599118943, "grad_norm": 1.5363080501556396, "learning_rate": 6.223355928907741e-06, "loss": 0.7295, "step": 12198 }, { "epoch": 2.1940124067248044, "grad_norm": 1.5165640115737915, "learning_rate": 6.222791199171499e-06, "loss": 0.6501, "step": 12199 }, { "epoch": 2.1941922143306662, "grad_norm": 1.5500487089157104, "learning_rate": 6.222226452843585e-06, "loss": 0.6711, "step": 12200 }, { "epoch": 2.1943720219365277, "grad_norm": 1.1553843021392822, "learning_rate": 6.2216616899316595e-06, "loss": 0.8743, "step": 12201 }, { "epoch": 2.1945518295423896, "grad_norm": 1.5923449993133545, "learning_rate": 6.221096910443391e-06, "loss": 0.6505, "step": 12202 }, { "epoch": 2.1947316371482515, "grad_norm": 1.3993490934371948, "learning_rate": 6.220532114386437e-06, "loss": 0.6768, "step": 12203 }, { "epoch": 2.194911444754113, "grad_norm": 1.5357739925384521, "learning_rate": 6.2199673017684635e-06, "loss": 0.7182, "step": 12204 }, { "epoch": 2.195091252359975, "grad_norm": 1.0682283639907837, "learning_rate": 6.219402472597136e-06, "loss": 0.9012, "step": 12205 }, { "epoch": 2.1952710599658367, "grad_norm": 1.604288935661316, "learning_rate": 6.218837626880118e-06, "loss": 0.6383, "step": 12206 }, { "epoch": 2.195450867571698, "grad_norm": 1.5416784286499023, "learning_rate": 6.21827276462507e-06, "loss": 0.7226, "step": 12207 }, { "epoch": 2.19563067517756, "grad_norm": 1.0168401002883911, "learning_rate": 6.217707885839661e-06, "loss": 0.8643, "step": 12208 }, { "epoch": 2.195810482783422, "grad_norm": 1.549316167831421, "learning_rate": 6.217142990531553e-06, "loss": 0.6976, "step": 12209 }, { "epoch": 2.1959902903892834, "grad_norm": 1.6451934576034546, "learning_rate": 6.216578078708413e-06, "loss": 0.6348, "step": 12210 }, { "epoch": 2.1961700979951453, "grad_norm": 1.5535954236984253, "learning_rate": 6.216013150377902e-06, "loss": 0.7022, "step": 12211 }, { "epoch": 2.1963499056010067, "grad_norm": 1.5115810632705688, "learning_rate": 6.215448205547691e-06, "loss": 0.6603, "step": 12212 }, { "epoch": 2.1965297132068686, "grad_norm": 1.5757197141647339, "learning_rate": 6.214883244225441e-06, "loss": 0.6179, "step": 12213 }, { "epoch": 2.1967095208127305, "grad_norm": 1.486688494682312, "learning_rate": 6.21431826641882e-06, "loss": 0.6583, "step": 12214 }, { "epoch": 2.196889328418592, "grad_norm": 1.5289970636367798, "learning_rate": 6.213753272135492e-06, "loss": 0.6413, "step": 12215 }, { "epoch": 2.197069136024454, "grad_norm": 1.4864140748977661, "learning_rate": 6.213188261383127e-06, "loss": 0.6994, "step": 12216 }, { "epoch": 2.1972489436303158, "grad_norm": 1.5971652269363403, "learning_rate": 6.212623234169388e-06, "loss": 0.7228, "step": 12217 }, { "epoch": 2.197428751236177, "grad_norm": 1.6215033531188965, "learning_rate": 6.212058190501943e-06, "loss": 0.6422, "step": 12218 }, { "epoch": 2.197608558842039, "grad_norm": 1.5606738328933716, "learning_rate": 6.2114931303884595e-06, "loss": 0.7116, "step": 12219 }, { "epoch": 2.1977883664479005, "grad_norm": 1.164896011352539, "learning_rate": 6.210928053836603e-06, "loss": 0.8506, "step": 12220 }, { "epoch": 2.1979681740537624, "grad_norm": 1.0557371377944946, "learning_rate": 6.210362960854043e-06, "loss": 0.8804, "step": 12221 }, { "epoch": 2.1981479816596243, "grad_norm": 1.7465085983276367, "learning_rate": 6.209797851448444e-06, "loss": 0.6638, "step": 12222 }, { "epoch": 2.1983277892654858, "grad_norm": 1.6708402633666992, "learning_rate": 6.209232725627477e-06, "loss": 0.6897, "step": 12223 }, { "epoch": 2.1985075968713477, "grad_norm": 1.7047804594039917, "learning_rate": 6.208667583398808e-06, "loss": 0.6922, "step": 12224 }, { "epoch": 2.1986874044772096, "grad_norm": 1.4103918075561523, "learning_rate": 6.208102424770106e-06, "loss": 0.6747, "step": 12225 }, { "epoch": 2.198867212083071, "grad_norm": 1.4819477796554565, "learning_rate": 6.207537249749038e-06, "loss": 0.6267, "step": 12226 }, { "epoch": 2.199047019688933, "grad_norm": 1.6180694103240967, "learning_rate": 6.206972058343275e-06, "loss": 0.6616, "step": 12227 }, { "epoch": 2.199226827294795, "grad_norm": 1.7121294736862183, "learning_rate": 6.206406850560485e-06, "loss": 0.6733, "step": 12228 }, { "epoch": 2.1994066349006562, "grad_norm": 1.5748804807662964, "learning_rate": 6.205841626408337e-06, "loss": 0.7142, "step": 12229 }, { "epoch": 2.199586442506518, "grad_norm": 1.2114382982254028, "learning_rate": 6.2052763858945e-06, "loss": 0.8756, "step": 12230 }, { "epoch": 2.1997662501123796, "grad_norm": 1.6083124876022339, "learning_rate": 6.2047111290266435e-06, "loss": 0.6749, "step": 12231 }, { "epoch": 2.1999460577182415, "grad_norm": 1.274459719657898, "learning_rate": 6.204145855812439e-06, "loss": 0.8771, "step": 12232 }, { "epoch": 2.2001258653241034, "grad_norm": 2.033358573913574, "learning_rate": 6.203580566259555e-06, "loss": 0.6468, "step": 12233 }, { "epoch": 2.200305672929965, "grad_norm": 1.545122504234314, "learning_rate": 6.203015260375661e-06, "loss": 0.7121, "step": 12234 }, { "epoch": 2.2004854805358267, "grad_norm": 1.478517770767212, "learning_rate": 6.20244993816843e-06, "loss": 0.6347, "step": 12235 }, { "epoch": 2.200665288141688, "grad_norm": 1.4950636625289917, "learning_rate": 6.201884599645529e-06, "loss": 0.6365, "step": 12236 }, { "epoch": 2.20084509574755, "grad_norm": 1.533003568649292, "learning_rate": 6.201319244814632e-06, "loss": 0.6843, "step": 12237 }, { "epoch": 2.201024903353412, "grad_norm": 1.4713835716247559, "learning_rate": 6.20075387368341e-06, "loss": 0.739, "step": 12238 }, { "epoch": 2.2012047109592734, "grad_norm": 1.0874712467193604, "learning_rate": 6.200188486259533e-06, "loss": 0.9708, "step": 12239 }, { "epoch": 2.2013845185651353, "grad_norm": 1.5844242572784424, "learning_rate": 6.199623082550672e-06, "loss": 0.6674, "step": 12240 }, { "epoch": 2.201564326170997, "grad_norm": 1.464012861251831, "learning_rate": 6.199057662564501e-06, "loss": 0.7171, "step": 12241 }, { "epoch": 2.2017441337768586, "grad_norm": 1.4365170001983643, "learning_rate": 6.198492226308691e-06, "loss": 0.6589, "step": 12242 }, { "epoch": 2.2019239413827205, "grad_norm": 1.6267050504684448, "learning_rate": 6.1979267737909145e-06, "loss": 0.6899, "step": 12243 }, { "epoch": 2.2021037489885824, "grad_norm": 1.5484564304351807, "learning_rate": 6.197361305018842e-06, "loss": 0.6682, "step": 12244 }, { "epoch": 2.202283556594444, "grad_norm": 1.5502127408981323, "learning_rate": 6.1967958200001484e-06, "loss": 0.6854, "step": 12245 }, { "epoch": 2.2024633642003058, "grad_norm": 1.524660587310791, "learning_rate": 6.196230318742506e-06, "loss": 0.7178, "step": 12246 }, { "epoch": 2.202643171806167, "grad_norm": 1.6737412214279175, "learning_rate": 6.1956648012535885e-06, "loss": 0.7031, "step": 12247 }, { "epoch": 2.202822979412029, "grad_norm": 1.5327941179275513, "learning_rate": 6.195099267541067e-06, "loss": 0.6761, "step": 12248 }, { "epoch": 2.203002787017891, "grad_norm": 1.2048348188400269, "learning_rate": 6.1945337176126165e-06, "loss": 0.8717, "step": 12249 }, { "epoch": 2.2031825946237524, "grad_norm": 1.5950039625167847, "learning_rate": 6.193968151475911e-06, "loss": 0.7081, "step": 12250 }, { "epoch": 2.2033624022296143, "grad_norm": 1.6167939901351929, "learning_rate": 6.193402569138626e-06, "loss": 0.7125, "step": 12251 }, { "epoch": 2.203542209835476, "grad_norm": 1.5839132070541382, "learning_rate": 6.1928369706084325e-06, "loss": 0.6856, "step": 12252 }, { "epoch": 2.2037220174413377, "grad_norm": 1.6258008480072021, "learning_rate": 6.192271355893007e-06, "loss": 0.7143, "step": 12253 }, { "epoch": 2.2039018250471996, "grad_norm": 1.5841519832611084, "learning_rate": 6.1917057250000236e-06, "loss": 0.6872, "step": 12254 }, { "epoch": 2.204081632653061, "grad_norm": 1.604276418685913, "learning_rate": 6.191140077937158e-06, "loss": 0.7136, "step": 12255 }, { "epoch": 2.204261440258923, "grad_norm": 1.5478609800338745, "learning_rate": 6.190574414712083e-06, "loss": 0.6587, "step": 12256 }, { "epoch": 2.204441247864785, "grad_norm": 1.5218145847320557, "learning_rate": 6.190008735332477e-06, "loss": 0.6926, "step": 12257 }, { "epoch": 2.2046210554706462, "grad_norm": 1.0681891441345215, "learning_rate": 6.1894430398060115e-06, "loss": 0.8747, "step": 12258 }, { "epoch": 2.204800863076508, "grad_norm": 1.5135077238082886, "learning_rate": 6.188877328140366e-06, "loss": 0.6568, "step": 12259 }, { "epoch": 2.20498067068237, "grad_norm": 1.579724907875061, "learning_rate": 6.1883116003432155e-06, "loss": 0.6935, "step": 12260 }, { "epoch": 2.2051604782882315, "grad_norm": 0.9590626358985901, "learning_rate": 6.187745856422236e-06, "loss": 0.8674, "step": 12261 }, { "epoch": 2.2053402858940934, "grad_norm": 1.4675397872924805, "learning_rate": 6.187180096385102e-06, "loss": 0.6139, "step": 12262 }, { "epoch": 2.2055200934999553, "grad_norm": 1.6085432767868042, "learning_rate": 6.186614320239493e-06, "loss": 0.683, "step": 12263 }, { "epoch": 2.2056999011058167, "grad_norm": 0.9456951022148132, "learning_rate": 6.186048527993085e-06, "loss": 0.8718, "step": 12264 }, { "epoch": 2.2058797087116786, "grad_norm": 1.5224148035049438, "learning_rate": 6.185482719653555e-06, "loss": 0.7239, "step": 12265 }, { "epoch": 2.20605951631754, "grad_norm": 1.6977581977844238, "learning_rate": 6.1849168952285785e-06, "loss": 0.643, "step": 12266 }, { "epoch": 2.206239323923402, "grad_norm": 1.5616182088851929, "learning_rate": 6.184351054725837e-06, "loss": 0.7172, "step": 12267 }, { "epoch": 2.206419131529264, "grad_norm": 1.5917365550994873, "learning_rate": 6.183785198153004e-06, "loss": 0.6788, "step": 12268 }, { "epoch": 2.2065989391351253, "grad_norm": 1.5135506391525269, "learning_rate": 6.183219325517758e-06, "loss": 0.6944, "step": 12269 }, { "epoch": 2.206778746740987, "grad_norm": 1.6777468919754028, "learning_rate": 6.182653436827781e-06, "loss": 0.6856, "step": 12270 }, { "epoch": 2.206958554346849, "grad_norm": 1.4929533004760742, "learning_rate": 6.182087532090747e-06, "loss": 0.6882, "step": 12271 }, { "epoch": 2.2071383619527105, "grad_norm": 1.524556279182434, "learning_rate": 6.181521611314336e-06, "loss": 0.5956, "step": 12272 }, { "epoch": 2.2073181695585724, "grad_norm": 1.0563353300094604, "learning_rate": 6.180955674506228e-06, "loss": 0.898, "step": 12273 }, { "epoch": 2.207497977164434, "grad_norm": 1.4990805387496948, "learning_rate": 6.180389721674101e-06, "loss": 0.6528, "step": 12274 }, { "epoch": 2.2076777847702957, "grad_norm": 1.5034973621368408, "learning_rate": 6.179823752825635e-06, "loss": 0.6889, "step": 12275 }, { "epoch": 2.2078575923761576, "grad_norm": 1.5255922079086304, "learning_rate": 6.179257767968506e-06, "loss": 0.6593, "step": 12276 }, { "epoch": 2.208037399982019, "grad_norm": 1.6027003526687622, "learning_rate": 6.1786917671104e-06, "loss": 0.6701, "step": 12277 }, { "epoch": 2.208217207587881, "grad_norm": 1.5297930240631104, "learning_rate": 6.178125750258991e-06, "loss": 0.6767, "step": 12278 }, { "epoch": 2.208397015193743, "grad_norm": 1.4308433532714844, "learning_rate": 6.1775597174219616e-06, "loss": 0.5912, "step": 12279 }, { "epoch": 2.2085768227996043, "grad_norm": 1.5295546054840088, "learning_rate": 6.176993668606992e-06, "loss": 0.7312, "step": 12280 }, { "epoch": 2.208756630405466, "grad_norm": 1.1665788888931274, "learning_rate": 6.176427603821763e-06, "loss": 0.827, "step": 12281 }, { "epoch": 2.208936438011328, "grad_norm": 1.0876739025115967, "learning_rate": 6.175861523073955e-06, "loss": 0.8862, "step": 12282 }, { "epoch": 2.2091162456171896, "grad_norm": 1.5374438762664795, "learning_rate": 6.17529542637125e-06, "loss": 0.69, "step": 12283 }, { "epoch": 2.2092960532230514, "grad_norm": 1.377756118774414, "learning_rate": 6.174729313721326e-06, "loss": 0.6343, "step": 12284 }, { "epoch": 2.209475860828913, "grad_norm": 1.586971640586853, "learning_rate": 6.1741631851318685e-06, "loss": 0.6788, "step": 12285 }, { "epoch": 2.209655668434775, "grad_norm": 1.0804246664047241, "learning_rate": 6.1735970406105565e-06, "loss": 0.8634, "step": 12286 }, { "epoch": 2.2098354760406367, "grad_norm": 1.5817725658416748, "learning_rate": 6.1730308801650726e-06, "loss": 0.6651, "step": 12287 }, { "epoch": 2.210015283646498, "grad_norm": 1.4698586463928223, "learning_rate": 6.172464703803099e-06, "loss": 0.7271, "step": 12288 }, { "epoch": 2.21019509125236, "grad_norm": 1.1245739459991455, "learning_rate": 6.171898511532318e-06, "loss": 0.8771, "step": 12289 }, { "epoch": 2.2103748988582215, "grad_norm": 1.4264901876449585, "learning_rate": 6.171332303360411e-06, "loss": 0.6946, "step": 12290 }, { "epoch": 2.2105547064640834, "grad_norm": 1.6747348308563232, "learning_rate": 6.170766079295063e-06, "loss": 0.7388, "step": 12291 }, { "epoch": 2.2107345140699453, "grad_norm": 1.4671732187271118, "learning_rate": 6.170199839343954e-06, "loss": 0.7052, "step": 12292 }, { "epoch": 2.2109143216758067, "grad_norm": 1.5563931465148926, "learning_rate": 6.1696335835147704e-06, "loss": 0.6904, "step": 12293 }, { "epoch": 2.2110941292816686, "grad_norm": 1.5805493593215942, "learning_rate": 6.169067311815193e-06, "loss": 0.7035, "step": 12294 }, { "epoch": 2.2112739368875305, "grad_norm": 1.691291332244873, "learning_rate": 6.168501024252905e-06, "loss": 0.7086, "step": 12295 }, { "epoch": 2.211453744493392, "grad_norm": 1.6471022367477417, "learning_rate": 6.1679347208355925e-06, "loss": 0.6792, "step": 12296 }, { "epoch": 2.211633552099254, "grad_norm": 1.426890254020691, "learning_rate": 6.167368401570939e-06, "loss": 0.6936, "step": 12297 }, { "epoch": 2.2118133597051157, "grad_norm": 1.5273094177246094, "learning_rate": 6.166802066466626e-06, "loss": 0.6719, "step": 12298 }, { "epoch": 2.211993167310977, "grad_norm": 1.6622432470321655, "learning_rate": 6.166235715530342e-06, "loss": 0.6925, "step": 12299 }, { "epoch": 2.212172974916839, "grad_norm": 1.456339955329895, "learning_rate": 6.165669348769769e-06, "loss": 0.682, "step": 12300 }, { "epoch": 2.2123527825227005, "grad_norm": 1.4680943489074707, "learning_rate": 6.165102966192592e-06, "loss": 0.6869, "step": 12301 }, { "epoch": 2.2125325901285624, "grad_norm": 1.5720523595809937, "learning_rate": 6.164536567806496e-06, "loss": 0.6581, "step": 12302 }, { "epoch": 2.2127123977344243, "grad_norm": 1.5613266229629517, "learning_rate": 6.163970153619168e-06, "loss": 0.6988, "step": 12303 }, { "epoch": 2.2128922053402857, "grad_norm": 1.5553815364837646, "learning_rate": 6.16340372363829e-06, "loss": 0.6654, "step": 12304 }, { "epoch": 2.2130720129461476, "grad_norm": 1.7066140174865723, "learning_rate": 6.162837277871553e-06, "loss": 0.7013, "step": 12305 }, { "epoch": 2.2132518205520095, "grad_norm": 1.5181963443756104, "learning_rate": 6.162270816326639e-06, "loss": 0.6472, "step": 12306 }, { "epoch": 2.213431628157871, "grad_norm": 1.496889591217041, "learning_rate": 6.1617043390112355e-06, "loss": 0.7014, "step": 12307 }, { "epoch": 2.213611435763733, "grad_norm": 1.5336753129959106, "learning_rate": 6.161137845933026e-06, "loss": 0.7031, "step": 12308 }, { "epoch": 2.2137912433695943, "grad_norm": 1.4569936990737915, "learning_rate": 6.160571337099702e-06, "loss": 0.6336, "step": 12309 }, { "epoch": 2.213971050975456, "grad_norm": 1.4239767789840698, "learning_rate": 6.160004812518947e-06, "loss": 0.6196, "step": 12310 }, { "epoch": 2.214150858581318, "grad_norm": 1.1444408893585205, "learning_rate": 6.159438272198449e-06, "loss": 0.8527, "step": 12311 }, { "epoch": 2.2143306661871796, "grad_norm": 1.4566882848739624, "learning_rate": 6.158871716145895e-06, "loss": 0.6926, "step": 12312 }, { "epoch": 2.2145104737930414, "grad_norm": 1.4638512134552002, "learning_rate": 6.158305144368973e-06, "loss": 0.6384, "step": 12313 }, { "epoch": 2.2146902813989033, "grad_norm": 1.480883240699768, "learning_rate": 6.157738556875368e-06, "loss": 0.6497, "step": 12314 }, { "epoch": 2.214870089004765, "grad_norm": 1.5361801385879517, "learning_rate": 6.1571719536727715e-06, "loss": 0.6259, "step": 12315 }, { "epoch": 2.2150498966106267, "grad_norm": 1.5830541849136353, "learning_rate": 6.156605334768869e-06, "loss": 0.721, "step": 12316 }, { "epoch": 2.2152297042164886, "grad_norm": 1.5769726037979126, "learning_rate": 6.156038700171351e-06, "loss": 0.7393, "step": 12317 }, { "epoch": 2.21540951182235, "grad_norm": 1.6091487407684326, "learning_rate": 6.155472049887904e-06, "loss": 0.6953, "step": 12318 }, { "epoch": 2.215589319428212, "grad_norm": 1.7098478078842163, "learning_rate": 6.154905383926218e-06, "loss": 0.7245, "step": 12319 }, { "epoch": 2.2157691270340734, "grad_norm": 1.5205484628677368, "learning_rate": 6.15433870229398e-06, "loss": 0.6948, "step": 12320 }, { "epoch": 2.2159489346399353, "grad_norm": 1.487703561782837, "learning_rate": 6.153772004998882e-06, "loss": 0.7135, "step": 12321 }, { "epoch": 2.216128742245797, "grad_norm": 1.6523293256759644, "learning_rate": 6.15320529204861e-06, "loss": 0.6656, "step": 12322 }, { "epoch": 2.2163085498516586, "grad_norm": 1.5707335472106934, "learning_rate": 6.152638563450858e-06, "loss": 0.6779, "step": 12323 }, { "epoch": 2.2164883574575205, "grad_norm": 1.4796289205551147, "learning_rate": 6.152071819213311e-06, "loss": 0.6958, "step": 12324 }, { "epoch": 2.2166681650633824, "grad_norm": 1.6990514993667603, "learning_rate": 6.151505059343661e-06, "loss": 0.674, "step": 12325 }, { "epoch": 2.216847972669244, "grad_norm": 1.5469239950180054, "learning_rate": 6.150938283849599e-06, "loss": 0.6802, "step": 12326 }, { "epoch": 2.2170277802751057, "grad_norm": 1.712198257446289, "learning_rate": 6.150371492738815e-06, "loss": 0.6511, "step": 12327 }, { "epoch": 2.217207587880967, "grad_norm": 1.6337330341339111, "learning_rate": 6.149804686018998e-06, "loss": 0.6819, "step": 12328 }, { "epoch": 2.217387395486829, "grad_norm": 1.5821934938430786, "learning_rate": 6.149237863697843e-06, "loss": 0.7081, "step": 12329 }, { "epoch": 2.217567203092691, "grad_norm": 1.4648452997207642, "learning_rate": 6.148671025783035e-06, "loss": 0.6456, "step": 12330 }, { "epoch": 2.2177470106985524, "grad_norm": 1.493699550628662, "learning_rate": 6.1481041722822694e-06, "loss": 0.6128, "step": 12331 }, { "epoch": 2.2179268183044143, "grad_norm": 1.1266952753067017, "learning_rate": 6.147537303203237e-06, "loss": 0.8751, "step": 12332 }, { "epoch": 2.218106625910276, "grad_norm": 1.5174776315689087, "learning_rate": 6.146970418553629e-06, "loss": 0.7058, "step": 12333 }, { "epoch": 2.2182864335161376, "grad_norm": 1.5083602666854858, "learning_rate": 6.146403518341138e-06, "loss": 0.689, "step": 12334 }, { "epoch": 2.2184662411219995, "grad_norm": 1.6057708263397217, "learning_rate": 6.145836602573454e-06, "loss": 0.7601, "step": 12335 }, { "epoch": 2.2186460487278614, "grad_norm": 1.607863426208496, "learning_rate": 6.1452696712582706e-06, "loss": 0.69, "step": 12336 }, { "epoch": 2.218825856333723, "grad_norm": 1.5806888341903687, "learning_rate": 6.144702724403282e-06, "loss": 0.6931, "step": 12337 }, { "epoch": 2.2190056639395848, "grad_norm": 1.7217518091201782, "learning_rate": 6.144135762016179e-06, "loss": 0.7344, "step": 12338 }, { "epoch": 2.219185471545446, "grad_norm": 1.4683269262313843, "learning_rate": 6.143568784104655e-06, "loss": 0.6056, "step": 12339 }, { "epoch": 2.219365279151308, "grad_norm": 1.539719581604004, "learning_rate": 6.143001790676403e-06, "loss": 0.6737, "step": 12340 }, { "epoch": 2.21954508675717, "grad_norm": 1.1019808053970337, "learning_rate": 6.142434781739116e-06, "loss": 0.8736, "step": 12341 }, { "epoch": 2.2197248943630314, "grad_norm": 1.8437104225158691, "learning_rate": 6.1418677573004894e-06, "loss": 0.6434, "step": 12342 }, { "epoch": 2.2199047019688933, "grad_norm": 1.4926129579544067, "learning_rate": 6.141300717368214e-06, "loss": 0.6922, "step": 12343 }, { "epoch": 2.220084509574755, "grad_norm": 1.4230445623397827, "learning_rate": 6.140733661949987e-06, "loss": 0.6929, "step": 12344 }, { "epoch": 2.2202643171806167, "grad_norm": 1.6048065423965454, "learning_rate": 6.140166591053499e-06, "loss": 0.7242, "step": 12345 }, { "epoch": 2.2204441247864786, "grad_norm": 1.6183619499206543, "learning_rate": 6.139599504686448e-06, "loss": 0.7294, "step": 12346 }, { "epoch": 2.22062393239234, "grad_norm": 1.5911998748779297, "learning_rate": 6.139032402856527e-06, "loss": 0.6398, "step": 12347 }, { "epoch": 2.220803739998202, "grad_norm": 1.4351787567138672, "learning_rate": 6.1384652855714295e-06, "loss": 0.6556, "step": 12348 }, { "epoch": 2.220983547604064, "grad_norm": 1.5001758337020874, "learning_rate": 6.1378981528388525e-06, "loss": 0.6347, "step": 12349 }, { "epoch": 2.2211633552099252, "grad_norm": 1.7291107177734375, "learning_rate": 6.137331004666493e-06, "loss": 0.6546, "step": 12350 }, { "epoch": 2.221343162815787, "grad_norm": 1.455230474472046, "learning_rate": 6.136763841062041e-06, "loss": 0.6248, "step": 12351 }, { "epoch": 2.221522970421649, "grad_norm": 1.543638825416565, "learning_rate": 6.136196662033197e-06, "loss": 0.6646, "step": 12352 }, { "epoch": 2.2217027780275105, "grad_norm": 1.592919111251831, "learning_rate": 6.135629467587654e-06, "loss": 0.7147, "step": 12353 }, { "epoch": 2.2218825856333724, "grad_norm": 1.5905365943908691, "learning_rate": 6.13506225773311e-06, "loss": 0.6919, "step": 12354 }, { "epoch": 2.222062393239234, "grad_norm": 1.5626530647277832, "learning_rate": 6.13449503247726e-06, "loss": 0.6768, "step": 12355 }, { "epoch": 2.2222422008450957, "grad_norm": 1.5110785961151123, "learning_rate": 6.1339277918278014e-06, "loss": 0.6922, "step": 12356 }, { "epoch": 2.2224220084509576, "grad_norm": 1.5845905542373657, "learning_rate": 6.133360535792431e-06, "loss": 0.6938, "step": 12357 }, { "epoch": 2.222601816056819, "grad_norm": 1.060071349143982, "learning_rate": 6.132793264378843e-06, "loss": 0.8627, "step": 12358 }, { "epoch": 2.222781623662681, "grad_norm": 1.5138882398605347, "learning_rate": 6.132225977594739e-06, "loss": 0.637, "step": 12359 }, { "epoch": 2.222961431268543, "grad_norm": 1.5770665407180786, "learning_rate": 6.131658675447814e-06, "loss": 0.6847, "step": 12360 }, { "epoch": 2.2231412388744043, "grad_norm": 1.1255519390106201, "learning_rate": 6.131091357945765e-06, "loss": 0.8543, "step": 12361 }, { "epoch": 2.223321046480266, "grad_norm": 1.5015686750411987, "learning_rate": 6.130524025096292e-06, "loss": 0.6631, "step": 12362 }, { "epoch": 2.2235008540861276, "grad_norm": 1.5751112699508667, "learning_rate": 6.129956676907088e-06, "loss": 0.6938, "step": 12363 }, { "epoch": 2.2236806616919895, "grad_norm": 1.5796818733215332, "learning_rate": 6.129389313385858e-06, "loss": 0.6591, "step": 12364 }, { "epoch": 2.2238604692978514, "grad_norm": 1.5469226837158203, "learning_rate": 6.128821934540296e-06, "loss": 0.6856, "step": 12365 }, { "epoch": 2.224040276903713, "grad_norm": 1.5473278760910034, "learning_rate": 6.128254540378101e-06, "loss": 0.6813, "step": 12366 }, { "epoch": 2.2242200845095748, "grad_norm": 1.5569759607315063, "learning_rate": 6.127687130906972e-06, "loss": 0.7408, "step": 12367 }, { "epoch": 2.2243998921154366, "grad_norm": 1.1540043354034424, "learning_rate": 6.127119706134607e-06, "loss": 0.835, "step": 12368 }, { "epoch": 2.224579699721298, "grad_norm": 1.5355695486068726, "learning_rate": 6.126552266068708e-06, "loss": 0.5824, "step": 12369 }, { "epoch": 2.22475950732716, "grad_norm": 1.709369421005249, "learning_rate": 6.125984810716974e-06, "loss": 0.674, "step": 12370 }, { "epoch": 2.224939314933022, "grad_norm": 1.6306649446487427, "learning_rate": 6.125417340087103e-06, "loss": 0.6652, "step": 12371 }, { "epoch": 2.2251191225388833, "grad_norm": 1.5515698194503784, "learning_rate": 6.124849854186795e-06, "loss": 0.7095, "step": 12372 }, { "epoch": 2.2252989301447452, "grad_norm": 1.6596462726593018, "learning_rate": 6.124282353023751e-06, "loss": 0.6709, "step": 12373 }, { "epoch": 2.2254787377506067, "grad_norm": 1.6704102754592896, "learning_rate": 6.123714836605671e-06, "loss": 0.7459, "step": 12374 }, { "epoch": 2.2256585453564686, "grad_norm": 1.623213768005371, "learning_rate": 6.1231473049402535e-06, "loss": 0.6984, "step": 12375 }, { "epoch": 2.2258383529623305, "grad_norm": 1.4714860916137695, "learning_rate": 6.122579758035202e-06, "loss": 0.6181, "step": 12376 }, { "epoch": 2.226018160568192, "grad_norm": 3.24125075340271, "learning_rate": 6.122012195898216e-06, "loss": 0.6579, "step": 12377 }, { "epoch": 2.226197968174054, "grad_norm": 1.2639394998550415, "learning_rate": 6.121444618536997e-06, "loss": 0.8995, "step": 12378 }, { "epoch": 2.2263777757799157, "grad_norm": 1.5454961061477661, "learning_rate": 6.120877025959245e-06, "loss": 0.6744, "step": 12379 }, { "epoch": 2.226557583385777, "grad_norm": 1.111356258392334, "learning_rate": 6.120309418172663e-06, "loss": 0.8407, "step": 12380 }, { "epoch": 2.226737390991639, "grad_norm": 1.5633952617645264, "learning_rate": 6.1197417951849515e-06, "loss": 0.7111, "step": 12381 }, { "epoch": 2.2269171985975005, "grad_norm": 1.5982098579406738, "learning_rate": 6.119174157003814e-06, "loss": 0.7056, "step": 12382 }, { "epoch": 2.2270970062033624, "grad_norm": 1.0513380765914917, "learning_rate": 6.1186065036369516e-06, "loss": 0.8595, "step": 12383 }, { "epoch": 2.2272768138092243, "grad_norm": 1.6006009578704834, "learning_rate": 6.1180388350920675e-06, "loss": 0.6388, "step": 12384 }, { "epoch": 2.2274566214150857, "grad_norm": 0.9359574913978577, "learning_rate": 6.117471151376861e-06, "loss": 0.8807, "step": 12385 }, { "epoch": 2.2276364290209476, "grad_norm": 1.6382486820220947, "learning_rate": 6.11690345249904e-06, "loss": 0.6968, "step": 12386 }, { "epoch": 2.2278162366268095, "grad_norm": 1.1626046895980835, "learning_rate": 6.1163357384663035e-06, "loss": 0.8818, "step": 12387 }, { "epoch": 2.227996044232671, "grad_norm": 0.9297778010368347, "learning_rate": 6.115768009286356e-06, "loss": 0.8749, "step": 12388 }, { "epoch": 2.228175851838533, "grad_norm": 1.520356297492981, "learning_rate": 6.1152002649669e-06, "loss": 0.7005, "step": 12389 }, { "epoch": 2.2283556594443947, "grad_norm": 1.5354728698730469, "learning_rate": 6.114632505515639e-06, "loss": 0.621, "step": 12390 }, { "epoch": 2.228535467050256, "grad_norm": 1.5563061237335205, "learning_rate": 6.114064730940279e-06, "loss": 0.6365, "step": 12391 }, { "epoch": 2.228715274656118, "grad_norm": 1.5126256942749023, "learning_rate": 6.113496941248523e-06, "loss": 0.772, "step": 12392 }, { "epoch": 2.2288950822619795, "grad_norm": 1.510764241218567, "learning_rate": 6.112929136448072e-06, "loss": 0.6763, "step": 12393 }, { "epoch": 2.2290748898678414, "grad_norm": 1.4522433280944824, "learning_rate": 6.112361316546635e-06, "loss": 0.6701, "step": 12394 }, { "epoch": 2.2292546974737033, "grad_norm": 1.5626604557037354, "learning_rate": 6.111793481551916e-06, "loss": 0.6709, "step": 12395 }, { "epoch": 2.2294345050795648, "grad_norm": 1.542952299118042, "learning_rate": 6.111225631471616e-06, "loss": 0.7165, "step": 12396 }, { "epoch": 2.2296143126854266, "grad_norm": 1.5415641069412231, "learning_rate": 6.110657766313441e-06, "loss": 0.6797, "step": 12397 }, { "epoch": 2.229794120291288, "grad_norm": 1.503198504447937, "learning_rate": 6.1100898860851e-06, "loss": 0.6866, "step": 12398 }, { "epoch": 2.22997392789715, "grad_norm": 1.594161868095398, "learning_rate": 6.109521990794295e-06, "loss": 0.6913, "step": 12399 }, { "epoch": 2.230153735503012, "grad_norm": 1.5862964391708374, "learning_rate": 6.108954080448732e-06, "loss": 0.7144, "step": 12400 }, { "epoch": 2.2303335431088733, "grad_norm": 1.6347246170043945, "learning_rate": 6.108386155056118e-06, "loss": 0.6487, "step": 12401 }, { "epoch": 2.230513350714735, "grad_norm": 1.6579161882400513, "learning_rate": 6.107818214624157e-06, "loss": 0.7076, "step": 12402 }, { "epoch": 2.230693158320597, "grad_norm": 1.5539106130599976, "learning_rate": 6.107250259160558e-06, "loss": 0.695, "step": 12403 }, { "epoch": 2.2308729659264586, "grad_norm": 1.5805578231811523, "learning_rate": 6.106682288673025e-06, "loss": 0.6829, "step": 12404 }, { "epoch": 2.2310527735323205, "grad_norm": 1.667497158050537, "learning_rate": 6.106114303169265e-06, "loss": 0.6414, "step": 12405 }, { "epoch": 2.2312325811381823, "grad_norm": 1.4928795099258423, "learning_rate": 6.105546302656986e-06, "loss": 0.6905, "step": 12406 }, { "epoch": 2.231412388744044, "grad_norm": 1.602310061454773, "learning_rate": 6.104978287143894e-06, "loss": 0.6993, "step": 12407 }, { "epoch": 2.2315921963499057, "grad_norm": 1.474929690361023, "learning_rate": 6.1044102566376975e-06, "loss": 0.6389, "step": 12408 }, { "epoch": 2.231772003955767, "grad_norm": 1.5511757135391235, "learning_rate": 6.103842211146101e-06, "loss": 0.7396, "step": 12409 }, { "epoch": 2.231951811561629, "grad_norm": 1.653088092803955, "learning_rate": 6.103274150676816e-06, "loss": 0.693, "step": 12410 }, { "epoch": 2.232131619167491, "grad_norm": 1.542837142944336, "learning_rate": 6.102706075237546e-06, "loss": 0.6637, "step": 12411 }, { "epoch": 2.2323114267733524, "grad_norm": 1.592947006225586, "learning_rate": 6.102137984836003e-06, "loss": 0.6268, "step": 12412 }, { "epoch": 2.2324912343792143, "grad_norm": 1.4820879697799683, "learning_rate": 6.101569879479894e-06, "loss": 0.7122, "step": 12413 }, { "epoch": 2.232671041985076, "grad_norm": 1.6429449319839478, "learning_rate": 6.101001759176928e-06, "loss": 0.68, "step": 12414 }, { "epoch": 2.2328508495909376, "grad_norm": 1.5233291387557983, "learning_rate": 6.100433623934811e-06, "loss": 0.6589, "step": 12415 }, { "epoch": 2.2330306571967995, "grad_norm": 1.517467737197876, "learning_rate": 6.099865473761255e-06, "loss": 0.6487, "step": 12416 }, { "epoch": 2.233210464802661, "grad_norm": 1.1335140466690063, "learning_rate": 6.0992973086639664e-06, "loss": 0.8186, "step": 12417 }, { "epoch": 2.233390272408523, "grad_norm": 1.491817593574524, "learning_rate": 6.098729128650656e-06, "loss": 0.6283, "step": 12418 }, { "epoch": 2.2335700800143847, "grad_norm": 1.6700912714004517, "learning_rate": 6.098160933729034e-06, "loss": 0.672, "step": 12419 }, { "epoch": 2.233749887620246, "grad_norm": 1.485681176185608, "learning_rate": 6.097592723906809e-06, "loss": 0.6981, "step": 12420 }, { "epoch": 2.233929695226108, "grad_norm": 1.159919261932373, "learning_rate": 6.09702449919169e-06, "loss": 0.8895, "step": 12421 }, { "epoch": 2.23410950283197, "grad_norm": 1.640435814857483, "learning_rate": 6.096456259591388e-06, "loss": 0.7176, "step": 12422 }, { "epoch": 2.2342893104378314, "grad_norm": 1.6843924522399902, "learning_rate": 6.0958880051136125e-06, "loss": 0.6989, "step": 12423 }, { "epoch": 2.2344691180436933, "grad_norm": 1.4807828664779663, "learning_rate": 6.095319735766076e-06, "loss": 0.6811, "step": 12424 }, { "epoch": 2.234648925649555, "grad_norm": 1.761914610862732, "learning_rate": 6.094751451556488e-06, "loss": 0.6557, "step": 12425 }, { "epoch": 2.2348287332554166, "grad_norm": 1.4150534868240356, "learning_rate": 6.09418315249256e-06, "loss": 0.6515, "step": 12426 }, { "epoch": 2.2350085408612785, "grad_norm": 1.3985118865966797, "learning_rate": 6.093614838582001e-06, "loss": 0.657, "step": 12427 }, { "epoch": 2.23518834846714, "grad_norm": 1.5790475606918335, "learning_rate": 6.093046509832524e-06, "loss": 0.6721, "step": 12428 }, { "epoch": 2.235368156073002, "grad_norm": 1.549000859260559, "learning_rate": 6.092478166251839e-06, "loss": 0.6607, "step": 12429 }, { "epoch": 2.2355479636788638, "grad_norm": 1.6334447860717773, "learning_rate": 6.091909807847661e-06, "loss": 0.727, "step": 12430 }, { "epoch": 2.235727771284725, "grad_norm": 1.5960041284561157, "learning_rate": 6.091341434627698e-06, "loss": 0.6416, "step": 12431 }, { "epoch": 2.235907578890587, "grad_norm": 1.5851277112960815, "learning_rate": 6.090773046599665e-06, "loss": 0.6724, "step": 12432 }, { "epoch": 2.236087386496449, "grad_norm": 1.7156696319580078, "learning_rate": 6.0902046437712715e-06, "loss": 0.6685, "step": 12433 }, { "epoch": 2.2362671941023105, "grad_norm": 1.3995076417922974, "learning_rate": 6.0896362261502315e-06, "loss": 0.574, "step": 12434 }, { "epoch": 2.2364470017081723, "grad_norm": 1.6422381401062012, "learning_rate": 6.089067793744258e-06, "loss": 0.7049, "step": 12435 }, { "epoch": 2.236626809314034, "grad_norm": 1.7974472045898438, "learning_rate": 6.088499346561064e-06, "loss": 0.7379, "step": 12436 }, { "epoch": 2.2368066169198957, "grad_norm": 1.6002731323242188, "learning_rate": 6.0879308846083615e-06, "loss": 0.6771, "step": 12437 }, { "epoch": 2.2369864245257576, "grad_norm": 1.6694780588150024, "learning_rate": 6.087362407893866e-06, "loss": 0.6678, "step": 12438 }, { "epoch": 2.237166232131619, "grad_norm": 1.5048229694366455, "learning_rate": 6.086793916425288e-06, "loss": 0.6757, "step": 12439 }, { "epoch": 2.237346039737481, "grad_norm": 1.526621699333191, "learning_rate": 6.086225410210344e-06, "loss": 0.7144, "step": 12440 }, { "epoch": 2.237525847343343, "grad_norm": 1.4732260704040527, "learning_rate": 6.085656889256744e-06, "loss": 0.6958, "step": 12441 }, { "epoch": 2.2377056549492043, "grad_norm": 1.4907854795455933, "learning_rate": 6.085088353572206e-06, "loss": 0.6703, "step": 12442 }, { "epoch": 2.237885462555066, "grad_norm": 1.651746392250061, "learning_rate": 6.084519803164443e-06, "loss": 0.6557, "step": 12443 }, { "epoch": 2.2380652701609276, "grad_norm": 1.53757905960083, "learning_rate": 6.083951238041168e-06, "loss": 0.7124, "step": 12444 }, { "epoch": 2.2382450777667895, "grad_norm": 1.6280211210250854, "learning_rate": 6.083382658210098e-06, "loss": 0.6819, "step": 12445 }, { "epoch": 2.2384248853726514, "grad_norm": 1.572993278503418, "learning_rate": 6.082814063678948e-06, "loss": 0.7319, "step": 12446 }, { "epoch": 2.238604692978513, "grad_norm": 1.1482691764831543, "learning_rate": 6.08224545445543e-06, "loss": 0.8231, "step": 12447 }, { "epoch": 2.2387845005843747, "grad_norm": 1.6273417472839355, "learning_rate": 6.081676830547263e-06, "loss": 0.67, "step": 12448 }, { "epoch": 2.2389643081902366, "grad_norm": 1.517661452293396, "learning_rate": 6.081108191962158e-06, "loss": 0.7099, "step": 12449 }, { "epoch": 2.239144115796098, "grad_norm": 1.497862696647644, "learning_rate": 6.080539538707837e-06, "loss": 0.6171, "step": 12450 }, { "epoch": 2.23932392340196, "grad_norm": 1.5307412147521973, "learning_rate": 6.0799708707920095e-06, "loss": 0.7276, "step": 12451 }, { "epoch": 2.2395037310078214, "grad_norm": 1.5598962306976318, "learning_rate": 6.079402188222397e-06, "loss": 0.6765, "step": 12452 }, { "epoch": 2.2396835386136833, "grad_norm": 1.5444591045379639, "learning_rate": 6.078833491006711e-06, "loss": 0.6419, "step": 12453 }, { "epoch": 2.239863346219545, "grad_norm": 1.1014653444290161, "learning_rate": 6.07826477915267e-06, "loss": 0.8158, "step": 12454 }, { "epoch": 2.2400431538254066, "grad_norm": 1.5897105932235718, "learning_rate": 6.0776960526679904e-06, "loss": 0.7581, "step": 12455 }, { "epoch": 2.2402229614312685, "grad_norm": 1.4683313369750977, "learning_rate": 6.0771273115603905e-06, "loss": 0.6381, "step": 12456 }, { "epoch": 2.2404027690371304, "grad_norm": 1.0201643705368042, "learning_rate": 6.076558555837586e-06, "loss": 0.8873, "step": 12457 }, { "epoch": 2.240582576642992, "grad_norm": 1.5079959630966187, "learning_rate": 6.0759897855072944e-06, "loss": 0.719, "step": 12458 }, { "epoch": 2.2407623842488538, "grad_norm": 1.5980523824691772, "learning_rate": 6.075421000577234e-06, "loss": 0.6817, "step": 12459 }, { "epoch": 2.2409421918547157, "grad_norm": 1.5546766519546509, "learning_rate": 6.074852201055121e-06, "loss": 0.7108, "step": 12460 }, { "epoch": 2.241121999460577, "grad_norm": 1.5576660633087158, "learning_rate": 6.074283386948674e-06, "loss": 0.7058, "step": 12461 }, { "epoch": 2.241301807066439, "grad_norm": 1.4922349452972412, "learning_rate": 6.073714558265612e-06, "loss": 0.6841, "step": 12462 }, { "epoch": 2.2414816146723004, "grad_norm": 1.5172102451324463, "learning_rate": 6.073145715013651e-06, "loss": 0.5699, "step": 12463 }, { "epoch": 2.2416614222781623, "grad_norm": 1.299734354019165, "learning_rate": 6.072576857200512e-06, "loss": 0.8627, "step": 12464 }, { "epoch": 2.2418412298840242, "grad_norm": 1.1343377828598022, "learning_rate": 6.072007984833912e-06, "loss": 0.8269, "step": 12465 }, { "epoch": 2.2420210374898857, "grad_norm": 1.45199716091156, "learning_rate": 6.071439097921568e-06, "loss": 0.6899, "step": 12466 }, { "epoch": 2.2422008450957476, "grad_norm": 1.5571486949920654, "learning_rate": 6.070870196471203e-06, "loss": 0.6783, "step": 12467 }, { "epoch": 2.2423806527016095, "grad_norm": 1.1960878372192383, "learning_rate": 6.070301280490536e-06, "loss": 0.8721, "step": 12468 }, { "epoch": 2.242560460307471, "grad_norm": 1.597258448600769, "learning_rate": 6.069732349987284e-06, "loss": 0.6794, "step": 12469 }, { "epoch": 2.242740267913333, "grad_norm": 1.1629931926727295, "learning_rate": 6.0691634049691676e-06, "loss": 0.893, "step": 12470 }, { "epoch": 2.2429200755191943, "grad_norm": 1.5854997634887695, "learning_rate": 6.068594445443907e-06, "loss": 0.6495, "step": 12471 }, { "epoch": 2.243099883125056, "grad_norm": 1.4535858631134033, "learning_rate": 6.068025471419221e-06, "loss": 0.5881, "step": 12472 }, { "epoch": 2.243279690730918, "grad_norm": 1.2065445184707642, "learning_rate": 6.0674564829028315e-06, "loss": 0.8643, "step": 12473 }, { "epoch": 2.2434594983367795, "grad_norm": 1.4911768436431885, "learning_rate": 6.066887479902458e-06, "loss": 0.6902, "step": 12474 }, { "epoch": 2.2436393059426414, "grad_norm": 1.1157004833221436, "learning_rate": 6.066318462425822e-06, "loss": 0.8597, "step": 12475 }, { "epoch": 2.2438191135485033, "grad_norm": 1.4720722436904907, "learning_rate": 6.065749430480642e-06, "loss": 0.6579, "step": 12476 }, { "epoch": 2.2439989211543647, "grad_norm": 1.5604552030563354, "learning_rate": 6.065180384074642e-06, "loss": 0.6942, "step": 12477 }, { "epoch": 2.2441787287602266, "grad_norm": 1.5859266519546509, "learning_rate": 6.064611323215541e-06, "loss": 0.6283, "step": 12478 }, { "epoch": 2.2443585363660885, "grad_norm": 1.7138375043869019, "learning_rate": 6.064042247911061e-06, "loss": 0.7061, "step": 12479 }, { "epoch": 2.24453834397195, "grad_norm": 2.011265277862549, "learning_rate": 6.0634731581689245e-06, "loss": 0.6878, "step": 12480 }, { "epoch": 2.244718151577812, "grad_norm": 1.4558779001235962, "learning_rate": 6.062904053996853e-06, "loss": 0.6686, "step": 12481 }, { "epoch": 2.2448979591836733, "grad_norm": 1.6200714111328125, "learning_rate": 6.062334935402567e-06, "loss": 0.6852, "step": 12482 }, { "epoch": 2.245077766789535, "grad_norm": 1.8690372705459595, "learning_rate": 6.061765802393792e-06, "loss": 0.737, "step": 12483 }, { "epoch": 2.245257574395397, "grad_norm": 1.6485157012939453, "learning_rate": 6.061196654978246e-06, "loss": 0.6978, "step": 12484 }, { "epoch": 2.2454373820012585, "grad_norm": 1.561888575553894, "learning_rate": 6.060627493163656e-06, "loss": 0.6859, "step": 12485 }, { "epoch": 2.2456171896071204, "grad_norm": 1.592363715171814, "learning_rate": 6.060058316957741e-06, "loss": 0.6788, "step": 12486 }, { "epoch": 2.2457969972129823, "grad_norm": 1.542439579963684, "learning_rate": 6.059489126368226e-06, "loss": 0.6556, "step": 12487 }, { "epoch": 2.2459768048188438, "grad_norm": 1.5076022148132324, "learning_rate": 6.058919921402834e-06, "loss": 0.6412, "step": 12488 }, { "epoch": 2.2461566124247057, "grad_norm": 1.4814622402191162, "learning_rate": 6.058350702069287e-06, "loss": 0.6992, "step": 12489 }, { "epoch": 2.246336420030567, "grad_norm": 1.5850595235824585, "learning_rate": 6.05778146837531e-06, "loss": 0.6589, "step": 12490 }, { "epoch": 2.246516227636429, "grad_norm": 1.443237543106079, "learning_rate": 6.057212220328628e-06, "loss": 0.6643, "step": 12491 }, { "epoch": 2.246696035242291, "grad_norm": 1.4331834316253662, "learning_rate": 6.056642957936961e-06, "loss": 0.6205, "step": 12492 }, { "epoch": 2.2468758428481523, "grad_norm": 1.5655567646026611, "learning_rate": 6.056073681208038e-06, "loss": 0.7071, "step": 12493 }, { "epoch": 2.2470556504540142, "grad_norm": 1.5184919834136963, "learning_rate": 6.055504390149579e-06, "loss": 0.6636, "step": 12494 }, { "epoch": 2.247235458059876, "grad_norm": 1.6048294305801392, "learning_rate": 6.054935084769311e-06, "loss": 0.6738, "step": 12495 }, { "epoch": 2.2474152656657376, "grad_norm": 1.6327260732650757, "learning_rate": 6.054365765074958e-06, "loss": 0.6832, "step": 12496 }, { "epoch": 2.2475950732715995, "grad_norm": 1.525433897972107, "learning_rate": 6.053796431074246e-06, "loss": 0.6369, "step": 12497 }, { "epoch": 2.247774880877461, "grad_norm": 1.6351523399353027, "learning_rate": 6.0532270827748985e-06, "loss": 0.7245, "step": 12498 }, { "epoch": 2.247954688483323, "grad_norm": 1.6832387447357178, "learning_rate": 6.05265772018464e-06, "loss": 0.7039, "step": 12499 }, { "epoch": 2.2481344960891847, "grad_norm": 1.5138037204742432, "learning_rate": 6.052088343311199e-06, "loss": 0.7046, "step": 12500 }, { "epoch": 2.2481344960891847, "eval_loss": 0.7867943644523621, "eval_runtime": 150.8906, "eval_samples_per_second": 95.314, "eval_steps_per_second": 1.491, "step": 12500 }, { "epoch": 2.2481344960891847, "step": 12500, "total_flos": 4.2658895113318564e+18, "train_loss": 0.8141860727739334, "train_runtime": 209695.5843, "train_samples_per_second": 33.948, "train_steps_per_second": 0.133 } ], "logging_steps": 1.0, "max_steps": 27805, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "CustomEarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.2658895113318564e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }