| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 1875, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0016, |
| "grad_norm": 2.6935333483246744, |
| "learning_rate": 0.0, |
| "loss": 0.7427, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0032, |
| "grad_norm": 2.5909334017343797, |
| "learning_rate": 5.319148936170213e-08, |
| "loss": 0.6925, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0048, |
| "grad_norm": 2.634615998488796, |
| "learning_rate": 1.0638297872340426e-07, |
| "loss": 0.7508, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0064, |
| "grad_norm": 2.688252553789578, |
| "learning_rate": 1.5957446808510638e-07, |
| "loss": 0.7672, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 2.5601208181109203, |
| "learning_rate": 2.1276595744680852e-07, |
| "loss": 0.7247, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0096, |
| "grad_norm": 2.73714250020918, |
| "learning_rate": 2.6595744680851066e-07, |
| "loss": 0.7627, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0112, |
| "grad_norm": 2.516096017216241, |
| "learning_rate": 3.1914893617021275e-07, |
| "loss": 0.7199, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0128, |
| "grad_norm": 2.380185041166624, |
| "learning_rate": 3.723404255319149e-07, |
| "loss": 0.7204, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0144, |
| "grad_norm": 2.3858299652503874, |
| "learning_rate": 4.2553191489361704e-07, |
| "loss": 0.7195, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 2.4594433717388835, |
| "learning_rate": 4.787234042553192e-07, |
| "loss": 0.7344, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0176, |
| "grad_norm": 2.1734732417865814, |
| "learning_rate": 5.319148936170213e-07, |
| "loss": 0.7051, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0192, |
| "grad_norm": 2.165675293533436, |
| "learning_rate": 5.851063829787235e-07, |
| "loss": 0.6932, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0208, |
| "grad_norm": 2.162307197523733, |
| "learning_rate": 6.382978723404255e-07, |
| "loss": 0.7134, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0224, |
| "grad_norm": 1.6600872031666853, |
| "learning_rate": 6.914893617021278e-07, |
| "loss": 0.7353, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 1.5649594550265764, |
| "learning_rate": 7.446808510638298e-07, |
| "loss": 0.7319, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0256, |
| "grad_norm": 1.4448637028969793, |
| "learning_rate": 7.97872340425532e-07, |
| "loss": 0.7322, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0272, |
| "grad_norm": 1.320243789479434, |
| "learning_rate": 8.510638297872341e-07, |
| "loss": 0.6994, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0288, |
| "grad_norm": 1.2934247567257413, |
| "learning_rate": 9.042553191489363e-07, |
| "loss": 0.687, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0304, |
| "grad_norm": 1.4694306194531632, |
| "learning_rate": 9.574468085106384e-07, |
| "loss": 0.6901, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 1.70725718529189, |
| "learning_rate": 1.0106382978723404e-06, |
| "loss": 0.7097, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0336, |
| "grad_norm": 1.4436761974162777, |
| "learning_rate": 1.0638297872340427e-06, |
| "loss": 0.6627, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.0352, |
| "grad_norm": 1.4288626184062314, |
| "learning_rate": 1.1170212765957447e-06, |
| "loss": 0.6915, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0368, |
| "grad_norm": 1.2325251234185621, |
| "learning_rate": 1.170212765957447e-06, |
| "loss": 0.6957, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0384, |
| "grad_norm": 0.9960617113038444, |
| "learning_rate": 1.223404255319149e-06, |
| "loss": 0.641, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.7519039803913088, |
| "learning_rate": 1.276595744680851e-06, |
| "loss": 0.6702, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.0416, |
| "grad_norm": 0.7051543380507642, |
| "learning_rate": 1.3297872340425533e-06, |
| "loss": 0.6357, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0432, |
| "grad_norm": 0.7485544728631365, |
| "learning_rate": 1.3829787234042555e-06, |
| "loss": 0.6915, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0448, |
| "grad_norm": 0.7121634421588148, |
| "learning_rate": 1.4361702127659578e-06, |
| "loss": 0.6466, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0464, |
| "grad_norm": 0.6707318244947778, |
| "learning_rate": 1.4893617021276596e-06, |
| "loss": 0.5835, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 0.6520140712514993, |
| "learning_rate": 1.5425531914893618e-06, |
| "loss": 0.6324, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0496, |
| "grad_norm": 0.5751188271208452, |
| "learning_rate": 1.595744680851064e-06, |
| "loss": 0.6115, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.0512, |
| "grad_norm": 0.5738242389446708, |
| "learning_rate": 1.648936170212766e-06, |
| "loss": 0.6302, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.0528, |
| "grad_norm": 0.5597551825898859, |
| "learning_rate": 1.7021276595744682e-06, |
| "loss": 0.6081, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.0544, |
| "grad_norm": 0.4818005589602406, |
| "learning_rate": 1.7553191489361704e-06, |
| "loss": 0.5973, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 0.4941719726639123, |
| "learning_rate": 1.8085106382978727e-06, |
| "loss": 0.6, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0576, |
| "grad_norm": 0.49895853258896516, |
| "learning_rate": 1.8617021276595745e-06, |
| "loss": 0.6188, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.0592, |
| "grad_norm": 0.5327659057373375, |
| "learning_rate": 1.9148936170212767e-06, |
| "loss": 0.5939, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.0608, |
| "grad_norm": 0.4687105645386323, |
| "learning_rate": 1.968085106382979e-06, |
| "loss": 0.6092, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.0624, |
| "grad_norm": 0.4949900909049657, |
| "learning_rate": 2.021276595744681e-06, |
| "loss": 0.5981, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 0.42190602729284093, |
| "learning_rate": 2.074468085106383e-06, |
| "loss": 0.6017, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0656, |
| "grad_norm": 0.37536994218910175, |
| "learning_rate": 2.1276595744680853e-06, |
| "loss": 0.5469, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.0672, |
| "grad_norm": 0.38124163784884785, |
| "learning_rate": 2.1808510638297876e-06, |
| "loss": 0.5726, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.0688, |
| "grad_norm": 0.37655196660247514, |
| "learning_rate": 2.2340425531914894e-06, |
| "loss": 0.5244, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.0704, |
| "grad_norm": 0.3934569860193009, |
| "learning_rate": 2.2872340425531916e-06, |
| "loss": 0.5599, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 0.41664976638546425, |
| "learning_rate": 2.340425531914894e-06, |
| "loss": 0.5856, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.0736, |
| "grad_norm": 0.33500487165544873, |
| "learning_rate": 2.393617021276596e-06, |
| "loss": 0.5274, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.0752, |
| "grad_norm": 0.3808245038520983, |
| "learning_rate": 2.446808510638298e-06, |
| "loss": 0.5991, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.0768, |
| "grad_norm": 0.3249336009738689, |
| "learning_rate": 2.5e-06, |
| "loss": 0.5321, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.0784, |
| "grad_norm": 0.2960564845360439, |
| "learning_rate": 2.553191489361702e-06, |
| "loss": 0.5217, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.33589601266912045, |
| "learning_rate": 2.6063829787234047e-06, |
| "loss": 0.534, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0816, |
| "grad_norm": 0.3314446644656821, |
| "learning_rate": 2.6595744680851065e-06, |
| "loss": 0.5755, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.0832, |
| "grad_norm": 0.34135953173140776, |
| "learning_rate": 2.7127659574468084e-06, |
| "loss": 0.5576, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.0848, |
| "grad_norm": 0.31636789884375627, |
| "learning_rate": 2.765957446808511e-06, |
| "loss": 0.5468, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.0864, |
| "grad_norm": 0.3084904380311999, |
| "learning_rate": 2.819148936170213e-06, |
| "loss": 0.5227, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.088, |
| "grad_norm": 0.31597595967209435, |
| "learning_rate": 2.8723404255319155e-06, |
| "loss": 0.5257, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0896, |
| "grad_norm": 0.6984361527154183, |
| "learning_rate": 2.9255319148936174e-06, |
| "loss": 0.5646, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.0912, |
| "grad_norm": 0.39838768585133283, |
| "learning_rate": 2.978723404255319e-06, |
| "loss": 0.5187, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.0928, |
| "grad_norm": 0.3149538823943047, |
| "learning_rate": 3.031914893617022e-06, |
| "loss": 0.5058, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.0944, |
| "grad_norm": 0.3127673654333588, |
| "learning_rate": 3.0851063829787237e-06, |
| "loss": 0.5572, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 0.3014590282578784, |
| "learning_rate": 3.1382978723404255e-06, |
| "loss": 0.5167, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0976, |
| "grad_norm": 0.30534162464263337, |
| "learning_rate": 3.191489361702128e-06, |
| "loss": 0.5137, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.0992, |
| "grad_norm": 0.2896634913580371, |
| "learning_rate": 3.24468085106383e-06, |
| "loss": 0.4934, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.1008, |
| "grad_norm": 0.3081521608162006, |
| "learning_rate": 3.297872340425532e-06, |
| "loss": 0.5138, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.1024, |
| "grad_norm": 0.3069095847868297, |
| "learning_rate": 3.3510638297872345e-06, |
| "loss": 0.5294, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.104, |
| "grad_norm": 0.2994762622641544, |
| "learning_rate": 3.4042553191489363e-06, |
| "loss": 0.489, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.1056, |
| "grad_norm": 0.3140187672015218, |
| "learning_rate": 3.457446808510639e-06, |
| "loss": 0.4988, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.1072, |
| "grad_norm": 0.2985872651347218, |
| "learning_rate": 3.510638297872341e-06, |
| "loss": 0.53, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.1088, |
| "grad_norm": 0.274882863033803, |
| "learning_rate": 3.5638297872340426e-06, |
| "loss": 0.4765, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.1104, |
| "grad_norm": 0.3260517406320922, |
| "learning_rate": 3.6170212765957453e-06, |
| "loss": 0.499, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 0.2568681603754758, |
| "learning_rate": 3.670212765957447e-06, |
| "loss": 0.4544, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.1136, |
| "grad_norm": 0.2962433866319457, |
| "learning_rate": 3.723404255319149e-06, |
| "loss": 0.5032, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.1152, |
| "grad_norm": 0.28222927389548674, |
| "learning_rate": 3.7765957446808516e-06, |
| "loss": 0.4774, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.1168, |
| "grad_norm": 0.326896551546162, |
| "learning_rate": 3.8297872340425535e-06, |
| "loss": 0.4971, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.1184, |
| "grad_norm": 0.29328331437527605, |
| "learning_rate": 3.882978723404256e-06, |
| "loss": 0.4826, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.36071303110194936, |
| "learning_rate": 3.936170212765958e-06, |
| "loss": 0.4879, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.1216, |
| "grad_norm": 0.423702064011967, |
| "learning_rate": 3.98936170212766e-06, |
| "loss": 0.4692, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.1232, |
| "grad_norm": 0.25706569864707063, |
| "learning_rate": 4.042553191489362e-06, |
| "loss": 0.4369, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.1248, |
| "grad_norm": 0.3074159930166736, |
| "learning_rate": 4.095744680851064e-06, |
| "loss": 0.4655, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.1264, |
| "grad_norm": 0.26664664345884875, |
| "learning_rate": 4.148936170212766e-06, |
| "loss": 0.472, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.2716297434667834, |
| "learning_rate": 4.202127659574468e-06, |
| "loss": 0.4993, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.1296, |
| "grad_norm": 0.267956886406374, |
| "learning_rate": 4.255319148936171e-06, |
| "loss": 0.4793, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.1312, |
| "grad_norm": 0.2887632140621916, |
| "learning_rate": 4.308510638297873e-06, |
| "loss": 0.4671, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.1328, |
| "grad_norm": 0.28255508656318745, |
| "learning_rate": 4.361702127659575e-06, |
| "loss": 0.4687, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.1344, |
| "grad_norm": 0.256278683213602, |
| "learning_rate": 4.414893617021277e-06, |
| "loss": 0.4531, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.136, |
| "grad_norm": 0.27913696623674544, |
| "learning_rate": 4.468085106382979e-06, |
| "loss": 0.4663, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.1376, |
| "grad_norm": 0.29736075931684885, |
| "learning_rate": 4.521276595744681e-06, |
| "loss": 0.4829, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.1392, |
| "grad_norm": 0.25585546481318183, |
| "learning_rate": 4.574468085106383e-06, |
| "loss": 0.4624, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.1408, |
| "grad_norm": 0.2867031921023038, |
| "learning_rate": 4.6276595744680855e-06, |
| "loss": 0.5039, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.1424, |
| "grad_norm": 0.2797440815749368, |
| "learning_rate": 4.680851063829788e-06, |
| "loss": 0.4301, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 0.2795390367233978, |
| "learning_rate": 4.73404255319149e-06, |
| "loss": 0.4788, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.1456, |
| "grad_norm": 0.25723527077432407, |
| "learning_rate": 4.787234042553192e-06, |
| "loss": 0.4641, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.1472, |
| "grad_norm": 0.25625809790513926, |
| "learning_rate": 4.840425531914894e-06, |
| "loss": 0.4727, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.1488, |
| "grad_norm": 0.25716070394308227, |
| "learning_rate": 4.893617021276596e-06, |
| "loss": 0.4514, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.1504, |
| "grad_norm": 0.2862157672036198, |
| "learning_rate": 4.946808510638298e-06, |
| "loss": 0.455, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.152, |
| "grad_norm": 0.6295562076865021, |
| "learning_rate": 5e-06, |
| "loss": 0.4609, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.1536, |
| "grad_norm": 0.26734196590778503, |
| "learning_rate": 5.053191489361703e-06, |
| "loss": 0.4754, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.1552, |
| "grad_norm": 0.2615792928982075, |
| "learning_rate": 5.106382978723404e-06, |
| "loss": 0.4613, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.1568, |
| "grad_norm": 0.2740038778201716, |
| "learning_rate": 5.159574468085107e-06, |
| "loss": 0.4602, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.1584, |
| "grad_norm": 0.270871901070674, |
| "learning_rate": 5.212765957446809e-06, |
| "loss": 0.4839, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.2831629741961585, |
| "learning_rate": 5.265957446808511e-06, |
| "loss": 0.4754, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1616, |
| "grad_norm": 0.26797264930001113, |
| "learning_rate": 5.319148936170213e-06, |
| "loss": 0.4702, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.1632, |
| "grad_norm": 0.3062894986461114, |
| "learning_rate": 5.372340425531915e-06, |
| "loss": 0.4524, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.1648, |
| "grad_norm": 0.3018173984434311, |
| "learning_rate": 5.425531914893617e-06, |
| "loss": 0.4779, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.1664, |
| "grad_norm": 0.33864978644602667, |
| "learning_rate": 5.47872340425532e-06, |
| "loss": 0.4909, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.168, |
| "grad_norm": 0.2919677991724035, |
| "learning_rate": 5.531914893617022e-06, |
| "loss": 0.4505, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.1696, |
| "grad_norm": 0.2613660927100871, |
| "learning_rate": 5.5851063829787235e-06, |
| "loss": 0.4617, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.1712, |
| "grad_norm": 0.31397559497635025, |
| "learning_rate": 5.638297872340426e-06, |
| "loss": 0.5136, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.1728, |
| "grad_norm": 0.28721601144082376, |
| "learning_rate": 5.691489361702128e-06, |
| "loss": 0.4684, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.1744, |
| "grad_norm": 0.26589638141976213, |
| "learning_rate": 5.744680851063831e-06, |
| "loss": 0.4515, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 0.30609423594553886, |
| "learning_rate": 5.7978723404255325e-06, |
| "loss": 0.4814, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.1776, |
| "grad_norm": 0.27839304939931775, |
| "learning_rate": 5.851063829787235e-06, |
| "loss": 0.4849, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.1792, |
| "grad_norm": 0.299314432661347, |
| "learning_rate": 5.904255319148937e-06, |
| "loss": 0.5077, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.1808, |
| "grad_norm": 0.2948993874679358, |
| "learning_rate": 5.957446808510638e-06, |
| "loss": 0.4774, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.1824, |
| "grad_norm": 0.2935523014148891, |
| "learning_rate": 6.010638297872341e-06, |
| "loss": 0.4577, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.184, |
| "grad_norm": 0.38566730462420856, |
| "learning_rate": 6.063829787234044e-06, |
| "loss": 0.4671, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.1856, |
| "grad_norm": 0.36149447843927573, |
| "learning_rate": 6.117021276595745e-06, |
| "loss": 0.4573, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.1872, |
| "grad_norm": 0.4079925918858063, |
| "learning_rate": 6.170212765957447e-06, |
| "loss": 0.4832, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.1888, |
| "grad_norm": 0.25767630409154185, |
| "learning_rate": 6.22340425531915e-06, |
| "loss": 0.481, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.1904, |
| "grad_norm": 0.29192923760449985, |
| "learning_rate": 6.276595744680851e-06, |
| "loss": 0.4564, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.2620251447542489, |
| "learning_rate": 6.329787234042554e-06, |
| "loss": 0.4433, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1936, |
| "grad_norm": 0.29032987255841375, |
| "learning_rate": 6.382978723404256e-06, |
| "loss": 0.4625, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.1952, |
| "grad_norm": 0.2727341898569371, |
| "learning_rate": 6.436170212765958e-06, |
| "loss": 0.4521, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.1968, |
| "grad_norm": 0.27611761654127837, |
| "learning_rate": 6.48936170212766e-06, |
| "loss": 0.4661, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.1984, |
| "grad_norm": 0.3163260760558545, |
| "learning_rate": 6.542553191489362e-06, |
| "loss": 0.4599, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.27842340277774, |
| "learning_rate": 6.595744680851064e-06, |
| "loss": 0.4588, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.2016, |
| "grad_norm": 0.2557917064773321, |
| "learning_rate": 6.648936170212767e-06, |
| "loss": 0.4376, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.2032, |
| "grad_norm": 0.2841679042280421, |
| "learning_rate": 6.702127659574469e-06, |
| "loss": 0.4569, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.2048, |
| "grad_norm": 0.2905960105911881, |
| "learning_rate": 6.75531914893617e-06, |
| "loss": 0.4501, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.2064, |
| "grad_norm": 0.28561629108364034, |
| "learning_rate": 6.808510638297873e-06, |
| "loss": 0.4381, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 0.2632319610068059, |
| "learning_rate": 6.861702127659575e-06, |
| "loss": 0.4507, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2096, |
| "grad_norm": 0.271066825664474, |
| "learning_rate": 6.914893617021278e-06, |
| "loss": 0.4418, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.2112, |
| "grad_norm": 0.25431874077995864, |
| "learning_rate": 6.968085106382979e-06, |
| "loss": 0.4293, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.2128, |
| "grad_norm": 0.39971792700690223, |
| "learning_rate": 7.021276595744682e-06, |
| "loss": 0.4558, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.2144, |
| "grad_norm": 0.2629426503995384, |
| "learning_rate": 7.074468085106384e-06, |
| "loss": 0.4405, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.216, |
| "grad_norm": 0.26106149577095644, |
| "learning_rate": 7.127659574468085e-06, |
| "loss": 0.4473, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.2176, |
| "grad_norm": 0.2621727873771089, |
| "learning_rate": 7.1808510638297875e-06, |
| "loss": 0.4405, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.2192, |
| "grad_norm": 0.2745756643321701, |
| "learning_rate": 7.234042553191491e-06, |
| "loss": 0.4585, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.2208, |
| "grad_norm": 0.2760024894230963, |
| "learning_rate": 7.287234042553192e-06, |
| "loss": 0.4346, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.2224, |
| "grad_norm": 0.26484765321802, |
| "learning_rate": 7.340425531914894e-06, |
| "loss": 0.4343, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.28158779765901615, |
| "learning_rate": 7.3936170212765965e-06, |
| "loss": 0.4444, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.2256, |
| "grad_norm": 0.2688341483479552, |
| "learning_rate": 7.446808510638298e-06, |
| "loss": 0.4339, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.2272, |
| "grad_norm": 0.27618939698219686, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.4477, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.2288, |
| "grad_norm": 0.2733769805343615, |
| "learning_rate": 7.553191489361703e-06, |
| "loss": 0.452, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.2304, |
| "grad_norm": 0.28538229462176795, |
| "learning_rate": 7.606382978723405e-06, |
| "loss": 0.4462, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.232, |
| "grad_norm": 0.25627646052411507, |
| "learning_rate": 7.659574468085107e-06, |
| "loss": 0.4566, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.2336, |
| "grad_norm": 0.2903737015158173, |
| "learning_rate": 7.71276595744681e-06, |
| "loss": 0.4361, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.2352, |
| "grad_norm": 0.34407046875604036, |
| "learning_rate": 7.765957446808511e-06, |
| "loss": 0.465, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.2368, |
| "grad_norm": 0.2828220969143353, |
| "learning_rate": 7.819148936170213e-06, |
| "loss": 0.4423, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.2384, |
| "grad_norm": 0.30439569951297, |
| "learning_rate": 7.872340425531916e-06, |
| "loss": 0.4576, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.3055180598961796, |
| "learning_rate": 7.925531914893617e-06, |
| "loss": 0.4655, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.2416, |
| "grad_norm": 0.3563229631588604, |
| "learning_rate": 7.97872340425532e-06, |
| "loss": 0.442, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.2432, |
| "grad_norm": 0.2883756016355821, |
| "learning_rate": 8.031914893617022e-06, |
| "loss": 0.4086, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.2448, |
| "grad_norm": 0.27570197867271257, |
| "learning_rate": 8.085106382978723e-06, |
| "loss": 0.4381, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.2464, |
| "grad_norm": 0.28177797827667467, |
| "learning_rate": 8.138297872340426e-06, |
| "loss": 0.4585, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.248, |
| "grad_norm": 0.269270254510865, |
| "learning_rate": 8.191489361702128e-06, |
| "loss": 0.4377, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.2496, |
| "grad_norm": 0.29260738710754974, |
| "learning_rate": 8.24468085106383e-06, |
| "loss": 0.4414, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.2512, |
| "grad_norm": 0.2609078246003012, |
| "learning_rate": 8.297872340425532e-06, |
| "loss": 0.4026, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.2528, |
| "grad_norm": 0.2850289272825885, |
| "learning_rate": 8.351063829787235e-06, |
| "loss": 0.43, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.2544, |
| "grad_norm": 0.27265179870963097, |
| "learning_rate": 8.404255319148937e-06, |
| "loss": 0.4348, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.2812821928644855, |
| "learning_rate": 8.457446808510638e-06, |
| "loss": 0.4352, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.2576, |
| "grad_norm": 0.27723620386328945, |
| "learning_rate": 8.510638297872341e-06, |
| "loss": 0.3987, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.2592, |
| "grad_norm": 0.29493310927820177, |
| "learning_rate": 8.563829787234044e-06, |
| "loss": 0.4548, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.2608, |
| "grad_norm": 0.28177048953522205, |
| "learning_rate": 8.617021276595746e-06, |
| "loss": 0.4556, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.2624, |
| "grad_norm": 0.35518172508525553, |
| "learning_rate": 8.670212765957447e-06, |
| "loss": 0.4335, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.264, |
| "grad_norm": 0.28438932439799675, |
| "learning_rate": 8.72340425531915e-06, |
| "loss": 0.4464, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.2656, |
| "grad_norm": 0.2931480338922887, |
| "learning_rate": 8.776595744680852e-06, |
| "loss": 0.4456, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.2672, |
| "grad_norm": 0.27624501138261853, |
| "learning_rate": 8.829787234042555e-06, |
| "loss": 0.4285, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.2688, |
| "grad_norm": 0.31589904881548986, |
| "learning_rate": 8.882978723404256e-06, |
| "loss": 0.4515, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.2704, |
| "grad_norm": 0.2900677749017539, |
| "learning_rate": 8.936170212765958e-06, |
| "loss": 0.4461, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 0.28855425158541476, |
| "learning_rate": 8.98936170212766e-06, |
| "loss": 0.4455, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.2736, |
| "grad_norm": 0.26211572076628536, |
| "learning_rate": 9.042553191489362e-06, |
| "loss": 0.434, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.2752, |
| "grad_norm": 0.33884423024626903, |
| "learning_rate": 9.095744680851063e-06, |
| "loss": 0.446, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.2768, |
| "grad_norm": 0.25687347571569014, |
| "learning_rate": 9.148936170212767e-06, |
| "loss": 0.4633, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.2784, |
| "grad_norm": 0.27978941009654734, |
| "learning_rate": 9.20212765957447e-06, |
| "loss": 0.4636, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.2735862661464922, |
| "learning_rate": 9.255319148936171e-06, |
| "loss": 0.4527, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.2816, |
| "grad_norm": 0.27180034873805003, |
| "learning_rate": 9.308510638297872e-06, |
| "loss": 0.4474, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.2832, |
| "grad_norm": 0.2791769035443627, |
| "learning_rate": 9.361702127659576e-06, |
| "loss": 0.413, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.2848, |
| "grad_norm": 0.280006566195216, |
| "learning_rate": 9.414893617021279e-06, |
| "loss": 0.453, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.2864, |
| "grad_norm": 0.26421085075287665, |
| "learning_rate": 9.46808510638298e-06, |
| "loss": 0.44, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.37124439805157333, |
| "learning_rate": 9.521276595744681e-06, |
| "loss": 0.4478, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.2896, |
| "grad_norm": 0.27283610779400586, |
| "learning_rate": 9.574468085106385e-06, |
| "loss": 0.4236, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.2912, |
| "grad_norm": 0.2683447982787293, |
| "learning_rate": 9.627659574468086e-06, |
| "loss": 0.4422, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.2928, |
| "grad_norm": 0.3435535340166353, |
| "learning_rate": 9.680851063829787e-06, |
| "loss": 0.4404, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.2944, |
| "grad_norm": 0.2636497917906626, |
| "learning_rate": 9.73404255319149e-06, |
| "loss": 0.4055, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.296, |
| "grad_norm": 0.25200193149657396, |
| "learning_rate": 9.787234042553192e-06, |
| "loss": 0.4179, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.2976, |
| "grad_norm": 0.2876070346855186, |
| "learning_rate": 9.840425531914895e-06, |
| "loss": 0.4217, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.2992, |
| "grad_norm": 0.2954414764532411, |
| "learning_rate": 9.893617021276596e-06, |
| "loss": 0.4603, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.3008, |
| "grad_norm": 0.30805455739035814, |
| "learning_rate": 9.946808510638298e-06, |
| "loss": 0.4245, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.3024, |
| "grad_norm": 0.23652770470371356, |
| "learning_rate": 1e-05, |
| "loss": 0.4085, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 0.30781962769898696, |
| "learning_rate": 9.99999133019233e-06, |
| "loss": 0.4456, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.3056, |
| "grad_norm": 0.24932866363381637, |
| "learning_rate": 9.999965320799377e-06, |
| "loss": 0.4493, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.3072, |
| "grad_norm": 0.28488635624262243, |
| "learning_rate": 9.999921971911345e-06, |
| "loss": 0.4489, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.3088, |
| "grad_norm": 0.2778191909754922, |
| "learning_rate": 9.999861283678563e-06, |
| "loss": 0.4461, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.3104, |
| "grad_norm": 0.278466460179113, |
| "learning_rate": 9.999783256311494e-06, |
| "loss": 0.407, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.312, |
| "grad_norm": 0.2815317000597932, |
| "learning_rate": 9.99968789008073e-06, |
| "loss": 0.4771, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.3136, |
| "grad_norm": 0.2766085033751692, |
| "learning_rate": 9.999575185316994e-06, |
| "loss": 0.4549, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.3152, |
| "grad_norm": 0.2584447879083147, |
| "learning_rate": 9.999445142411139e-06, |
| "loss": 0.4404, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.3168, |
| "grad_norm": 0.2454946395328878, |
| "learning_rate": 9.99929776181414e-06, |
| "loss": 0.3973, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.3184, |
| "grad_norm": 0.263898310150237, |
| "learning_rate": 9.999133044037107e-06, |
| "loss": 0.4426, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.27689937096431283, |
| "learning_rate": 9.998950989651261e-06, |
| "loss": 0.4054, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3216, |
| "grad_norm": 0.2597234056042459, |
| "learning_rate": 9.99875159928796e-06, |
| "loss": 0.4416, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.3232, |
| "grad_norm": 0.2807483647074538, |
| "learning_rate": 9.99853487363867e-06, |
| "loss": 0.4153, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.3248, |
| "grad_norm": 0.26584539929243556, |
| "learning_rate": 9.998300813454981e-06, |
| "loss": 0.4227, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.3264, |
| "grad_norm": 0.8360756491331202, |
| "learning_rate": 9.998049419548597e-06, |
| "loss": 0.4091, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.328, |
| "grad_norm": 0.26137334850701693, |
| "learning_rate": 9.997780692791329e-06, |
| "loss": 0.4409, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.3296, |
| "grad_norm": 0.2703199516442474, |
| "learning_rate": 9.997494634115101e-06, |
| "loss": 0.4158, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.3312, |
| "grad_norm": 0.24855834069804883, |
| "learning_rate": 9.997191244511947e-06, |
| "loss": 0.4087, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.3328, |
| "grad_norm": 0.2695293290818504, |
| "learning_rate": 9.996870525033994e-06, |
| "loss": 0.4189, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.3344, |
| "grad_norm": 0.2666876116118198, |
| "learning_rate": 9.996532476793475e-06, |
| "loss": 0.4121, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 0.27574452048115916, |
| "learning_rate": 9.996177100962714e-06, |
| "loss": 0.4406, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.3376, |
| "grad_norm": 0.28420597939788517, |
| "learning_rate": 9.995804398774129e-06, |
| "loss": 0.4291, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.3392, |
| "grad_norm": 0.2804469579879033, |
| "learning_rate": 9.99541437152022e-06, |
| "loss": 0.4405, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.3408, |
| "grad_norm": 0.2701107887650606, |
| "learning_rate": 9.995007020553572e-06, |
| "loss": 0.4311, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.3424, |
| "grad_norm": 0.2862059104196752, |
| "learning_rate": 9.994582347286849e-06, |
| "loss": 0.4675, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.344, |
| "grad_norm": 0.26131389312582814, |
| "learning_rate": 9.994140353192782e-06, |
| "loss": 0.4229, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.3456, |
| "grad_norm": 0.23693193120621023, |
| "learning_rate": 9.993681039804176e-06, |
| "loss": 0.3979, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.3472, |
| "grad_norm": 0.24965537098983853, |
| "learning_rate": 9.99320440871389e-06, |
| "loss": 0.4211, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.3488, |
| "grad_norm": 0.26982110889290406, |
| "learning_rate": 9.99271046157485e-06, |
| "loss": 0.4466, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.3504, |
| "grad_norm": 0.2703359358630076, |
| "learning_rate": 9.99219920010002e-06, |
| "loss": 0.4394, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.2509367316632968, |
| "learning_rate": 9.991670626062422e-06, |
| "loss": 0.4478, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.3536, |
| "grad_norm": 0.2395622207519759, |
| "learning_rate": 9.991124741295106e-06, |
| "loss": 0.4094, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.3552, |
| "grad_norm": 0.3841102876641139, |
| "learning_rate": 9.990561547691159e-06, |
| "loss": 0.4136, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.3568, |
| "grad_norm": 0.24460051693476803, |
| "learning_rate": 9.989981047203693e-06, |
| "loss": 0.4381, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.3584, |
| "grad_norm": 0.2465369798042619, |
| "learning_rate": 9.98938324184584e-06, |
| "loss": 0.4125, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.2666601879116891, |
| "learning_rate": 9.988768133690741e-06, |
| "loss": 0.4646, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.3616, |
| "grad_norm": 0.2662551116697277, |
| "learning_rate": 9.988135724871546e-06, |
| "loss": 0.3957, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.3632, |
| "grad_norm": 0.2950832686696794, |
| "learning_rate": 9.987486017581401e-06, |
| "loss": 0.4394, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.3648, |
| "grad_norm": 0.256251827914605, |
| "learning_rate": 9.986819014073436e-06, |
| "loss": 0.442, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.3664, |
| "grad_norm": 0.2580166082956191, |
| "learning_rate": 9.986134716660774e-06, |
| "loss": 0.4479, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 0.258207985204533, |
| "learning_rate": 9.9854331277165e-06, |
| "loss": 0.4366, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.3696, |
| "grad_norm": 0.29851596152123644, |
| "learning_rate": 9.984714249673676e-06, |
| "loss": 0.4018, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.3712, |
| "grad_norm": 0.26031954045463374, |
| "learning_rate": 9.98397808502531e-06, |
| "loss": 0.4222, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.3728, |
| "grad_norm": 0.265854754111201, |
| "learning_rate": 9.983224636324369e-06, |
| "loss": 0.4395, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.3744, |
| "grad_norm": 0.28868784778928974, |
| "learning_rate": 9.982453906183754e-06, |
| "loss": 0.4614, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.376, |
| "grad_norm": 0.2729475294759368, |
| "learning_rate": 9.981665897276298e-06, |
| "loss": 0.4433, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.3776, |
| "grad_norm": 0.27464063581077125, |
| "learning_rate": 9.980860612334753e-06, |
| "loss": 0.443, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.3792, |
| "grad_norm": 0.2555116742716383, |
| "learning_rate": 9.980038054151789e-06, |
| "loss": 0.425, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.3808, |
| "grad_norm": 0.24418995381001668, |
| "learning_rate": 9.979198225579968e-06, |
| "loss": 0.4223, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.3824, |
| "grad_norm": 0.2922434284567578, |
| "learning_rate": 9.97834112953176e-06, |
| "loss": 0.5082, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.24153416024660354, |
| "learning_rate": 9.9774667689795e-06, |
| "loss": 0.4166, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.3856, |
| "grad_norm": 0.2775638323534193, |
| "learning_rate": 9.976575146955409e-06, |
| "loss": 0.4289, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.3872, |
| "grad_norm": 0.2658361930824293, |
| "learning_rate": 9.97566626655156e-06, |
| "loss": 0.4034, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.3888, |
| "grad_norm": 0.25393342040042427, |
| "learning_rate": 9.974740130919883e-06, |
| "loss": 0.4193, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.3904, |
| "grad_norm": 0.27036137121415993, |
| "learning_rate": 9.973796743272141e-06, |
| "loss": 0.4153, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.392, |
| "grad_norm": 0.266536013501484, |
| "learning_rate": 9.972836106879936e-06, |
| "loss": 0.4004, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.3936, |
| "grad_norm": 0.2558365684077581, |
| "learning_rate": 9.971858225074672e-06, |
| "loss": 0.3974, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.3952, |
| "grad_norm": 0.24857379029995358, |
| "learning_rate": 9.970863101247578e-06, |
| "loss": 0.4145, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.3968, |
| "grad_norm": 0.2749190773308589, |
| "learning_rate": 9.96985073884966e-06, |
| "loss": 0.442, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.3984, |
| "grad_norm": 0.25332377372329845, |
| "learning_rate": 9.968821141391716e-06, |
| "loss": 0.4094, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.2743838938759049, |
| "learning_rate": 9.96777431244431e-06, |
| "loss": 0.4397, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.4016, |
| "grad_norm": 0.24930684396794509, |
| "learning_rate": 9.966710255637764e-06, |
| "loss": 0.4281, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.4032, |
| "grad_norm": 0.24651943931999892, |
| "learning_rate": 9.965628974662145e-06, |
| "loss": 0.4136, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.4048, |
| "grad_norm": 0.2555098899557693, |
| "learning_rate": 9.964530473267253e-06, |
| "loss": 0.4282, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.4064, |
| "grad_norm": 0.262142447156648, |
| "learning_rate": 9.963414755262606e-06, |
| "loss": 0.417, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.408, |
| "grad_norm": 0.2653965919701334, |
| "learning_rate": 9.962281824517427e-06, |
| "loss": 0.4512, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.4096, |
| "grad_norm": 0.26211639116047253, |
| "learning_rate": 9.961131684960635e-06, |
| "loss": 0.4461, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.4112, |
| "grad_norm": 0.26328300386281683, |
| "learning_rate": 9.959964340580823e-06, |
| "loss": 0.425, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.4128, |
| "grad_norm": 0.2769662696145052, |
| "learning_rate": 9.958779795426253e-06, |
| "loss": 0.4781, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.4144, |
| "grad_norm": 0.33160560133399125, |
| "learning_rate": 9.957578053604837e-06, |
| "loss": 0.4202, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.2604033259085469, |
| "learning_rate": 9.956359119284123e-06, |
| "loss": 0.427, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.4176, |
| "grad_norm": 0.264873315139669, |
| "learning_rate": 9.955122996691278e-06, |
| "loss": 0.4311, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.4192, |
| "grad_norm": 0.27618412647854707, |
| "learning_rate": 9.953869690113085e-06, |
| "loss": 0.455, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.4208, |
| "grad_norm": 0.2379513147100074, |
| "learning_rate": 9.952599203895912e-06, |
| "loss": 0.4276, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.4224, |
| "grad_norm": 0.26309918173216623, |
| "learning_rate": 9.95131154244571e-06, |
| "loss": 0.4219, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.424, |
| "grad_norm": 0.2719742287380322, |
| "learning_rate": 9.950006710227986e-06, |
| "loss": 0.4466, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.4256, |
| "grad_norm": 0.2570325313074606, |
| "learning_rate": 9.9486847117678e-06, |
| "loss": 0.4438, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.4272, |
| "grad_norm": 0.2795210068023867, |
| "learning_rate": 9.947345551649741e-06, |
| "loss": 0.4173, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.4288, |
| "grad_norm": 0.25351611662709517, |
| "learning_rate": 9.945989234517913e-06, |
| "loss": 0.3895, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.4304, |
| "grad_norm": 0.2416110470540454, |
| "learning_rate": 9.94461576507592e-06, |
| "loss": 0.4037, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 0.27140153530651845, |
| "learning_rate": 9.943225148086846e-06, |
| "loss": 0.4125, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.4336, |
| "grad_norm": 0.29304176004892296, |
| "learning_rate": 9.941817388373248e-06, |
| "loss": 0.4634, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.4352, |
| "grad_norm": 0.27081097145703015, |
| "learning_rate": 9.940392490817124e-06, |
| "loss": 0.4596, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.4368, |
| "grad_norm": 0.25404599377434295, |
| "learning_rate": 9.938950460359912e-06, |
| "loss": 0.4418, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.4384, |
| "grad_norm": 0.2633584137977437, |
| "learning_rate": 9.937491302002462e-06, |
| "loss": 0.4332, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.2546192009095884, |
| "learning_rate": 9.936015020805022e-06, |
| "loss": 0.4099, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.4416, |
| "grad_norm": 0.26752133466236283, |
| "learning_rate": 9.934521621887223e-06, |
| "loss": 0.4388, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.4432, |
| "grad_norm": 0.28570636146519374, |
| "learning_rate": 9.933011110428058e-06, |
| "loss": 0.4564, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.4448, |
| "grad_norm": 0.26473170108956584, |
| "learning_rate": 9.93148349166586e-06, |
| "loss": 0.43, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.4464, |
| "grad_norm": 0.260258828441617, |
| "learning_rate": 9.929938770898299e-06, |
| "loss": 0.4198, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.28950439091685687, |
| "learning_rate": 9.928376953482343e-06, |
| "loss": 0.4535, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.4496, |
| "grad_norm": 0.3236807121545921, |
| "learning_rate": 9.926798044834261e-06, |
| "loss": 0.4365, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.4512, |
| "grad_norm": 0.2608730146744826, |
| "learning_rate": 9.92520205042958e-06, |
| "loss": 0.4096, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.4528, |
| "grad_norm": 0.2720016870837516, |
| "learning_rate": 9.92358897580309e-06, |
| "loss": 0.4401, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.4544, |
| "grad_norm": 0.2815418979243203, |
| "learning_rate": 9.921958826548808e-06, |
| "loss": 0.3994, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.456, |
| "grad_norm": 0.26285472022999373, |
| "learning_rate": 9.920311608319968e-06, |
| "loss": 0.4237, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.4576, |
| "grad_norm": 0.2556351714117404, |
| "learning_rate": 9.918647326828993e-06, |
| "loss": 0.4081, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.4592, |
| "grad_norm": 0.26704431574389864, |
| "learning_rate": 9.916965987847485e-06, |
| "loss": 0.4062, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.4608, |
| "grad_norm": 0.27710153477197647, |
| "learning_rate": 9.915267597206198e-06, |
| "loss": 0.4279, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.4624, |
| "grad_norm": 0.28664323837475547, |
| "learning_rate": 9.913552160795022e-06, |
| "loss": 0.4291, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 0.24749845106808663, |
| "learning_rate": 9.911819684562954e-06, |
| "loss": 0.4171, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.4656, |
| "grad_norm": 0.3004469167954851, |
| "learning_rate": 9.910070174518093e-06, |
| "loss": 0.4428, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.4672, |
| "grad_norm": 0.2958731908750294, |
| "learning_rate": 9.908303636727604e-06, |
| "loss": 0.4387, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.4688, |
| "grad_norm": 0.28219464047049303, |
| "learning_rate": 9.9065200773177e-06, |
| "loss": 0.4356, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.4704, |
| "grad_norm": 0.27345292732203896, |
| "learning_rate": 9.904719502473635e-06, |
| "loss": 0.4322, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.472, |
| "grad_norm": 0.2713655768710724, |
| "learning_rate": 9.902901918439658e-06, |
| "loss": 0.4388, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.4736, |
| "grad_norm": 0.2972506754371077, |
| "learning_rate": 9.901067331519013e-06, |
| "loss": 0.4275, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.4752, |
| "grad_norm": 0.27424228672400885, |
| "learning_rate": 9.899215748073906e-06, |
| "loss": 0.4332, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.4768, |
| "grad_norm": 0.27332449868754216, |
| "learning_rate": 9.897347174525487e-06, |
| "loss": 0.4058, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.4784, |
| "grad_norm": 0.2543725013946674, |
| "learning_rate": 9.895461617353823e-06, |
| "loss": 0.4111, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.2646116402889093, |
| "learning_rate": 9.893559083097885e-06, |
| "loss": 0.4334, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4816, |
| "grad_norm": 0.25742572175100914, |
| "learning_rate": 9.891639578355511e-06, |
| "loss": 0.44, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.4832, |
| "grad_norm": 0.31423878273511435, |
| "learning_rate": 9.8897031097834e-06, |
| "loss": 0.3995, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.4848, |
| "grad_norm": 0.2805923243045686, |
| "learning_rate": 9.887749684097072e-06, |
| "loss": 0.426, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.4864, |
| "grad_norm": 0.24992932119026864, |
| "learning_rate": 9.88577930807086e-06, |
| "loss": 0.3947, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.488, |
| "grad_norm": 0.2550143536933856, |
| "learning_rate": 9.883791988537874e-06, |
| "loss": 0.4158, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.4896, |
| "grad_norm": 0.27435946520841087, |
| "learning_rate": 9.881787732389987e-06, |
| "loss": 0.4188, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.4912, |
| "grad_norm": 0.2705794622776453, |
| "learning_rate": 9.879766546577805e-06, |
| "loss": 0.4212, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.4928, |
| "grad_norm": 0.2852300872953745, |
| "learning_rate": 9.877728438110645e-06, |
| "loss": 0.4142, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.4944, |
| "grad_norm": 0.24895797808543246, |
| "learning_rate": 9.87567341405651e-06, |
| "loss": 0.3918, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 0.2775145205697356, |
| "learning_rate": 9.873601481542065e-06, |
| "loss": 0.4198, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.4976, |
| "grad_norm": 0.2470372346024297, |
| "learning_rate": 9.871512647752612e-06, |
| "loss": 0.3928, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.4992, |
| "grad_norm": 0.26233635910080166, |
| "learning_rate": 9.86940691993207e-06, |
| "loss": 0.4089, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.5008, |
| "grad_norm": 0.26519333486860885, |
| "learning_rate": 9.867284305382936e-06, |
| "loss": 0.4522, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.5024, |
| "grad_norm": 0.26526188559044256, |
| "learning_rate": 9.865144811466275e-06, |
| "loss": 0.4229, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.504, |
| "grad_norm": 0.27408627530343427, |
| "learning_rate": 9.86298844560169e-06, |
| "loss": 0.435, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.5056, |
| "grad_norm": 0.2535584200640407, |
| "learning_rate": 9.860815215267288e-06, |
| "loss": 0.4041, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.5072, |
| "grad_norm": 0.2743044120106905, |
| "learning_rate": 9.858625127999668e-06, |
| "loss": 0.4263, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.5088, |
| "grad_norm": 0.2610615583729268, |
| "learning_rate": 9.856418191393881e-06, |
| "loss": 0.4093, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.5104, |
| "grad_norm": 0.26438138612697265, |
| "learning_rate": 9.854194413103418e-06, |
| "loss": 0.4292, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.273602828627136, |
| "learning_rate": 9.851953800840166e-06, |
| "loss": 0.4321, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.5136, |
| "grad_norm": 0.24649148317677128, |
| "learning_rate": 9.849696362374399e-06, |
| "loss": 0.3966, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.5152, |
| "grad_norm": 0.2433943623052224, |
| "learning_rate": 9.847422105534739e-06, |
| "loss": 0.4273, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.5168, |
| "grad_norm": 0.2604679160640333, |
| "learning_rate": 9.845131038208135e-06, |
| "loss": 0.4042, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.5184, |
| "grad_norm": 0.7634501751681075, |
| "learning_rate": 9.84282316833983e-06, |
| "loss": 0.4146, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.2620524687831515, |
| "learning_rate": 9.84049850393334e-06, |
| "loss": 0.434, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.5216, |
| "grad_norm": 0.2583696659150591, |
| "learning_rate": 9.838157053050423e-06, |
| "loss": 0.4309, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.5232, |
| "grad_norm": 0.2577780941557524, |
| "learning_rate": 9.83579882381105e-06, |
| "loss": 0.4287, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.5248, |
| "grad_norm": 0.24058187071429757, |
| "learning_rate": 9.83342382439338e-06, |
| "loss": 0.3921, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.5264, |
| "grad_norm": 0.2552387026212205, |
| "learning_rate": 9.831032063033726e-06, |
| "loss": 0.416, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 0.2552897731502488, |
| "learning_rate": 9.828623548026533e-06, |
| "loss": 0.4311, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5296, |
| "grad_norm": 0.22942217648980823, |
| "learning_rate": 9.826198287724346e-06, |
| "loss": 0.3975, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.5312, |
| "grad_norm": 0.2549464588194328, |
| "learning_rate": 9.823756290537783e-06, |
| "loss": 0.4095, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.5328, |
| "grad_norm": 0.24500978961666606, |
| "learning_rate": 9.821297564935499e-06, |
| "loss": 0.4153, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.5344, |
| "grad_norm": 0.2578965372601706, |
| "learning_rate": 9.81882211944417e-06, |
| "loss": 0.4166, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.536, |
| "grad_norm": 0.26750854907779215, |
| "learning_rate": 9.816329962648444e-06, |
| "loss": 0.4517, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.5376, |
| "grad_norm": 0.2494374014343831, |
| "learning_rate": 9.813821103190932e-06, |
| "loss": 0.4238, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.5392, |
| "grad_norm": 0.25335777193765024, |
| "learning_rate": 9.811295549772169e-06, |
| "loss": 0.4222, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.5408, |
| "grad_norm": 0.25810264393423754, |
| "learning_rate": 9.808753311150575e-06, |
| "loss": 0.4096, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.5424, |
| "grad_norm": 0.27751681673218015, |
| "learning_rate": 9.80619439614244e-06, |
| "loss": 0.4273, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 0.2349942470045688, |
| "learning_rate": 9.803618813621885e-06, |
| "loss": 0.395, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.5456, |
| "grad_norm": 0.2549403282621449, |
| "learning_rate": 9.801026572520832e-06, |
| "loss": 0.4242, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.5472, |
| "grad_norm": 0.23212440548205485, |
| "learning_rate": 9.798417681828972e-06, |
| "loss": 0.3979, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.5488, |
| "grad_norm": 0.25053489468522966, |
| "learning_rate": 9.795792150593739e-06, |
| "loss": 0.4272, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.5504, |
| "grad_norm": 0.23096167443216806, |
| "learning_rate": 9.793149987920273e-06, |
| "loss": 0.4173, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.552, |
| "grad_norm": 0.24503803073896976, |
| "learning_rate": 9.79049120297139e-06, |
| "loss": 0.4016, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.5536, |
| "grad_norm": 0.24704700973410243, |
| "learning_rate": 9.787815804967554e-06, |
| "loss": 0.4067, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.5552, |
| "grad_norm": 0.2547541279449318, |
| "learning_rate": 9.785123803186834e-06, |
| "loss": 0.4588, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.5568, |
| "grad_norm": 0.23881790479528361, |
| "learning_rate": 9.782415206964892e-06, |
| "loss": 0.4208, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.5584, |
| "grad_norm": 0.23953308997266623, |
| "learning_rate": 9.779690025694926e-06, |
| "loss": 0.4334, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.23590588442749005, |
| "learning_rate": 9.776948268827658e-06, |
| "loss": 0.4276, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.5616, |
| "grad_norm": 0.24019258359989243, |
| "learning_rate": 9.77418994587129e-06, |
| "loss": 0.4133, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.5632, |
| "grad_norm": 0.24959277328520646, |
| "learning_rate": 9.771415066391473e-06, |
| "loss": 0.3873, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.5648, |
| "grad_norm": 0.2687821078534511, |
| "learning_rate": 9.768623640011272e-06, |
| "loss": 0.4364, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.5664, |
| "grad_norm": 0.23443352179113627, |
| "learning_rate": 9.765815676411145e-06, |
| "loss": 0.3999, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.568, |
| "grad_norm": 0.2395325277995752, |
| "learning_rate": 9.762991185328891e-06, |
| "loss": 0.399, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.5696, |
| "grad_norm": 0.2474928263232878, |
| "learning_rate": 9.760150176559627e-06, |
| "loss": 0.4193, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.5712, |
| "grad_norm": 0.23546985707021315, |
| "learning_rate": 9.757292659955755e-06, |
| "loss": 0.4094, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.5728, |
| "grad_norm": 0.2828846704947998, |
| "learning_rate": 9.754418645426919e-06, |
| "loss": 0.4261, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.5744, |
| "grad_norm": 0.2207733845598482, |
| "learning_rate": 9.751528142939986e-06, |
| "loss": 0.3868, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.2628417557201298, |
| "learning_rate": 9.74862116251899e-06, |
| "loss": 0.4176, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5776, |
| "grad_norm": 0.2528530970184287, |
| "learning_rate": 9.74569771424512e-06, |
| "loss": 0.4531, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.5792, |
| "grad_norm": 0.2553590919529084, |
| "learning_rate": 9.742757808256667e-06, |
| "loss": 0.4108, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.5808, |
| "grad_norm": 0.26548592755577416, |
| "learning_rate": 9.739801454749e-06, |
| "loss": 0.4291, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.5824, |
| "grad_norm": 0.23394208640104958, |
| "learning_rate": 9.736828663974527e-06, |
| "loss": 0.4022, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.584, |
| "grad_norm": 0.23968283233843202, |
| "learning_rate": 9.733839446242655e-06, |
| "loss": 0.4055, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.5856, |
| "grad_norm": 0.24249295128235912, |
| "learning_rate": 9.730833811919763e-06, |
| "loss": 0.4188, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.5872, |
| "grad_norm": 0.23172247667846696, |
| "learning_rate": 9.727811771429158e-06, |
| "loss": 0.3944, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.5888, |
| "grad_norm": 0.25162769583845757, |
| "learning_rate": 9.724773335251046e-06, |
| "loss": 0.4306, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.5904, |
| "grad_norm": 0.26740547073862814, |
| "learning_rate": 9.721718513922488e-06, |
| "loss": 0.3897, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 0.24626125802541626, |
| "learning_rate": 9.71864731803737e-06, |
| "loss": 0.3781, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.5936, |
| "grad_norm": 0.37108593281897295, |
| "learning_rate": 9.715559758246363e-06, |
| "loss": 0.4041, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.5952, |
| "grad_norm": 0.22564877989448434, |
| "learning_rate": 9.712455845256888e-06, |
| "loss": 0.3904, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.5968, |
| "grad_norm": 0.24093350504732436, |
| "learning_rate": 9.709335589833076e-06, |
| "loss": 0.4064, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.5984, |
| "grad_norm": 0.24207734809875767, |
| "learning_rate": 9.70619900279573e-06, |
| "loss": 0.4291, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.24484583824948708, |
| "learning_rate": 9.703046095022297e-06, |
| "loss": 0.4081, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.6016, |
| "grad_norm": 0.3215013054573237, |
| "learning_rate": 9.699876877446815e-06, |
| "loss": 0.4508, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.6032, |
| "grad_norm": 0.2404708752405106, |
| "learning_rate": 9.696691361059886e-06, |
| "loss": 0.3916, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.6048, |
| "grad_norm": 0.25229353450088776, |
| "learning_rate": 9.693489556908641e-06, |
| "loss": 0.4066, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.6064, |
| "grad_norm": 0.24344139574233004, |
| "learning_rate": 9.690271476096686e-06, |
| "loss": 0.429, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 0.24874058906953497, |
| "learning_rate": 9.68703712978408e-06, |
| "loss": 0.3955, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.6096, |
| "grad_norm": 0.23326814408435695, |
| "learning_rate": 9.683786529187287e-06, |
| "loss": 0.3994, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.6112, |
| "grad_norm": 0.2594186242625738, |
| "learning_rate": 9.680519685579137e-06, |
| "loss": 0.4019, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.6128, |
| "grad_norm": 0.24873442307493876, |
| "learning_rate": 9.677236610288797e-06, |
| "loss": 0.4057, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.6144, |
| "grad_norm": 0.26704130268838505, |
| "learning_rate": 9.673937314701714e-06, |
| "loss": 0.4536, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.616, |
| "grad_norm": 0.23554030004907517, |
| "learning_rate": 9.670621810259596e-06, |
| "loss": 0.4033, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.6176, |
| "grad_norm": 0.26968474668823555, |
| "learning_rate": 9.667290108460354e-06, |
| "loss": 0.4206, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.6192, |
| "grad_norm": 0.24885328646903404, |
| "learning_rate": 9.663942220858075e-06, |
| "loss": 0.3969, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.6208, |
| "grad_norm": 0.244180389981599, |
| "learning_rate": 9.660578159062977e-06, |
| "loss": 0.4242, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.6224, |
| "grad_norm": 0.23834284699363076, |
| "learning_rate": 9.657197934741366e-06, |
| "loss": 0.398, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 0.25708628814194484, |
| "learning_rate": 9.6538015596156e-06, |
| "loss": 0.4111, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.6256, |
| "grad_norm": 0.24625296227291354, |
| "learning_rate": 9.650389045464046e-06, |
| "loss": 0.4104, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.6272, |
| "grad_norm": 0.25393028266817624, |
| "learning_rate": 9.646960404121042e-06, |
| "loss": 0.4128, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.6288, |
| "grad_norm": 0.2548061080792936, |
| "learning_rate": 9.643515647476851e-06, |
| "loss": 0.4047, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.6304, |
| "grad_norm": 0.2420266948866382, |
| "learning_rate": 9.640054787477626e-06, |
| "loss": 0.4048, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.632, |
| "grad_norm": 0.24786139476437924, |
| "learning_rate": 9.63657783612536e-06, |
| "loss": 0.4068, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.6336, |
| "grad_norm": 0.23649061547693379, |
| "learning_rate": 9.633084805477857e-06, |
| "loss": 0.3903, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.6352, |
| "grad_norm": 0.25991189798352177, |
| "learning_rate": 9.629575707648675e-06, |
| "loss": 0.4087, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.6368, |
| "grad_norm": 0.24768551487811422, |
| "learning_rate": 9.626050554807096e-06, |
| "loss": 0.4422, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.6384, |
| "grad_norm": 0.24946708335590428, |
| "learning_rate": 9.62250935917808e-06, |
| "loss": 0.4196, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.27890283133930316, |
| "learning_rate": 9.618952133042223e-06, |
| "loss": 0.3884, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.6416, |
| "grad_norm": 0.23605075284527735, |
| "learning_rate": 9.615378888735706e-06, |
| "loss": 0.401, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.6432, |
| "grad_norm": 0.2533503191446513, |
| "learning_rate": 9.611789638650269e-06, |
| "loss": 0.4267, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.6448, |
| "grad_norm": 0.24271588021534124, |
| "learning_rate": 9.608184395233156e-06, |
| "loss": 0.402, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.6464, |
| "grad_norm": 0.2579781179952206, |
| "learning_rate": 9.604563170987072e-06, |
| "loss": 0.4057, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.648, |
| "grad_norm": 0.2394357811306976, |
| "learning_rate": 9.600925978470143e-06, |
| "loss": 0.4472, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.6496, |
| "grad_norm": 0.24839805117234262, |
| "learning_rate": 9.597272830295877e-06, |
| "loss": 0.4206, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.6512, |
| "grad_norm": 0.2424825405473693, |
| "learning_rate": 9.593603739133105e-06, |
| "loss": 0.4031, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.6528, |
| "grad_norm": 0.24463845088245798, |
| "learning_rate": 9.589918717705957e-06, |
| "loss": 0.4264, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.6544, |
| "grad_norm": 0.22769688146873315, |
| "learning_rate": 9.586217778793804e-06, |
| "loss": 0.3793, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 0.2360410211040705, |
| "learning_rate": 9.582500935231215e-06, |
| "loss": 0.3942, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.6576, |
| "grad_norm": 0.24588742952193718, |
| "learning_rate": 9.57876819990792e-06, |
| "loss": 0.4085, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.6592, |
| "grad_norm": 0.24390152748798177, |
| "learning_rate": 9.575019585768758e-06, |
| "loss": 0.4191, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.6608, |
| "grad_norm": 0.2500206757295772, |
| "learning_rate": 9.571255105813632e-06, |
| "loss": 0.4058, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.6624, |
| "grad_norm": 0.24755737513943982, |
| "learning_rate": 9.567474773097469e-06, |
| "loss": 0.4216, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.664, |
| "grad_norm": 0.23713830329952726, |
| "learning_rate": 9.563678600730175e-06, |
| "loss": 0.3976, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.6656, |
| "grad_norm": 0.23586602642387916, |
| "learning_rate": 9.559866601876581e-06, |
| "loss": 0.4094, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.6672, |
| "grad_norm": 0.26408281276843976, |
| "learning_rate": 9.556038789756407e-06, |
| "loss": 0.4063, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.6688, |
| "grad_norm": 0.24219260926914504, |
| "learning_rate": 9.55219517764421e-06, |
| "loss": 0.4028, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.6704, |
| "grad_norm": 0.2622583949132118, |
| "learning_rate": 9.548335778869342e-06, |
| "loss": 0.4208, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 0.23241983555924844, |
| "learning_rate": 9.544460606815901e-06, |
| "loss": 0.3875, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.6736, |
| "grad_norm": 0.252480064714981, |
| "learning_rate": 9.540569674922685e-06, |
| "loss": 0.4145, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.6752, |
| "grad_norm": 0.3355387667956865, |
| "learning_rate": 9.536662996683146e-06, |
| "loss": 0.3847, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.6768, |
| "grad_norm": 0.24692781636480424, |
| "learning_rate": 9.532740585645346e-06, |
| "loss": 0.4213, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.6784, |
| "grad_norm": 0.26087547993874427, |
| "learning_rate": 9.528802455411902e-06, |
| "loss": 0.4593, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.23515334284767903, |
| "learning_rate": 9.52484861963995e-06, |
| "loss": 0.4035, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.6816, |
| "grad_norm": 0.23762584949634818, |
| "learning_rate": 9.520879092041085e-06, |
| "loss": 0.4005, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.6832, |
| "grad_norm": 0.25279354452080266, |
| "learning_rate": 9.516893886381324e-06, |
| "loss": 0.438, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.6848, |
| "grad_norm": 0.23480471995075008, |
| "learning_rate": 9.512893016481053e-06, |
| "loss": 0.3856, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.6864, |
| "grad_norm": 0.2527751166813315, |
| "learning_rate": 9.508876496214983e-06, |
| "loss": 0.4571, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 0.24128323772807392, |
| "learning_rate": 9.504844339512096e-06, |
| "loss": 0.418, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.6896, |
| "grad_norm": 0.23567373236863423, |
| "learning_rate": 9.500796560355603e-06, |
| "loss": 0.4049, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.6912, |
| "grad_norm": 0.24335646562712338, |
| "learning_rate": 9.496733172782889e-06, |
| "loss": 0.3863, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.6928, |
| "grad_norm": 0.2424231472857203, |
| "learning_rate": 9.492654190885469e-06, |
| "loss": 0.3949, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.6944, |
| "grad_norm": 0.2518376330341733, |
| "learning_rate": 9.488559628808939e-06, |
| "loss": 0.4196, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.696, |
| "grad_norm": 0.2363621548764173, |
| "learning_rate": 9.484449500752927e-06, |
| "loss": 0.4032, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.6976, |
| "grad_norm": 0.23200593154980625, |
| "learning_rate": 9.480323820971039e-06, |
| "loss": 0.4157, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.6992, |
| "grad_norm": 0.23621780025226854, |
| "learning_rate": 9.476182603770814e-06, |
| "loss": 0.388, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.7008, |
| "grad_norm": 0.2701586746408444, |
| "learning_rate": 9.472025863513676e-06, |
| "loss": 0.3994, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.7024, |
| "grad_norm": 0.23650750009514723, |
| "learning_rate": 9.467853614614883e-06, |
| "loss": 0.4053, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.25259614744142017, |
| "learning_rate": 9.46366587154347e-06, |
| "loss": 0.4108, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.7056, |
| "grad_norm": 0.2249562183301669, |
| "learning_rate": 9.459462648822209e-06, |
| "loss": 0.3891, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.7072, |
| "grad_norm": 0.26130155894795554, |
| "learning_rate": 9.45524396102755e-06, |
| "loss": 0.4477, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.7088, |
| "grad_norm": 0.23834779340468057, |
| "learning_rate": 9.451009822789583e-06, |
| "loss": 0.3876, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.7104, |
| "grad_norm": 0.27932069630775286, |
| "learning_rate": 9.44676024879197e-06, |
| "loss": 0.4189, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.712, |
| "grad_norm": 0.2611663386145865, |
| "learning_rate": 9.442495253771909e-06, |
| "loss": 0.4476, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.7136, |
| "grad_norm": 0.29247043941624573, |
| "learning_rate": 9.438214852520073e-06, |
| "loss": 0.4152, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.7152, |
| "grad_norm": 0.23494049103942932, |
| "learning_rate": 9.433919059880564e-06, |
| "loss": 0.4083, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.7168, |
| "grad_norm": 0.24952743892453258, |
| "learning_rate": 9.429607890750863e-06, |
| "loss": 0.4044, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.7184, |
| "grad_norm": 0.2519535072786672, |
| "learning_rate": 9.425281360081769e-06, |
| "loss": 0.4149, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.266817450931765, |
| "learning_rate": 9.420939482877359e-06, |
| "loss": 0.4305, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.7216, |
| "grad_norm": 0.27115856924921494, |
| "learning_rate": 9.416582274194929e-06, |
| "loss": 0.4503, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.7232, |
| "grad_norm": 0.23382210558345803, |
| "learning_rate": 9.412209749144947e-06, |
| "loss": 0.3929, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.7248, |
| "grad_norm": 0.23021848560398114, |
| "learning_rate": 9.40782192289099e-06, |
| "loss": 0.3909, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.7264, |
| "grad_norm": 0.5127374552501491, |
| "learning_rate": 9.4034188106497e-06, |
| "loss": 0.4443, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.728, |
| "grad_norm": 0.22587428439728058, |
| "learning_rate": 9.399000427690736e-06, |
| "loss": 0.4032, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.7296, |
| "grad_norm": 0.24588733373553853, |
| "learning_rate": 9.394566789336707e-06, |
| "loss": 0.3928, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.7312, |
| "grad_norm": 0.24916911686760326, |
| "learning_rate": 9.390117910963132e-06, |
| "loss": 0.4171, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.7328, |
| "grad_norm": 0.27406750479940084, |
| "learning_rate": 9.385653807998376e-06, |
| "loss": 0.4302, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.7344, |
| "grad_norm": 0.2471790183630099, |
| "learning_rate": 9.381174495923608e-06, |
| "loss": 0.411, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.23407210943978232, |
| "learning_rate": 9.376679990272736e-06, |
| "loss": 0.4001, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.7376, |
| "grad_norm": 0.24411212762974027, |
| "learning_rate": 9.37217030663236e-06, |
| "loss": 0.4181, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.7392, |
| "grad_norm": 0.24779600768963947, |
| "learning_rate": 9.367645460641716e-06, |
| "loss": 0.4183, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.7408, |
| "grad_norm": 0.23829375861072322, |
| "learning_rate": 9.36310546799262e-06, |
| "loss": 0.4163, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.7424, |
| "grad_norm": 0.2364441645933766, |
| "learning_rate": 9.358550344429421e-06, |
| "loss": 0.4005, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.744, |
| "grad_norm": 0.2454398007123733, |
| "learning_rate": 9.353980105748934e-06, |
| "loss": 0.4446, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.7456, |
| "grad_norm": 0.24112755357085913, |
| "learning_rate": 9.349394767800397e-06, |
| "loss": 0.4129, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.7472, |
| "grad_norm": 0.23884592293927956, |
| "learning_rate": 9.344794346485408e-06, |
| "loss": 0.4394, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.7488, |
| "grad_norm": 0.27243110372812157, |
| "learning_rate": 9.340178857757876e-06, |
| "loss": 0.429, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.7504, |
| "grad_norm": 0.2647143894376016, |
| "learning_rate": 9.335548317623957e-06, |
| "loss": 0.4547, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 0.24896424647762036, |
| "learning_rate": 9.330902742142013e-06, |
| "loss": 0.4176, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.7536, |
| "grad_norm": 0.23541784981253153, |
| "learning_rate": 9.326242147422538e-06, |
| "loss": 0.3869, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.7552, |
| "grad_norm": 0.3013037013522296, |
| "learning_rate": 9.321566549628118e-06, |
| "loss": 0.404, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.7568, |
| "grad_norm": 0.23664923519055645, |
| "learning_rate": 9.316875964973366e-06, |
| "loss": 0.3946, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.7584, |
| "grad_norm": 0.24771978833942737, |
| "learning_rate": 9.31217040972487e-06, |
| "loss": 0.4205, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.22641384060935327, |
| "learning_rate": 9.307449900201132e-06, |
| "loss": 0.4141, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.7616, |
| "grad_norm": 0.23931400418527723, |
| "learning_rate": 9.302714452772515e-06, |
| "loss": 0.4086, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.7632, |
| "grad_norm": 0.22814256259937488, |
| "learning_rate": 9.29796408386119e-06, |
| "loss": 0.4014, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.7648, |
| "grad_norm": 0.25025024308751254, |
| "learning_rate": 9.293198809941067e-06, |
| "loss": 0.4235, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.7664, |
| "grad_norm": 0.23296486397762242, |
| "learning_rate": 9.288418647537752e-06, |
| "loss": 0.4168, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.24186035674860898, |
| "learning_rate": 9.283623613228479e-06, |
| "loss": 0.4089, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.7696, |
| "grad_norm": 0.24405655566173448, |
| "learning_rate": 9.27881372364206e-06, |
| "loss": 0.4173, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.7712, |
| "grad_norm": 0.22770661204996145, |
| "learning_rate": 9.27398899545882e-06, |
| "loss": 0.3886, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.7728, |
| "grad_norm": 0.25313957389681824, |
| "learning_rate": 9.269149445410545e-06, |
| "loss": 0.4372, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.7744, |
| "grad_norm": 0.2268287592983455, |
| "learning_rate": 9.264295090280424e-06, |
| "loss": 0.4086, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.776, |
| "grad_norm": 0.25646407988966896, |
| "learning_rate": 9.259425946902987e-06, |
| "loss": 0.4085, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.7776, |
| "grad_norm": 0.24329905740284236, |
| "learning_rate": 9.254542032164047e-06, |
| "loss": 0.4101, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.7792, |
| "grad_norm": 0.3142551616018468, |
| "learning_rate": 9.249643363000645e-06, |
| "loss": 0.3949, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.7808, |
| "grad_norm": 0.24387609226845128, |
| "learning_rate": 9.24472995640099e-06, |
| "loss": 0.3934, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.7824, |
| "grad_norm": 0.2580441886544725, |
| "learning_rate": 9.239801829404396e-06, |
| "loss": 0.387, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 0.2573459560882049, |
| "learning_rate": 9.234858999101232e-06, |
| "loss": 0.3978, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.7856, |
| "grad_norm": 0.2617093127244288, |
| "learning_rate": 9.22990148263285e-06, |
| "loss": 0.4298, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.7872, |
| "grad_norm": 0.261610499848751, |
| "learning_rate": 9.224929297191536e-06, |
| "loss": 0.4038, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.7888, |
| "grad_norm": 0.23996402976214165, |
| "learning_rate": 9.219942460020447e-06, |
| "loss": 0.3719, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.7904, |
| "grad_norm": 0.25000394307558105, |
| "learning_rate": 9.214940988413552e-06, |
| "loss": 0.4264, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.792, |
| "grad_norm": 0.23975518284045635, |
| "learning_rate": 9.20992489971557e-06, |
| "loss": 0.4116, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.7936, |
| "grad_norm": 0.2637434764454566, |
| "learning_rate": 9.204894211321906e-06, |
| "loss": 0.4204, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.7952, |
| "grad_norm": 0.25445793585742843, |
| "learning_rate": 9.199848940678607e-06, |
| "loss": 0.395, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.7968, |
| "grad_norm": 0.2391464112976764, |
| "learning_rate": 9.194789105282277e-06, |
| "loss": 0.4094, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.7984, |
| "grad_norm": 0.2511630149266665, |
| "learning_rate": 9.189714722680041e-06, |
| "loss": 0.4026, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.22636947294322163, |
| "learning_rate": 9.184625810469468e-06, |
| "loss": 0.4007, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8016, |
| "grad_norm": 0.2351956635652042, |
| "learning_rate": 9.179522386298508e-06, |
| "loss": 0.388, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.8032, |
| "grad_norm": 0.2520278541351905, |
| "learning_rate": 9.174404467865447e-06, |
| "loss": 0.4127, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.8048, |
| "grad_norm": 0.24150883882324126, |
| "learning_rate": 9.169272072918834e-06, |
| "loss": 0.4135, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.8064, |
| "grad_norm": 0.3533318958903199, |
| "learning_rate": 9.164125219257419e-06, |
| "loss": 0.3896, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.808, |
| "grad_norm": 0.2409710587954575, |
| "learning_rate": 9.158963924730092e-06, |
| "loss": 0.4188, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.8096, |
| "grad_norm": 0.2397048612506856, |
| "learning_rate": 9.153788207235827e-06, |
| "loss": 0.3941, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.8112, |
| "grad_norm": 0.24460257620237383, |
| "learning_rate": 9.148598084723615e-06, |
| "loss": 0.3845, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.8128, |
| "grad_norm": 0.26393012185502734, |
| "learning_rate": 9.143393575192402e-06, |
| "loss": 0.4137, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.8144, |
| "grad_norm": 0.24538146526455115, |
| "learning_rate": 9.138174696691025e-06, |
| "loss": 0.3836, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 0.24983923014892706, |
| "learning_rate": 9.132941467318152e-06, |
| "loss": 0.416, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.8176, |
| "grad_norm": 0.2883856466824079, |
| "learning_rate": 9.127693905222223e-06, |
| "loss": 0.4163, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.8192, |
| "grad_norm": 0.24167156602777856, |
| "learning_rate": 9.122432028601377e-06, |
| "loss": 0.427, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.8208, |
| "grad_norm": 0.2721892332119354, |
| "learning_rate": 9.1171558557034e-06, |
| "loss": 0.4099, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.8224, |
| "grad_norm": 0.2681466205421472, |
| "learning_rate": 9.111865404825652e-06, |
| "loss": 0.4143, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.824, |
| "grad_norm": 0.253088745126355, |
| "learning_rate": 9.10656069431501e-06, |
| "loss": 0.418, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.8256, |
| "grad_norm": 0.23793295337988632, |
| "learning_rate": 9.101241742567802e-06, |
| "loss": 0.3837, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.8272, |
| "grad_norm": 0.24524641552840834, |
| "learning_rate": 9.095908568029741e-06, |
| "loss": 0.4365, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.8288, |
| "grad_norm": 0.26614604939479614, |
| "learning_rate": 9.09056118919587e-06, |
| "loss": 0.4074, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.8304, |
| "grad_norm": 0.2472867990647052, |
| "learning_rate": 9.085199624610486e-06, |
| "loss": 0.4226, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 0.23868807651194116, |
| "learning_rate": 9.079823892867083e-06, |
| "loss": 0.4066, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.8336, |
| "grad_norm": 0.25878915691768595, |
| "learning_rate": 9.074434012608282e-06, |
| "loss": 0.4251, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.8352, |
| "grad_norm": 0.2495152527158786, |
| "learning_rate": 9.069030002525777e-06, |
| "loss": 0.4085, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.8368, |
| "grad_norm": 0.27326125251612854, |
| "learning_rate": 9.063611881360258e-06, |
| "loss": 0.401, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.8384, |
| "grad_norm": 0.24301893610908135, |
| "learning_rate": 9.05817966790135e-06, |
| "loss": 0.4101, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.24074133542582551, |
| "learning_rate": 9.052733380987555e-06, |
| "loss": 0.3978, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.8416, |
| "grad_norm": 0.2539918099066782, |
| "learning_rate": 9.047273039506174e-06, |
| "loss": 0.4065, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.8432, |
| "grad_norm": 0.24872928347956014, |
| "learning_rate": 9.041798662393255e-06, |
| "loss": 0.4181, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.8448, |
| "grad_norm": 0.26576211519986476, |
| "learning_rate": 9.036310268633515e-06, |
| "loss": 0.4342, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.8464, |
| "grad_norm": 0.24371177971968497, |
| "learning_rate": 9.030807877260278e-06, |
| "loss": 0.4076, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 0.2786966773171972, |
| "learning_rate": 9.025291507355419e-06, |
| "loss": 0.4388, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.8496, |
| "grad_norm": 0.27593397405499215, |
| "learning_rate": 9.01976117804928e-06, |
| "loss": 0.4514, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.8512, |
| "grad_norm": 0.24060873446437048, |
| "learning_rate": 9.014216908520619e-06, |
| "loss": 0.3901, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.8528, |
| "grad_norm": 0.2522297432577823, |
| "learning_rate": 9.008658717996538e-06, |
| "loss": 0.4096, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.8544, |
| "grad_norm": 0.2610138032164781, |
| "learning_rate": 9.003086625752414e-06, |
| "loss": 0.4316, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.856, |
| "grad_norm": 0.2752622140943053, |
| "learning_rate": 8.997500651111833e-06, |
| "loss": 0.4, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.8576, |
| "grad_norm": 0.2589287233882201, |
| "learning_rate": 8.991900813446523e-06, |
| "loss": 0.4309, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.8592, |
| "grad_norm": 0.25797512725439886, |
| "learning_rate": 8.986287132176295e-06, |
| "loss": 0.4252, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.8608, |
| "grad_norm": 0.26890161549840247, |
| "learning_rate": 8.980659626768961e-06, |
| "loss": 0.418, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.8624, |
| "grad_norm": 0.23548789508539825, |
| "learning_rate": 8.975018316740278e-06, |
| "loss": 0.3911, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 0.25730643723381863, |
| "learning_rate": 8.969363221653875e-06, |
| "loss": 0.4215, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.8656, |
| "grad_norm": 0.2494413517858074, |
| "learning_rate": 8.963694361121186e-06, |
| "loss": 0.4233, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.8672, |
| "grad_norm": 0.23620259244215902, |
| "learning_rate": 8.958011754801383e-06, |
| "loss": 0.4094, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.8688, |
| "grad_norm": 0.2610464121840486, |
| "learning_rate": 8.952315422401307e-06, |
| "loss": 0.419, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.8704, |
| "grad_norm": 0.2333917305691583, |
| "learning_rate": 8.946605383675403e-06, |
| "loss": 0.373, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.872, |
| "grad_norm": 0.24544814961631137, |
| "learning_rate": 8.940881658425645e-06, |
| "loss": 0.3925, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.8736, |
| "grad_norm": 0.2545369565944543, |
| "learning_rate": 8.93514426650147e-06, |
| "loss": 0.4024, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.8752, |
| "grad_norm": 0.23672473372754915, |
| "learning_rate": 8.929393227799715e-06, |
| "loss": 0.3904, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.8768, |
| "grad_norm": 0.2330746946376594, |
| "learning_rate": 8.923628562264536e-06, |
| "loss": 0.3717, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.8784, |
| "grad_norm": 0.26087240648338994, |
| "learning_rate": 8.917850289887353e-06, |
| "loss": 0.3977, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.2523301116317474, |
| "learning_rate": 8.91205843070677e-06, |
| "loss": 0.3957, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.8816, |
| "grad_norm": 0.2399828866538443, |
| "learning_rate": 8.906253004808506e-06, |
| "loss": 0.408, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.8832, |
| "grad_norm": 0.26175825792189905, |
| "learning_rate": 8.900434032325332e-06, |
| "loss": 0.4527, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.8848, |
| "grad_norm": 0.22797812496608819, |
| "learning_rate": 8.894601533437e-06, |
| "loss": 0.3743, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.8864, |
| "grad_norm": 0.24985308668549885, |
| "learning_rate": 8.888755528370163e-06, |
| "loss": 0.4187, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.888, |
| "grad_norm": 0.22478959733451018, |
| "learning_rate": 8.882896037398322e-06, |
| "loss": 0.3957, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.8896, |
| "grad_norm": 0.2274148568632047, |
| "learning_rate": 8.877023080841739e-06, |
| "loss": 0.3739, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.8912, |
| "grad_norm": 0.24010503497821398, |
| "learning_rate": 8.871136679067372e-06, |
| "loss": 0.4175, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.8928, |
| "grad_norm": 0.21397621286280114, |
| "learning_rate": 8.865236852488813e-06, |
| "loss": 0.3693, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.8944, |
| "grad_norm": 0.24213436654140114, |
| "learning_rate": 8.859323621566207e-06, |
| "loss": 0.4259, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.2221007854473944, |
| "learning_rate": 8.853397006806183e-06, |
| "loss": 0.4035, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.8976, |
| "grad_norm": 0.24017713178930467, |
| "learning_rate": 8.847457028761783e-06, |
| "loss": 0.4296, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.8992, |
| "grad_norm": 0.23203835295180225, |
| "learning_rate": 8.841503708032398e-06, |
| "loss": 0.3994, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.9008, |
| "grad_norm": 0.23083503593902732, |
| "learning_rate": 8.835537065263684e-06, |
| "loss": 0.4089, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.9024, |
| "grad_norm": 0.25679597002643756, |
| "learning_rate": 8.829557121147499e-06, |
| "loss": 0.3818, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.904, |
| "grad_norm": 0.26988032638116893, |
| "learning_rate": 8.82356389642183e-06, |
| "loss": 0.4159, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.9056, |
| "grad_norm": 0.22810997338223302, |
| "learning_rate": 8.817557411870717e-06, |
| "loss": 0.4171, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.9072, |
| "grad_norm": 0.22573629621613647, |
| "learning_rate": 8.811537688324187e-06, |
| "loss": 0.3853, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.9088, |
| "grad_norm": 0.2280386603300284, |
| "learning_rate": 8.805504746658183e-06, |
| "loss": 0.3901, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.9104, |
| "grad_norm": 0.23847356544832135, |
| "learning_rate": 8.799458607794476e-06, |
| "loss": 0.4151, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 0.21569591450929188, |
| "learning_rate": 8.793399292700616e-06, |
| "loss": 0.3985, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.9136, |
| "grad_norm": 0.2262230703611567, |
| "learning_rate": 8.787326822389836e-06, |
| "loss": 0.3956, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.9152, |
| "grad_norm": 0.22237482204990475, |
| "learning_rate": 8.781241217921e-06, |
| "loss": 0.3745, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.9168, |
| "grad_norm": 0.23283271815396875, |
| "learning_rate": 8.775142500398513e-06, |
| "loss": 0.4212, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.9184, |
| "grad_norm": 0.25203977383418574, |
| "learning_rate": 8.769030690972262e-06, |
| "loss": 0.4266, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.22711538243055765, |
| "learning_rate": 8.76290581083753e-06, |
| "loss": 0.404, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.9216, |
| "grad_norm": 0.20778928511083974, |
| "learning_rate": 8.756767881234928e-06, |
| "loss": 0.349, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.9232, |
| "grad_norm": 0.23640856403954574, |
| "learning_rate": 8.750616923450328e-06, |
| "loss": 0.4051, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.9248, |
| "grad_norm": 0.2522617735623199, |
| "learning_rate": 8.744452958814775e-06, |
| "loss": 0.4027, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.9264, |
| "grad_norm": 0.2342252652199819, |
| "learning_rate": 8.738276008704426e-06, |
| "loss": 0.3973, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 0.2504199739670626, |
| "learning_rate": 8.732086094540467e-06, |
| "loss": 0.4092, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.9296, |
| "grad_norm": 0.24048446146826216, |
| "learning_rate": 8.725883237789046e-06, |
| "loss": 0.4265, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.9312, |
| "grad_norm": 0.23120555626030487, |
| "learning_rate": 8.719667459961191e-06, |
| "loss": 0.3945, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.9328, |
| "grad_norm": 0.24704700377859024, |
| "learning_rate": 8.713438782612743e-06, |
| "loss": 0.4026, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.9344, |
| "grad_norm": 0.22625929022642643, |
| "learning_rate": 8.707197227344275e-06, |
| "loss": 0.4018, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.936, |
| "grad_norm": 0.23098764435915073, |
| "learning_rate": 8.700942815801023e-06, |
| "loss": 0.3957, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.9376, |
| "grad_norm": 0.250636921873658, |
| "learning_rate": 8.6946755696728e-06, |
| "loss": 0.4193, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.9392, |
| "grad_norm": 0.23948197618358344, |
| "learning_rate": 8.688395510693939e-06, |
| "loss": 0.4029, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.9408, |
| "grad_norm": 0.23159535644454657, |
| "learning_rate": 8.682102660643196e-06, |
| "loss": 0.418, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.9424, |
| "grad_norm": 0.2695090299623303, |
| "learning_rate": 8.675797041343696e-06, |
| "loss": 0.4159, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 0.21256909734442456, |
| "learning_rate": 8.669478674662839e-06, |
| "loss": 0.3794, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.9456, |
| "grad_norm": 0.2955820349546049, |
| "learning_rate": 8.663147582512232e-06, |
| "loss": 0.4107, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.9472, |
| "grad_norm": 0.254294794274739, |
| "learning_rate": 8.65680378684762e-06, |
| "loss": 0.414, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.9488, |
| "grad_norm": 0.23052609221817652, |
| "learning_rate": 8.6504473096688e-06, |
| "loss": 0.3822, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.9504, |
| "grad_norm": 0.24882171005390902, |
| "learning_rate": 8.64407817301954e-06, |
| "loss": 0.435, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.952, |
| "grad_norm": 0.22751899907297213, |
| "learning_rate": 8.637696398987517e-06, |
| "loss": 0.3831, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.9536, |
| "grad_norm": 0.23742793544048132, |
| "learning_rate": 8.631302009704235e-06, |
| "loss": 0.4164, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.9552, |
| "grad_norm": 0.26590599425122596, |
| "learning_rate": 8.624895027344943e-06, |
| "loss": 0.4168, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.9568, |
| "grad_norm": 0.23356206892857917, |
| "learning_rate": 8.618475474128563e-06, |
| "loss": 0.3972, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.9584, |
| "grad_norm": 0.25747308524387813, |
| "learning_rate": 8.61204337231761e-06, |
| "loss": 0.3956, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.24339006199767227, |
| "learning_rate": 8.605598744218122e-06, |
| "loss": 0.4212, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.9616, |
| "grad_norm": 0.23507478504214782, |
| "learning_rate": 8.599141612179572e-06, |
| "loss": 0.421, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.9632, |
| "grad_norm": 0.22175701534506762, |
| "learning_rate": 8.592671998594794e-06, |
| "loss": 0.382, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.9648, |
| "grad_norm": 0.3510453466116925, |
| "learning_rate": 8.586189925899913e-06, |
| "loss": 0.4058, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.9664, |
| "grad_norm": 0.23001994852736846, |
| "learning_rate": 8.57969541657426e-06, |
| "loss": 0.3808, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.968, |
| "grad_norm": 0.243100532965684, |
| "learning_rate": 8.57318849314029e-06, |
| "loss": 0.4045, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.9696, |
| "grad_norm": 0.22752046208029042, |
| "learning_rate": 8.566669178163513e-06, |
| "loss": 0.38, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.9712, |
| "grad_norm": 0.22925986705507403, |
| "learning_rate": 8.560137494252416e-06, |
| "loss": 0.403, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.9728, |
| "grad_norm": 0.2642303151823748, |
| "learning_rate": 8.553593464058374e-06, |
| "loss": 0.4298, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.9744, |
| "grad_norm": 0.2706739011960767, |
| "learning_rate": 8.54703711027558e-06, |
| "loss": 0.4172, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.976, |
| "grad_norm": 0.2401834144740019, |
| "learning_rate": 8.540468455640964e-06, |
| "loss": 0.3874, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.9776, |
| "grad_norm": 0.2381019666776129, |
| "learning_rate": 8.533887522934114e-06, |
| "loss": 0.3601, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.9792, |
| "grad_norm": 0.24911639753365442, |
| "learning_rate": 8.527294334977201e-06, |
| "loss": 0.4122, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.9808, |
| "grad_norm": 0.22981845026993572, |
| "learning_rate": 8.520688914634894e-06, |
| "loss": 0.4011, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.9824, |
| "grad_norm": 0.24106171685222139, |
| "learning_rate": 8.51407128481428e-06, |
| "loss": 0.3938, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.984, |
| "grad_norm": 0.2219291609844236, |
| "learning_rate": 8.507441468464792e-06, |
| "loss": 0.3777, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.9856, |
| "grad_norm": 0.24982218930020303, |
| "learning_rate": 8.50079948857812e-06, |
| "loss": 0.4295, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.9872, |
| "grad_norm": 0.22917004828027057, |
| "learning_rate": 8.494145368188143e-06, |
| "loss": 0.4159, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.9888, |
| "grad_norm": 0.24419866532804627, |
| "learning_rate": 8.487479130370838e-06, |
| "loss": 0.4101, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.9904, |
| "grad_norm": 0.23564916687155973, |
| "learning_rate": 8.480800798244202e-06, |
| "loss": 0.4193, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 0.23818371631077295, |
| "learning_rate": 8.47411039496818e-06, |
| "loss": 0.4112, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.9936, |
| "grad_norm": 0.24030429080124868, |
| "learning_rate": 8.467407943744574e-06, |
| "loss": 0.4226, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.9952, |
| "grad_norm": 0.2488519046451105, |
| "learning_rate": 8.460693467816972e-06, |
| "loss": 0.3982, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.9968, |
| "grad_norm": 0.22654435305805945, |
| "learning_rate": 8.453966990470656e-06, |
| "loss": 0.396, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.9984, |
| "grad_norm": 0.23431926041871312, |
| "learning_rate": 8.447228535032536e-06, |
| "loss": 0.417, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.238300447529776, |
| "learning_rate": 8.440478124871054e-06, |
| "loss": 0.4079, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.0016, |
| "grad_norm": 0.22995568577737516, |
| "learning_rate": 8.433715783396115e-06, |
| "loss": 0.3501, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.0032, |
| "grad_norm": 0.23143966955270773, |
| "learning_rate": 8.426941534058999e-06, |
| "loss": 0.3534, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.0048, |
| "grad_norm": 0.24075541529151612, |
| "learning_rate": 8.420155400352279e-06, |
| "loss": 0.3923, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.0064, |
| "grad_norm": 0.24231408012204808, |
| "learning_rate": 8.413357405809748e-06, |
| "loss": 0.3543, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.008, |
| "grad_norm": 0.2341801523052073, |
| "learning_rate": 8.406547574006326e-06, |
| "loss": 0.3856, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.0096, |
| "grad_norm": 0.2768610799592341, |
| "learning_rate": 8.399725928557985e-06, |
| "loss": 0.3772, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.0112, |
| "grad_norm": 0.2475239485113371, |
| "learning_rate": 8.39289249312167e-06, |
| "loss": 0.3511, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.0128, |
| "grad_norm": 0.2504345322211535, |
| "learning_rate": 8.386047291395208e-06, |
| "loss": 0.3876, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.0144, |
| "grad_norm": 0.22399909529317844, |
| "learning_rate": 8.37919034711723e-06, |
| "loss": 0.3575, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.016, |
| "grad_norm": 0.2473464199117568, |
| "learning_rate": 8.372321684067092e-06, |
| "loss": 0.3482, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.0176, |
| "grad_norm": 0.25145180884900536, |
| "learning_rate": 8.36544132606479e-06, |
| "loss": 0.3678, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.0192, |
| "grad_norm": 0.23352219346478964, |
| "learning_rate": 8.358549296970877e-06, |
| "loss": 0.3621, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.0208, |
| "grad_norm": 0.241505764688004, |
| "learning_rate": 8.351645620686377e-06, |
| "loss": 0.3813, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.0224, |
| "grad_norm": 0.23651767983964536, |
| "learning_rate": 8.34473032115271e-06, |
| "loss": 0.3587, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.024, |
| "grad_norm": 0.24736796854172843, |
| "learning_rate": 8.337803422351602e-06, |
| "loss": 0.3642, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.0256, |
| "grad_norm": 0.2522322279371966, |
| "learning_rate": 8.33086494830501e-06, |
| "loss": 0.3532, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.0272, |
| "grad_norm": 0.2487037584473878, |
| "learning_rate": 8.323914923075018e-06, |
| "loss": 0.367, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.0288, |
| "grad_norm": 0.2559969838353199, |
| "learning_rate": 8.316953370763788e-06, |
| "loss": 0.3516, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.0304, |
| "grad_norm": 0.25870526132286725, |
| "learning_rate": 8.309980315513444e-06, |
| "loss": 0.3609, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.032, |
| "grad_norm": 0.24505073946557515, |
| "learning_rate": 8.302995781506007e-06, |
| "loss": 0.3702, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.0336, |
| "grad_norm": 0.2538099641627038, |
| "learning_rate": 8.295999792963301e-06, |
| "loss": 0.3755, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.0352, |
| "grad_norm": 0.25401108701853237, |
| "learning_rate": 8.288992374146878e-06, |
| "loss": 0.3683, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.0368, |
| "grad_norm": 0.24323894462969362, |
| "learning_rate": 8.281973549357927e-06, |
| "loss": 0.3839, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.0384, |
| "grad_norm": 0.22432817591997448, |
| "learning_rate": 8.274943342937191e-06, |
| "loss": 0.3697, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 0.22791714178424244, |
| "learning_rate": 8.267901779264889e-06, |
| "loss": 0.3553, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.0416, |
| "grad_norm": 0.263351889848764, |
| "learning_rate": 8.260848882760616e-06, |
| "loss": 0.3932, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.0432, |
| "grad_norm": 0.23509686262722226, |
| "learning_rate": 8.25378467788328e-06, |
| "loss": 0.3592, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.0448, |
| "grad_norm": 0.24087674828747757, |
| "learning_rate": 8.246709189130997e-06, |
| "loss": 0.3879, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.0464, |
| "grad_norm": 0.2223937526638392, |
| "learning_rate": 8.23962244104102e-06, |
| "loss": 0.3479, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.048, |
| "grad_norm": 0.24964160783427594, |
| "learning_rate": 8.232524458189644e-06, |
| "loss": 0.3839, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.0496, |
| "grad_norm": 0.2771583617428097, |
| "learning_rate": 8.225415265192126e-06, |
| "loss": 0.3984, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.0512, |
| "grad_norm": 0.2327740204649558, |
| "learning_rate": 8.218294886702606e-06, |
| "loss": 0.3634, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.0528, |
| "grad_norm": 0.24982879926241056, |
| "learning_rate": 8.211163347414005e-06, |
| "loss": 0.3664, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.0544, |
| "grad_norm": 0.22096913886156352, |
| "learning_rate": 8.20402067205795e-06, |
| "loss": 0.349, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.056, |
| "grad_norm": 0.2398826843134357, |
| "learning_rate": 8.196866885404697e-06, |
| "loss": 0.3617, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.0576, |
| "grad_norm": 0.24683947755121083, |
| "learning_rate": 8.18970201226302e-06, |
| "loss": 0.3785, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.0592, |
| "grad_norm": 0.22545047205371002, |
| "learning_rate": 8.182526077480153e-06, |
| "loss": 0.3508, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.0608, |
| "grad_norm": 0.24877276262993037, |
| "learning_rate": 8.175339105941685e-06, |
| "loss": 0.3666, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.0624, |
| "grad_norm": 0.2449623136136464, |
| "learning_rate": 8.168141122571478e-06, |
| "loss": 0.3833, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.064, |
| "grad_norm": 0.23573718231725757, |
| "learning_rate": 8.160932152331587e-06, |
| "loss": 0.3558, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.0656, |
| "grad_norm": 0.2466692026744667, |
| "learning_rate": 8.153712220222163e-06, |
| "loss": 0.395, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.0672, |
| "grad_norm": 0.24159553832801883, |
| "learning_rate": 8.14648135128138e-06, |
| "loss": 0.3642, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.0688, |
| "grad_norm": 0.24712330344277622, |
| "learning_rate": 8.139239570585334e-06, |
| "loss": 0.3668, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.0704, |
| "grad_norm": 0.2343891457912322, |
| "learning_rate": 8.131986903247959e-06, |
| "loss": 0.3641, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.072, |
| "grad_norm": 0.2630926407120644, |
| "learning_rate": 8.124723374420951e-06, |
| "loss": 0.372, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.0735999999999999, |
| "grad_norm": 0.24855905944217216, |
| "learning_rate": 8.117449009293668e-06, |
| "loss": 0.3976, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.0752, |
| "grad_norm": 0.22886545557355706, |
| "learning_rate": 8.11016383309305e-06, |
| "loss": 0.3442, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.0768, |
| "grad_norm": 0.2796573424817469, |
| "learning_rate": 8.102867871083528e-06, |
| "loss": 0.3546, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.0784, |
| "grad_norm": 0.24404418001269088, |
| "learning_rate": 8.095561148566932e-06, |
| "loss": 0.3725, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 0.2374704825705093, |
| "learning_rate": 8.088243690882421e-06, |
| "loss": 0.365, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.0816, |
| "grad_norm": 0.2426085044606215, |
| "learning_rate": 8.080915523406371e-06, |
| "loss": 0.37, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.0832, |
| "grad_norm": 0.2299282447450723, |
| "learning_rate": 8.073576671552303e-06, |
| "loss": 0.3602, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.0848, |
| "grad_norm": 0.24010382043217865, |
| "learning_rate": 8.06622716077079e-06, |
| "loss": 0.3668, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.0864, |
| "grad_norm": 0.25831230203127714, |
| "learning_rate": 8.058867016549372e-06, |
| "loss": 0.3949, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.088, |
| "grad_norm": 0.25004895014388584, |
| "learning_rate": 8.051496264412464e-06, |
| "loss": 0.3711, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.0896, |
| "grad_norm": 0.2382703141729499, |
| "learning_rate": 8.044114929921264e-06, |
| "loss": 0.3661, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.0912, |
| "grad_norm": 0.23255354693120406, |
| "learning_rate": 8.036723038673675e-06, |
| "loss": 0.3644, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.0928, |
| "grad_norm": 0.24402958206027864, |
| "learning_rate": 8.029320616304204e-06, |
| "loss": 0.3621, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.0944, |
| "grad_norm": 0.22073217042257495, |
| "learning_rate": 8.021907688483885e-06, |
| "loss": 0.3587, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.096, |
| "grad_norm": 0.26874831387802883, |
| "learning_rate": 8.01448428092018e-06, |
| "loss": 0.3797, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.0976, |
| "grad_norm": 0.24152400254894602, |
| "learning_rate": 8.007050419356898e-06, |
| "loss": 0.3726, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.0992, |
| "grad_norm": 0.2101694303730928, |
| "learning_rate": 7.999606129574096e-06, |
| "loss": 0.3337, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.1008, |
| "grad_norm": 0.25386678512314076, |
| "learning_rate": 7.992151437387999e-06, |
| "loss": 0.3755, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.1024, |
| "grad_norm": 0.23062685716015766, |
| "learning_rate": 7.984686368650907e-06, |
| "loss": 0.3425, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.104, |
| "grad_norm": 0.24074961719484028, |
| "learning_rate": 7.977210949251102e-06, |
| "loss": 0.385, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.1056, |
| "grad_norm": 0.2606958730741747, |
| "learning_rate": 7.969725205112766e-06, |
| "loss": 0.3668, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.1072, |
| "grad_norm": 0.27675712764515004, |
| "learning_rate": 7.962229162195882e-06, |
| "loss": 0.3632, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.1088, |
| "grad_norm": 0.31162062322950745, |
| "learning_rate": 7.95472284649615e-06, |
| "loss": 0.3611, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.1104, |
| "grad_norm": 0.23649945967876732, |
| "learning_rate": 7.947206284044896e-06, |
| "loss": 0.38, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.112, |
| "grad_norm": 0.25680423257501667, |
| "learning_rate": 7.939679500908982e-06, |
| "loss": 0.3852, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.1136, |
| "grad_norm": 0.25203908847654877, |
| "learning_rate": 7.932142523190711e-06, |
| "loss": 0.3696, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.1152, |
| "grad_norm": 0.2291293597838047, |
| "learning_rate": 7.924595377027741e-06, |
| "loss": 0.3602, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.1168, |
| "grad_norm": 0.24181129161156387, |
| "learning_rate": 7.917038088592997e-06, |
| "loss": 0.3577, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.1184, |
| "grad_norm": 0.24300961283860056, |
| "learning_rate": 7.90947068409457e-06, |
| "loss": 0.377, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 0.22271702256458342, |
| "learning_rate": 7.90189318977564e-06, |
| "loss": 0.3333, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.1216, |
| "grad_norm": 0.2270850922164437, |
| "learning_rate": 7.894305631914373e-06, |
| "loss": 0.3516, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.1232, |
| "grad_norm": 0.23028002614941268, |
| "learning_rate": 7.886708036823838e-06, |
| "loss": 0.3511, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.1248, |
| "grad_norm": 0.25322320552023925, |
| "learning_rate": 7.879100430851907e-06, |
| "loss": 0.3625, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.1264, |
| "grad_norm": 0.23115829652893918, |
| "learning_rate": 7.871482840381174e-06, |
| "loss": 0.343, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.1280000000000001, |
| "grad_norm": 0.24161860239532376, |
| "learning_rate": 7.863855291828857e-06, |
| "loss": 0.3839, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.1296, |
| "grad_norm": 0.23314353558820583, |
| "learning_rate": 7.856217811646707e-06, |
| "loss": 0.3527, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.1312, |
| "grad_norm": 0.22650097462750796, |
| "learning_rate": 7.848570426320918e-06, |
| "loss": 0.3549, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.1328, |
| "grad_norm": 0.24778208136418295, |
| "learning_rate": 7.840913162372032e-06, |
| "loss": 0.3585, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.1344, |
| "grad_norm": 0.24513387147011148, |
| "learning_rate": 7.833246046354856e-06, |
| "loss": 0.377, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.1360000000000001, |
| "grad_norm": 0.27402358926204995, |
| "learning_rate": 7.825569104858353e-06, |
| "loss": 0.3852, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.1376, |
| "grad_norm": 0.2325179339266529, |
| "learning_rate": 7.81788236450557e-06, |
| "loss": 0.3706, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.1392, |
| "grad_norm": 0.23151725335406784, |
| "learning_rate": 7.810185851953529e-06, |
| "loss": 0.3808, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.1408, |
| "grad_norm": 0.24023202934137133, |
| "learning_rate": 7.802479593893142e-06, |
| "loss": 0.3652, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.1424, |
| "grad_norm": 0.2284172833941629, |
| "learning_rate": 7.794763617049124e-06, |
| "loss": 0.3574, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.144, |
| "grad_norm": 0.22157788840235765, |
| "learning_rate": 7.787037948179884e-06, |
| "loss": 0.3431, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.1456, |
| "grad_norm": 0.22882698734504006, |
| "learning_rate": 7.779302614077449e-06, |
| "loss": 0.3755, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.1472, |
| "grad_norm": 0.22708820291779222, |
| "learning_rate": 7.771557641567363e-06, |
| "loss": 0.366, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.1488, |
| "grad_norm": 0.22621106994715276, |
| "learning_rate": 7.763803057508594e-06, |
| "loss": 0.3652, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.1504, |
| "grad_norm": 0.22721276874484447, |
| "learning_rate": 7.756038888793446e-06, |
| "loss": 0.3649, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.152, |
| "grad_norm": 0.2232041276804206, |
| "learning_rate": 7.748265162347455e-06, |
| "loss": 0.3581, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.1536, |
| "grad_norm": 0.2309354001449771, |
| "learning_rate": 7.740481905129307e-06, |
| "loss": 0.3815, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.1552, |
| "grad_norm": 0.22026286468065007, |
| "learning_rate": 7.732689144130741e-06, |
| "loss": 0.3315, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.1568, |
| "grad_norm": 0.23507456095191312, |
| "learning_rate": 7.724886906376451e-06, |
| "loss": 0.3627, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.1584, |
| "grad_norm": 0.24402503423922267, |
| "learning_rate": 7.717075218923998e-06, |
| "loss": 0.3755, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 0.23257043182703363, |
| "learning_rate": 7.709254108863714e-06, |
| "loss": 0.3827, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.1616, |
| "grad_norm": 0.24349835889775898, |
| "learning_rate": 7.701423603318605e-06, |
| "loss": 0.3669, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.1632, |
| "grad_norm": 0.24526217703007208, |
| "learning_rate": 7.693583729444263e-06, |
| "loss": 0.3818, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.1648, |
| "grad_norm": 0.23913074626732142, |
| "learning_rate": 7.685734514428767e-06, |
| "loss": 0.3927, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.1663999999999999, |
| "grad_norm": 0.22469460524304702, |
| "learning_rate": 7.677875985492591e-06, |
| "loss": 0.3565, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.168, |
| "grad_norm": 0.23256521326680651, |
| "learning_rate": 7.67000816988851e-06, |
| "loss": 0.372, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.1696, |
| "grad_norm": 0.22288384328523506, |
| "learning_rate": 7.662131094901499e-06, |
| "loss": 0.3385, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.1712, |
| "grad_norm": 0.23187100119114679, |
| "learning_rate": 7.654244787848655e-06, |
| "loss": 0.3685, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.1728, |
| "grad_norm": 0.23929342224440606, |
| "learning_rate": 7.646349276079079e-06, |
| "loss": 0.3751, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.1743999999999999, |
| "grad_norm": 0.23086718961581304, |
| "learning_rate": 7.6384445869738e-06, |
| "loss": 0.3523, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.176, |
| "grad_norm": 0.2590684013894752, |
| "learning_rate": 7.630530747945672e-06, |
| "loss": 0.3864, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.1776, |
| "grad_norm": 0.2273990675887813, |
| "learning_rate": 7.622607786439279e-06, |
| "loss": 0.363, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.1792, |
| "grad_norm": 0.2259274401770562, |
| "learning_rate": 7.6146757299308406e-06, |
| "loss": 0.3469, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.1808, |
| "grad_norm": 0.2525186450650305, |
| "learning_rate": 7.606734605928123e-06, |
| "loss": 0.3828, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.1824, |
| "grad_norm": 0.22142903728383576, |
| "learning_rate": 7.598784441970329e-06, |
| "loss": 0.3503, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.184, |
| "grad_norm": 0.22157592217447475, |
| "learning_rate": 7.590825265628019e-06, |
| "loss": 0.3693, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.1856, |
| "grad_norm": 0.22000055738078747, |
| "learning_rate": 7.5828571045030005e-06, |
| "loss": 0.3614, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.1872, |
| "grad_norm": 0.23930133579855614, |
| "learning_rate": 7.574879986228245e-06, |
| "loss": 0.3808, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.1888, |
| "grad_norm": 0.22952446871060206, |
| "learning_rate": 7.566893938467788e-06, |
| "loss": 0.3519, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.1904, |
| "grad_norm": 0.23214837601461755, |
| "learning_rate": 7.558898988916624e-06, |
| "loss": 0.3765, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.192, |
| "grad_norm": 0.2348126058133705, |
| "learning_rate": 7.550895165300626e-06, |
| "loss": 0.3643, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.1936, |
| "grad_norm": 0.23600586472915674, |
| "learning_rate": 7.542882495376437e-06, |
| "loss": 0.3658, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.1952, |
| "grad_norm": 0.24444049045676508, |
| "learning_rate": 7.5348610069313795e-06, |
| "loss": 0.3585, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.1968, |
| "grad_norm": 0.2337212945195045, |
| "learning_rate": 7.5268307277833605e-06, |
| "loss": 0.3662, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.1984, |
| "grad_norm": 0.23097648314557714, |
| "learning_rate": 7.518791685780769e-06, |
| "loss": 0.3573, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.2538166938385362, |
| "learning_rate": 7.5107439088023845e-06, |
| "loss": 0.3875, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.2016, |
| "grad_norm": 0.2334617553052207, |
| "learning_rate": 7.502687424757278e-06, |
| "loss": 0.3768, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.2032, |
| "grad_norm": 0.24210202115319063, |
| "learning_rate": 7.4946222615847165e-06, |
| "loss": 0.3769, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.2048, |
| "grad_norm": 0.21438308504986922, |
| "learning_rate": 7.486548447254065e-06, |
| "loss": 0.3701, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.2064, |
| "grad_norm": 0.2529795493735992, |
| "learning_rate": 7.478466009764692e-06, |
| "loss": 0.3834, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.208, |
| "grad_norm": 0.2314903415596924, |
| "learning_rate": 7.470374977145867e-06, |
| "loss": 0.3492, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.2096, |
| "grad_norm": 0.22934826911506342, |
| "learning_rate": 7.462275377456671e-06, |
| "loss": 0.3541, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.2112, |
| "grad_norm": 0.24204618724381194, |
| "learning_rate": 7.4541672387858895e-06, |
| "loss": 0.3509, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.2128, |
| "grad_norm": 0.23731177200006567, |
| "learning_rate": 7.446050589251928e-06, |
| "loss": 0.3467, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.2144, |
| "grad_norm": 0.2252929094883662, |
| "learning_rate": 7.437925457002697e-06, |
| "loss": 0.3301, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.216, |
| "grad_norm": 0.2921849691515967, |
| "learning_rate": 7.429791870215535e-06, |
| "loss": 0.3739, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.2176, |
| "grad_norm": 0.23217998334047332, |
| "learning_rate": 7.421649857097092e-06, |
| "loss": 0.3653, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.2192, |
| "grad_norm": 0.2552557384152627, |
| "learning_rate": 7.413499445883245e-06, |
| "loss": 0.3777, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.2208, |
| "grad_norm": 0.2321135603078171, |
| "learning_rate": 7.405340664838994e-06, |
| "loss": 0.3606, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.2224, |
| "grad_norm": 0.2360936661565432, |
| "learning_rate": 7.39717354225836e-06, |
| "loss": 0.3686, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.224, |
| "grad_norm": 0.23726213652977857, |
| "learning_rate": 7.3889981064643e-06, |
| "loss": 0.341, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.2256, |
| "grad_norm": 0.2430392951389663, |
| "learning_rate": 7.380814385808594e-06, |
| "loss": 0.3337, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.2272, |
| "grad_norm": 0.2379745519630755, |
| "learning_rate": 7.372622408671757e-06, |
| "loss": 0.3671, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.2288000000000001, |
| "grad_norm": 0.24180232015401681, |
| "learning_rate": 7.364422203462935e-06, |
| "loss": 0.3698, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.2304, |
| "grad_norm": 0.23971627747555865, |
| "learning_rate": 7.3562137986198065e-06, |
| "loss": 0.3739, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.232, |
| "grad_norm": 0.22310752108681453, |
| "learning_rate": 7.3479972226084925e-06, |
| "loss": 0.3739, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.2336, |
| "grad_norm": 0.225533904812999, |
| "learning_rate": 7.339772503923445e-06, |
| "loss": 0.3576, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.2352, |
| "grad_norm": 0.24901633508751497, |
| "learning_rate": 7.331539671087353e-06, |
| "loss": 0.3607, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.2368000000000001, |
| "grad_norm": 0.23434697398335294, |
| "learning_rate": 7.32329875265105e-06, |
| "loss": 0.3508, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.2384, |
| "grad_norm": 0.24826902067363907, |
| "learning_rate": 7.315049777193407e-06, |
| "loss": 0.3855, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 0.22045772381155812, |
| "learning_rate": 7.306792773321234e-06, |
| "loss": 0.3643, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.2416, |
| "grad_norm": 0.22739884348004483, |
| "learning_rate": 7.298527769669188e-06, |
| "loss": 0.3672, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.2432, |
| "grad_norm": 0.22795934559877223, |
| "learning_rate": 7.290254794899665e-06, |
| "loss": 0.3678, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.2448, |
| "grad_norm": 0.23994602544341062, |
| "learning_rate": 7.281973877702705e-06, |
| "loss": 0.3679, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.2464, |
| "grad_norm": 0.25188152173029527, |
| "learning_rate": 7.2736850467958905e-06, |
| "loss": 0.3927, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.248, |
| "grad_norm": 0.22813300257454686, |
| "learning_rate": 7.26538833092425e-06, |
| "loss": 0.3763, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.2496, |
| "grad_norm": 0.24064894144758955, |
| "learning_rate": 7.257083758860159e-06, |
| "loss": 0.3686, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.2511999999999999, |
| "grad_norm": 0.2558714847694326, |
| "learning_rate": 7.248771359403231e-06, |
| "loss": 0.3802, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.2528000000000001, |
| "grad_norm": 0.22563511583260015, |
| "learning_rate": 7.240451161380226e-06, |
| "loss": 0.371, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.2544, |
| "grad_norm": 0.23514408502994164, |
| "learning_rate": 7.232123193644957e-06, |
| "loss": 0.3587, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.256, |
| "grad_norm": 0.24323227496466798, |
| "learning_rate": 7.22378748507817e-06, |
| "loss": 0.3717, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.2576, |
| "grad_norm": 0.22669308973478888, |
| "learning_rate": 7.215444064587462e-06, |
| "loss": 0.3571, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.2591999999999999, |
| "grad_norm": 0.23968563542443533, |
| "learning_rate": 7.207092961107176e-06, |
| "loss": 0.3615, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.2608, |
| "grad_norm": 0.22515373828545365, |
| "learning_rate": 7.198734203598294e-06, |
| "loss": 0.3547, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.2624, |
| "grad_norm": 0.30118109996859654, |
| "learning_rate": 7.190367821048346e-06, |
| "loss": 0.3927, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.264, |
| "grad_norm": 0.23564138285634403, |
| "learning_rate": 7.181993842471301e-06, |
| "loss": 0.366, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.2656, |
| "grad_norm": 0.2501873686641576, |
| "learning_rate": 7.173612296907473e-06, |
| "loss": 0.375, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.2671999999999999, |
| "grad_norm": 0.22201129251860996, |
| "learning_rate": 7.165223213423416e-06, |
| "loss": 0.3441, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.2688, |
| "grad_norm": 0.21813238755196274, |
| "learning_rate": 7.15682662111183e-06, |
| "loss": 0.3301, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.2704, |
| "grad_norm": 0.25947272015006384, |
| "learning_rate": 7.148422549091447e-06, |
| "loss": 0.39, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.272, |
| "grad_norm": 0.23602317096666003, |
| "learning_rate": 7.140011026506945e-06, |
| "loss": 0.3678, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.2736, |
| "grad_norm": 0.24935018836011677, |
| "learning_rate": 7.131592082528837e-06, |
| "loss": 0.3795, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.2752, |
| "grad_norm": 0.23341383417175113, |
| "learning_rate": 7.12316574635337e-06, |
| "loss": 0.3615, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.2768, |
| "grad_norm": 0.2362158140536948, |
| "learning_rate": 7.114732047202433e-06, |
| "loss": 0.3604, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.2784, |
| "grad_norm": 0.4306218637416112, |
| "learning_rate": 7.106291014323445e-06, |
| "loss": 0.3738, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 0.2634022313827097, |
| "learning_rate": 7.0978426769892585e-06, |
| "loss": 0.3582, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.2816, |
| "grad_norm": 0.24895711964640957, |
| "learning_rate": 7.089387064498057e-06, |
| "loss": 0.3552, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.2832, |
| "grad_norm": 0.23564404590776392, |
| "learning_rate": 7.080924206173253e-06, |
| "loss": 0.374, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.2848, |
| "grad_norm": 0.22903803048838287, |
| "learning_rate": 7.072454131363391e-06, |
| "loss": 0.3669, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.2864, |
| "grad_norm": 0.23340543117105647, |
| "learning_rate": 7.063976869442037e-06, |
| "loss": 0.3712, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.288, |
| "grad_norm": 0.24682049755770297, |
| "learning_rate": 7.055492449807684e-06, |
| "loss": 0.3826, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.2896, |
| "grad_norm": 0.26213375411127715, |
| "learning_rate": 7.047000901883646e-06, |
| "loss": 0.3718, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.2912, |
| "grad_norm": 0.21834587594215416, |
| "learning_rate": 7.038502255117957e-06, |
| "loss": 0.3505, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.2928, |
| "grad_norm": 0.2291598951681205, |
| "learning_rate": 7.029996538983273e-06, |
| "loss": 0.3522, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.2944, |
| "grad_norm": 0.2680432492153926, |
| "learning_rate": 7.021483782976759e-06, |
| "loss": 0.3891, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.296, |
| "grad_norm": 0.24933056884674484, |
| "learning_rate": 7.012964016620002e-06, |
| "loss": 0.3587, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.2976, |
| "grad_norm": 0.23408706706803598, |
| "learning_rate": 7.004437269458894e-06, |
| "loss": 0.3591, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.2992, |
| "grad_norm": 0.2288809180965506, |
| "learning_rate": 6.995903571063541e-06, |
| "loss": 0.3547, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.3008, |
| "grad_norm": 0.2704402902744824, |
| "learning_rate": 6.987362951028147e-06, |
| "loss": 0.3686, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.3024, |
| "grad_norm": 0.22384695676031427, |
| "learning_rate": 6.97881543897093e-06, |
| "loss": 0.3662, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.304, |
| "grad_norm": 0.24581029307065752, |
| "learning_rate": 6.970261064534003e-06, |
| "loss": 0.3725, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.3056, |
| "grad_norm": 0.2372491040176523, |
| "learning_rate": 6.961699857383279e-06, |
| "loss": 0.3607, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.3072, |
| "grad_norm": 0.23895869611008547, |
| "learning_rate": 6.953131847208365e-06, |
| "loss": 0.3904, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.3088, |
| "grad_norm": 0.23053341696167742, |
| "learning_rate": 6.944557063722459e-06, |
| "loss": 0.3523, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.3104, |
| "grad_norm": 0.24399469419778422, |
| "learning_rate": 6.935975536662254e-06, |
| "loss": 0.3742, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.312, |
| "grad_norm": 0.23898409300224782, |
| "learning_rate": 6.9273872957878255e-06, |
| "loss": 0.3898, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.3136, |
| "grad_norm": 0.2183167438831092, |
| "learning_rate": 6.91879237088253e-06, |
| "loss": 0.3553, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.3152, |
| "grad_norm": 0.24373723092386604, |
| "learning_rate": 6.910190791752907e-06, |
| "loss": 0.3557, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.3168, |
| "grad_norm": 0.2547305079761196, |
| "learning_rate": 6.90158258822857e-06, |
| "loss": 0.3719, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.3184, |
| "grad_norm": 0.23599985987321415, |
| "learning_rate": 6.892967790162109e-06, |
| "loss": 0.3555, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 0.22871139917797068, |
| "learning_rate": 6.884346427428978e-06, |
| "loss": 0.332, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.3216, |
| "grad_norm": 0.23485404002268567, |
| "learning_rate": 6.875718529927404e-06, |
| "loss": 0.3674, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.3232, |
| "grad_norm": 0.22976305976497138, |
| "learning_rate": 6.867084127578267e-06, |
| "loss": 0.3595, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.3248, |
| "grad_norm": 0.2391178708702739, |
| "learning_rate": 6.858443250325013e-06, |
| "loss": 0.3602, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.3264, |
| "grad_norm": 0.22744459724479651, |
| "learning_rate": 6.849795928133538e-06, |
| "loss": 0.3618, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.328, |
| "grad_norm": 0.23519857489397125, |
| "learning_rate": 6.841142190992092e-06, |
| "loss": 0.374, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.3296000000000001, |
| "grad_norm": 0.22982509805234475, |
| "learning_rate": 6.832482068911167e-06, |
| "loss": 0.3766, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.3312, |
| "grad_norm": 0.2076036752469916, |
| "learning_rate": 6.823815591923402e-06, |
| "loss": 0.3482, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.3328, |
| "grad_norm": 0.2338440864259155, |
| "learning_rate": 6.815142790083473e-06, |
| "loss": 0.3826, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.3344, |
| "grad_norm": 0.22584957304045628, |
| "learning_rate": 6.8064636934679885e-06, |
| "loss": 0.3769, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.336, |
| "grad_norm": 0.2267611983552526, |
| "learning_rate": 6.797778332175387e-06, |
| "loss": 0.3663, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.3376000000000001, |
| "grad_norm": 0.23907968038935365, |
| "learning_rate": 6.789086736325834e-06, |
| "loss": 0.3603, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.3392, |
| "grad_norm": 0.2433093259484987, |
| "learning_rate": 6.780388936061118e-06, |
| "loss": 0.3786, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.3408, |
| "grad_norm": 0.23507524543000768, |
| "learning_rate": 6.771684961544537e-06, |
| "loss": 0.3762, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.3424, |
| "grad_norm": 0.23360140726826367, |
| "learning_rate": 6.7629748429608076e-06, |
| "loss": 0.3359, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.3439999999999999, |
| "grad_norm": 0.22778484154038828, |
| "learning_rate": 6.754258610515949e-06, |
| "loss": 0.3472, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.3456000000000001, |
| "grad_norm": 0.2273677405063284, |
| "learning_rate": 6.745536294437187e-06, |
| "loss": 0.3593, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.3472, |
| "grad_norm": 0.225917917207584, |
| "learning_rate": 6.736807924972841e-06, |
| "loss": 0.3533, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.3488, |
| "grad_norm": 0.23277982978686493, |
| "learning_rate": 6.728073532392226e-06, |
| "loss": 0.3519, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.3504, |
| "grad_norm": 0.21801611304664992, |
| "learning_rate": 6.719333146985544e-06, |
| "loss": 0.3529, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.3519999999999999, |
| "grad_norm": 0.24011001777896257, |
| "learning_rate": 6.710586799063777e-06, |
| "loss": 0.364, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.3536000000000001, |
| "grad_norm": 0.24370531506751958, |
| "learning_rate": 6.701834518958587e-06, |
| "loss": 0.3765, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.3552, |
| "grad_norm": 0.25050459692015076, |
| "learning_rate": 6.6930763370222104e-06, |
| "loss": 0.3737, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.3568, |
| "grad_norm": 0.22669393531025567, |
| "learning_rate": 6.684312283627348e-06, |
| "loss": 0.3657, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.3584, |
| "grad_norm": 0.23953945193156154, |
| "learning_rate": 6.6755423891670605e-06, |
| "loss": 0.389, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.3599999999999999, |
| "grad_norm": 0.2294925497047156, |
| "learning_rate": 6.6667666840546685e-06, |
| "loss": 0.3885, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.3616, |
| "grad_norm": 0.23261961530877945, |
| "learning_rate": 6.6579851987236435e-06, |
| "loss": 0.3674, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.3632, |
| "grad_norm": 0.228440909518283, |
| "learning_rate": 6.649197963627497e-06, |
| "loss": 0.3581, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.3648, |
| "grad_norm": 0.23023915856479849, |
| "learning_rate": 6.640405009239689e-06, |
| "loss": 0.3764, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.3664, |
| "grad_norm": 0.24293659740471726, |
| "learning_rate": 6.631606366053507e-06, |
| "loss": 0.3774, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.3679999999999999, |
| "grad_norm": 0.2294001414439151, |
| "learning_rate": 6.622802064581968e-06, |
| "loss": 0.3654, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.3696, |
| "grad_norm": 0.23394267808659724, |
| "learning_rate": 6.613992135357713e-06, |
| "loss": 0.3734, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.3712, |
| "grad_norm": 0.23619484740642865, |
| "learning_rate": 6.605176608932897e-06, |
| "loss": 0.3421, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.3728, |
| "grad_norm": 0.2357266211284377, |
| "learning_rate": 6.596355515879091e-06, |
| "loss": 0.3672, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.3744, |
| "grad_norm": 0.2593580618225521, |
| "learning_rate": 6.587528886787165e-06, |
| "loss": 0.3724, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.376, |
| "grad_norm": 0.22700716204355342, |
| "learning_rate": 6.578696752267189e-06, |
| "loss": 0.3437, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.3776, |
| "grad_norm": 0.23500442000507377, |
| "learning_rate": 6.5698591429483286e-06, |
| "loss": 0.3479, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.3792, |
| "grad_norm": 0.21992671473136727, |
| "learning_rate": 6.5610160894787275e-06, |
| "loss": 0.3511, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.3808, |
| "grad_norm": 0.22833450905269675, |
| "learning_rate": 6.552167622525421e-06, |
| "loss": 0.3762, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.3824, |
| "grad_norm": 0.25493923220455256, |
| "learning_rate": 6.543313772774209e-06, |
| "loss": 0.3962, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.384, |
| "grad_norm": 0.23174463091904127, |
| "learning_rate": 6.534454570929563e-06, |
| "loss": 0.3602, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.3856, |
| "grad_norm": 0.2228783505657694, |
| "learning_rate": 6.52559004771451e-06, |
| "loss": 0.3767, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.3872, |
| "grad_norm": 0.24082830964244206, |
| "learning_rate": 6.516720233870538e-06, |
| "loss": 0.3889, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.3888, |
| "grad_norm": 0.23698720464612325, |
| "learning_rate": 6.507845160157476e-06, |
| "loss": 0.3799, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.3904, |
| "grad_norm": 0.22180708261254972, |
| "learning_rate": 6.498964857353401e-06, |
| "loss": 0.3482, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.392, |
| "grad_norm": 0.23854186114421486, |
| "learning_rate": 6.4900793562545165e-06, |
| "loss": 0.3667, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.3936, |
| "grad_norm": 0.22180136604562353, |
| "learning_rate": 6.481188687675057e-06, |
| "loss": 0.3623, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.3952, |
| "grad_norm": 0.2356211909499273, |
| "learning_rate": 6.47229288244718e-06, |
| "loss": 0.3887, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.3968, |
| "grad_norm": 0.23092295592663573, |
| "learning_rate": 6.46339197142085e-06, |
| "loss": 0.3585, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.3984, |
| "grad_norm": 0.23407403557968504, |
| "learning_rate": 6.454485985463742e-06, |
| "loss": 0.3785, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.22647548102103673, |
| "learning_rate": 6.445574955461134e-06, |
| "loss": 0.3553, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.4016, |
| "grad_norm": 0.2302590980704433, |
| "learning_rate": 6.436658912315789e-06, |
| "loss": 0.3431, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.4032, |
| "grad_norm": 0.23521890180295538, |
| "learning_rate": 6.427737886947859e-06, |
| "loss": 0.3741, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.4048, |
| "grad_norm": 0.24835104449004766, |
| "learning_rate": 6.418811910294776e-06, |
| "loss": 0.3888, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.4064, |
| "grad_norm": 0.23508756097778993, |
| "learning_rate": 6.409881013311136e-06, |
| "loss": 0.3781, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.408, |
| "grad_norm": 0.22321399737406053, |
| "learning_rate": 6.400945226968607e-06, |
| "loss": 0.3515, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.4096, |
| "grad_norm": 0.2326486173454616, |
| "learning_rate": 6.392004582255807e-06, |
| "loss": 0.3494, |
| "step": 881 |
| }, |
| { |
| "epoch": 1.4112, |
| "grad_norm": 0.21361174656953685, |
| "learning_rate": 6.383059110178205e-06, |
| "loss": 0.3281, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.4128, |
| "grad_norm": 0.2418019863266295, |
| "learning_rate": 6.374108841758006e-06, |
| "loss": 0.3579, |
| "step": 883 |
| }, |
| { |
| "epoch": 1.4144, |
| "grad_norm": 0.22458963808530782, |
| "learning_rate": 6.365153808034057e-06, |
| "loss": 0.345, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.416, |
| "grad_norm": 0.2379791473745, |
| "learning_rate": 6.356194040061725e-06, |
| "loss": 0.3621, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.4176, |
| "grad_norm": 0.23547454120514846, |
| "learning_rate": 6.3472295689127946e-06, |
| "loss": 0.3535, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.4192, |
| "grad_norm": 0.23190305457408839, |
| "learning_rate": 6.338260425675365e-06, |
| "loss": 0.3514, |
| "step": 887 |
| }, |
| { |
| "epoch": 1.4208, |
| "grad_norm": 0.21946519730154843, |
| "learning_rate": 6.329286641453729e-06, |
| "loss": 0.3616, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.4224, |
| "grad_norm": 0.2180294140612126, |
| "learning_rate": 6.320308247368285e-06, |
| "loss": 0.3539, |
| "step": 889 |
| }, |
| { |
| "epoch": 1.424, |
| "grad_norm": 0.24362404693732542, |
| "learning_rate": 6.311325274555413e-06, |
| "loss": 0.3602, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.4256, |
| "grad_norm": 0.24159823365929534, |
| "learning_rate": 6.302337754167369e-06, |
| "loss": 0.3791, |
| "step": 891 |
| }, |
| { |
| "epoch": 1.4272, |
| "grad_norm": 0.254730776400451, |
| "learning_rate": 6.2933457173721855e-06, |
| "loss": 0.3837, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.4288, |
| "grad_norm": 0.23858849361569914, |
| "learning_rate": 6.2843491953535515e-06, |
| "loss": 0.354, |
| "step": 893 |
| }, |
| { |
| "epoch": 1.4304000000000001, |
| "grad_norm": 0.2360665542587955, |
| "learning_rate": 6.275348219310715e-06, |
| "loss": 0.399, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.432, |
| "grad_norm": 0.22271280213106204, |
| "learning_rate": 6.266342820458366e-06, |
| "loss": 0.3744, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.4336, |
| "grad_norm": 0.24584895146778546, |
| "learning_rate": 6.2573330300265375e-06, |
| "loss": 0.3947, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.4352, |
| "grad_norm": 0.2265990392820067, |
| "learning_rate": 6.248318879260488e-06, |
| "loss": 0.3635, |
| "step": 897 |
| }, |
| { |
| "epoch": 1.4368, |
| "grad_norm": 0.23816155002062409, |
| "learning_rate": 6.239300399420601e-06, |
| "loss": 0.375, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.4384000000000001, |
| "grad_norm": 0.34024010335601934, |
| "learning_rate": 6.230277621782269e-06, |
| "loss": 0.3494, |
| "step": 899 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 0.2189066792533067, |
| "learning_rate": 6.221250577635791e-06, |
| "loss": 0.3565, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.4416, |
| "grad_norm": 0.23742676845400826, |
| "learning_rate": 6.2122192982862615e-06, |
| "loss": 0.3776, |
| "step": 901 |
| }, |
| { |
| "epoch": 1.4432, |
| "grad_norm": 0.23487844667466032, |
| "learning_rate": 6.203183815053463e-06, |
| "loss": 0.359, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.4447999999999999, |
| "grad_norm": 0.21952145968827483, |
| "learning_rate": 6.1941441592717564e-06, |
| "loss": 0.3482, |
| "step": 903 |
| }, |
| { |
| "epoch": 1.4464000000000001, |
| "grad_norm": 0.22143841211914445, |
| "learning_rate": 6.185100362289972e-06, |
| "loss": 0.3442, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.448, |
| "grad_norm": 0.23353754384705555, |
| "learning_rate": 6.176052455471302e-06, |
| "loss": 0.3759, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.4496, |
| "grad_norm": 0.23468474903212708, |
| "learning_rate": 6.167000470193189e-06, |
| "loss": 0.3531, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.4512, |
| "grad_norm": 0.24681432412045784, |
| "learning_rate": 6.157944437847226e-06, |
| "loss": 0.3681, |
| "step": 907 |
| }, |
| { |
| "epoch": 1.4527999999999999, |
| "grad_norm": 0.22653919300087505, |
| "learning_rate": 6.148884389839035e-06, |
| "loss": 0.3533, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.4544000000000001, |
| "grad_norm": 0.22910248377970177, |
| "learning_rate": 6.1398203575881645e-06, |
| "loss": 0.3518, |
| "step": 909 |
| }, |
| { |
| "epoch": 1.456, |
| "grad_norm": 0.22883524653138335, |
| "learning_rate": 6.130752372527981e-06, |
| "loss": 0.3723, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.4576, |
| "grad_norm": 0.2281803688217028, |
| "learning_rate": 6.121680466105559e-06, |
| "loss": 0.3704, |
| "step": 911 |
| }, |
| { |
| "epoch": 1.4592, |
| "grad_norm": 0.22113724638020416, |
| "learning_rate": 6.112604669781572e-06, |
| "loss": 0.355, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.4607999999999999, |
| "grad_norm": 0.23353973410027182, |
| "learning_rate": 6.1035250150301864e-06, |
| "loss": 0.3585, |
| "step": 913 |
| }, |
| { |
| "epoch": 1.4624, |
| "grad_norm": 0.23206684472849667, |
| "learning_rate": 6.0944415333389405e-06, |
| "loss": 0.3599, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.464, |
| "grad_norm": 0.21098520516722896, |
| "learning_rate": 6.085354256208655e-06, |
| "loss": 0.3407, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.4656, |
| "grad_norm": 0.22257147698105467, |
| "learning_rate": 6.076263215153308e-06, |
| "loss": 0.3621, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.4672, |
| "grad_norm": 0.22588171214009592, |
| "learning_rate": 6.067168441699927e-06, |
| "loss": 0.3738, |
| "step": 917 |
| }, |
| { |
| "epoch": 1.4687999999999999, |
| "grad_norm": 0.22431594143965727, |
| "learning_rate": 6.058069967388489e-06, |
| "loss": 0.3594, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.4704, |
| "grad_norm": 0.2764405598780132, |
| "learning_rate": 6.048967823771802e-06, |
| "loss": 0.3764, |
| "step": 919 |
| }, |
| { |
| "epoch": 1.472, |
| "grad_norm": 0.22277850289194742, |
| "learning_rate": 6.039862042415401e-06, |
| "loss": 0.3772, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.4736, |
| "grad_norm": 0.2181453113724427, |
| "learning_rate": 6.030752654897435e-06, |
| "loss": 0.3693, |
| "step": 921 |
| }, |
| { |
| "epoch": 1.4752, |
| "grad_norm": 0.2287120859689062, |
| "learning_rate": 6.021639692808558e-06, |
| "loss": 0.3633, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.4768, |
| "grad_norm": 0.22675473965850518, |
| "learning_rate": 6.0125231877518205e-06, |
| "loss": 0.3725, |
| "step": 923 |
| }, |
| { |
| "epoch": 1.4784, |
| "grad_norm": 0.23051364778116207, |
| "learning_rate": 6.0034031713425636e-06, |
| "loss": 0.3733, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 0.21874647555797064, |
| "learning_rate": 5.994279675208302e-06, |
| "loss": 0.3677, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.4816, |
| "grad_norm": 0.21762237477485263, |
| "learning_rate": 5.985152730988617e-06, |
| "loss": 0.3537, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.4832, |
| "grad_norm": 0.22410918360561305, |
| "learning_rate": 5.9760223703350495e-06, |
| "loss": 0.3513, |
| "step": 927 |
| }, |
| { |
| "epoch": 1.4848, |
| "grad_norm": 0.2169206778622965, |
| "learning_rate": 5.966888624910989e-06, |
| "loss": 0.3349, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.4864, |
| "grad_norm": 0.21097006844254215, |
| "learning_rate": 5.957751526391558e-06, |
| "loss": 0.3269, |
| "step": 929 |
| }, |
| { |
| "epoch": 1.488, |
| "grad_norm": 0.21234669240614082, |
| "learning_rate": 5.948611106463518e-06, |
| "loss": 0.3454, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.4896, |
| "grad_norm": 0.22354651801251826, |
| "learning_rate": 5.939467396825137e-06, |
| "loss": 0.3558, |
| "step": 931 |
| }, |
| { |
| "epoch": 1.4912, |
| "grad_norm": 0.23195581660261266, |
| "learning_rate": 5.9303204291860975e-06, |
| "loss": 0.3469, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.4928, |
| "grad_norm": 0.2300496976828517, |
| "learning_rate": 5.92117023526738e-06, |
| "loss": 0.3678, |
| "step": 933 |
| }, |
| { |
| "epoch": 1.4944, |
| "grad_norm": 0.22771540294428458, |
| "learning_rate": 5.912016846801153e-06, |
| "loss": 0.3414, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.496, |
| "grad_norm": 0.2321875022098519, |
| "learning_rate": 5.902860295530665e-06, |
| "loss": 0.3473, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.4976, |
| "grad_norm": 0.2379695065052903, |
| "learning_rate": 5.893700613210128e-06, |
| "loss": 0.3612, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.4992, |
| "grad_norm": 0.25030547435089123, |
| "learning_rate": 5.88453783160462e-06, |
| "loss": 0.3633, |
| "step": 937 |
| }, |
| { |
| "epoch": 1.5008, |
| "grad_norm": 0.24321706268173005, |
| "learning_rate": 5.875371982489959e-06, |
| "loss": 0.3647, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.5024, |
| "grad_norm": 0.2297424897813886, |
| "learning_rate": 5.866203097652605e-06, |
| "loss": 0.3562, |
| "step": 939 |
| }, |
| { |
| "epoch": 1.504, |
| "grad_norm": 0.21645441744560415, |
| "learning_rate": 5.857031208889548e-06, |
| "loss": 0.3645, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.5056, |
| "grad_norm": 0.30447650993545894, |
| "learning_rate": 5.847856348008188e-06, |
| "loss": 0.354, |
| "step": 941 |
| }, |
| { |
| "epoch": 1.5072, |
| "grad_norm": 0.23818370837104458, |
| "learning_rate": 5.838678546826242e-06, |
| "loss": 0.3578, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.5088, |
| "grad_norm": 0.22832107693145853, |
| "learning_rate": 5.829497837171616e-06, |
| "loss": 0.3611, |
| "step": 943 |
| }, |
| { |
| "epoch": 1.5104, |
| "grad_norm": 0.23212021781588146, |
| "learning_rate": 5.820314250882304e-06, |
| "loss": 0.3541, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.512, |
| "grad_norm": 0.2403926474412084, |
| "learning_rate": 5.811127819806277e-06, |
| "loss": 0.3787, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.5135999999999998, |
| "grad_norm": 0.22754583224200328, |
| "learning_rate": 5.801938575801372e-06, |
| "loss": 0.3598, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.5152, |
| "grad_norm": 0.2335284596892335, |
| "learning_rate": 5.792746550735182e-06, |
| "loss": 0.3713, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.5168, |
| "grad_norm": 0.23692474853711462, |
| "learning_rate": 5.7835517764849395e-06, |
| "loss": 0.3638, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.5184, |
| "grad_norm": 0.22902884413853306, |
| "learning_rate": 5.7743542849374155e-06, |
| "loss": 0.3462, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 0.20803814236821247, |
| "learning_rate": 5.765154107988803e-06, |
| "loss": 0.3404, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.5215999999999998, |
| "grad_norm": 0.2235312666477712, |
| "learning_rate": 5.755951277544607e-06, |
| "loss": 0.3732, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.5232, |
| "grad_norm": 0.22592467128723975, |
| "learning_rate": 5.746745825519539e-06, |
| "loss": 0.3639, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.5248, |
| "grad_norm": 0.23131630965230526, |
| "learning_rate": 5.737537783837395e-06, |
| "loss": 0.3708, |
| "step": 953 |
| }, |
| { |
| "epoch": 1.5264, |
| "grad_norm": 0.217373010880051, |
| "learning_rate": 5.728327184430955e-06, |
| "loss": 0.3533, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.528, |
| "grad_norm": 0.23677088166781893, |
| "learning_rate": 5.719114059241871e-06, |
| "loss": 0.3884, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.5295999999999998, |
| "grad_norm": 0.2232747325700971, |
| "learning_rate": 5.709898440220552e-06, |
| "loss": 0.3421, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.5312000000000001, |
| "grad_norm": 0.23680220153147308, |
| "learning_rate": 5.700680359326055e-06, |
| "loss": 0.3407, |
| "step": 957 |
| }, |
| { |
| "epoch": 1.5328, |
| "grad_norm": 0.22869277576784572, |
| "learning_rate": 5.691459848525977e-06, |
| "loss": 0.3636, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.5344, |
| "grad_norm": 0.2524221399513015, |
| "learning_rate": 5.682236939796337e-06, |
| "loss": 0.3614, |
| "step": 959 |
| }, |
| { |
| "epoch": 1.536, |
| "grad_norm": 0.2335639599543941, |
| "learning_rate": 5.673011665121477e-06, |
| "loss": 0.3687, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.5375999999999999, |
| "grad_norm": 0.2400378759997106, |
| "learning_rate": 5.663784056493936e-06, |
| "loss": 0.3555, |
| "step": 961 |
| }, |
| { |
| "epoch": 1.5392000000000001, |
| "grad_norm": 0.22757960323834914, |
| "learning_rate": 5.6545541459143535e-06, |
| "loss": 0.3957, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.5408, |
| "grad_norm": 0.20178933850932024, |
| "learning_rate": 5.6453219653913495e-06, |
| "loss": 0.3178, |
| "step": 963 |
| }, |
| { |
| "epoch": 1.5424, |
| "grad_norm": 0.23436616401530966, |
| "learning_rate": 5.636087546941413e-06, |
| "loss": 0.3579, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.544, |
| "grad_norm": 0.23568638722287033, |
| "learning_rate": 5.6268509225888005e-06, |
| "loss": 0.3768, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.5455999999999999, |
| "grad_norm": 0.23139982783040597, |
| "learning_rate": 5.617612124365411e-06, |
| "loss": 0.3712, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.5472000000000001, |
| "grad_norm": 0.2468542860866376, |
| "learning_rate": 5.608371184310688e-06, |
| "loss": 0.3497, |
| "step": 967 |
| }, |
| { |
| "epoch": 1.5488, |
| "grad_norm": 0.2440488902130183, |
| "learning_rate": 5.5991281344714984e-06, |
| "loss": 0.3914, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.5504, |
| "grad_norm": 0.23268938749956977, |
| "learning_rate": 5.5898830069020325e-06, |
| "loss": 0.3511, |
| "step": 969 |
| }, |
| { |
| "epoch": 1.552, |
| "grad_norm": 0.22815887018042028, |
| "learning_rate": 5.580635833663679e-06, |
| "loss": 0.3825, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.5535999999999999, |
| "grad_norm": 0.21681783317465614, |
| "learning_rate": 5.5713866468249235e-06, |
| "loss": 0.3579, |
| "step": 971 |
| }, |
| { |
| "epoch": 1.5552000000000001, |
| "grad_norm": 0.22817269384378228, |
| "learning_rate": 5.562135478461234e-06, |
| "loss": 0.3649, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.5568, |
| "grad_norm": 0.22672572014481562, |
| "learning_rate": 5.55288236065495e-06, |
| "loss": 0.3516, |
| "step": 973 |
| }, |
| { |
| "epoch": 1.5584, |
| "grad_norm": 0.21394583866373454, |
| "learning_rate": 5.5436273254951734e-06, |
| "loss": 0.3633, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 0.22707482006847327, |
| "learning_rate": 5.5343704050776535e-06, |
| "loss": 0.3761, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.5615999999999999, |
| "grad_norm": 0.22498769415064948, |
| "learning_rate": 5.5251116315046785e-06, |
| "loss": 0.3611, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.5632000000000001, |
| "grad_norm": 0.23120866236630797, |
| "learning_rate": 5.515851036884964e-06, |
| "loss": 0.3521, |
| "step": 977 |
| }, |
| { |
| "epoch": 1.5648, |
| "grad_norm": 0.23275652889103082, |
| "learning_rate": 5.5065886533335355e-06, |
| "loss": 0.3588, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.5664, |
| "grad_norm": 0.21836065135346083, |
| "learning_rate": 5.497324512971632e-06, |
| "loss": 0.3444, |
| "step": 979 |
| }, |
| { |
| "epoch": 1.568, |
| "grad_norm": 0.22549772657347708, |
| "learning_rate": 5.4880586479265774e-06, |
| "loss": 0.359, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.5695999999999999, |
| "grad_norm": 0.2460534245605241, |
| "learning_rate": 5.478791090331677e-06, |
| "loss": 0.3958, |
| "step": 981 |
| }, |
| { |
| "epoch": 1.5712000000000002, |
| "grad_norm": 0.23641236253993364, |
| "learning_rate": 5.4695218723261115e-06, |
| "loss": 0.3766, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.5728, |
| "grad_norm": 0.2121111618057072, |
| "learning_rate": 5.46025102605481e-06, |
| "loss": 0.3321, |
| "step": 983 |
| }, |
| { |
| "epoch": 1.5744, |
| "grad_norm": 0.21264475610701442, |
| "learning_rate": 5.4509785836683606e-06, |
| "loss": 0.3475, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.576, |
| "grad_norm": 0.2085093523426661, |
| "learning_rate": 5.441704577322877e-06, |
| "loss": 0.3329, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.5776, |
| "grad_norm": 0.2168535170103948, |
| "learning_rate": 5.4324290391798995e-06, |
| "loss": 0.3537, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.5792000000000002, |
| "grad_norm": 0.21872075567159996, |
| "learning_rate": 5.423152001406282e-06, |
| "loss": 0.3533, |
| "step": 987 |
| }, |
| { |
| "epoch": 1.5808, |
| "grad_norm": 0.22289420442691002, |
| "learning_rate": 5.413873496174077e-06, |
| "loss": 0.3508, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.5824, |
| "grad_norm": 0.23343010025419714, |
| "learning_rate": 5.404593555660424e-06, |
| "loss": 0.38, |
| "step": 989 |
| }, |
| { |
| "epoch": 1.584, |
| "grad_norm": 0.22985302269494012, |
| "learning_rate": 5.39531221204745e-06, |
| "loss": 0.3693, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.5856, |
| "grad_norm": 0.234810726494016, |
| "learning_rate": 5.3860294975221335e-06, |
| "loss": 0.389, |
| "step": 991 |
| }, |
| { |
| "epoch": 1.5872000000000002, |
| "grad_norm": 0.23507121830636418, |
| "learning_rate": 5.376745444276219e-06, |
| "loss": 0.3734, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.5888, |
| "grad_norm": 0.2408600461076623, |
| "learning_rate": 5.3674600845060856e-06, |
| "loss": 0.3684, |
| "step": 993 |
| }, |
| { |
| "epoch": 1.5904, |
| "grad_norm": 0.22063152471210556, |
| "learning_rate": 5.358173450412649e-06, |
| "loss": 0.35, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.592, |
| "grad_norm": 0.23036309543203887, |
| "learning_rate": 5.34888557420124e-06, |
| "loss": 0.3803, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.5936, |
| "grad_norm": 0.2288511214248289, |
| "learning_rate": 5.339596488081501e-06, |
| "loss": 0.3589, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.5952, |
| "grad_norm": 0.2238532337409273, |
| "learning_rate": 5.330306224267268e-06, |
| "loss": 0.3459, |
| "step": 997 |
| }, |
| { |
| "epoch": 1.5968, |
| "grad_norm": 0.22635987335106614, |
| "learning_rate": 5.321014814976459e-06, |
| "loss": 0.3665, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.5984, |
| "grad_norm": 0.21842536180204836, |
| "learning_rate": 5.311722292430966e-06, |
| "loss": 0.3657, |
| "step": 999 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.2692932317998941, |
| "learning_rate": 5.302428688856544e-06, |
| "loss": 0.3841, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.6016, |
| "grad_norm": 0.24018643294336625, |
| "learning_rate": 5.293134036482697e-06, |
| "loss": 0.3793, |
| "step": 1001 |
| }, |
| { |
| "epoch": 1.6032, |
| "grad_norm": 0.2334970804527414, |
| "learning_rate": 5.283838367542562e-06, |
| "loss": 0.3707, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.6048, |
| "grad_norm": 0.27781053467843353, |
| "learning_rate": 5.274541714272805e-06, |
| "loss": 0.3425, |
| "step": 1003 |
| }, |
| { |
| "epoch": 1.6064, |
| "grad_norm": 0.230457933793649, |
| "learning_rate": 5.265244108913503e-06, |
| "loss": 0.3624, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.608, |
| "grad_norm": 0.2194364426568665, |
| "learning_rate": 5.255945583708037e-06, |
| "loss": 0.3733, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.6096, |
| "grad_norm": 0.20455036711833574, |
| "learning_rate": 5.2466461709029755e-06, |
| "loss": 0.3335, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.6112, |
| "grad_norm": 0.2538429237249699, |
| "learning_rate": 5.237345902747969e-06, |
| "loss": 0.3494, |
| "step": 1007 |
| }, |
| { |
| "epoch": 1.6128, |
| "grad_norm": 0.23364360321248484, |
| "learning_rate": 5.228044811495632e-06, |
| "loss": 0.3598, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.6143999999999998, |
| "grad_norm": 0.2295928630715531, |
| "learning_rate": 5.218742929401432e-06, |
| "loss": 0.3842, |
| "step": 1009 |
| }, |
| { |
| "epoch": 1.616, |
| "grad_norm": 0.22509249225383338, |
| "learning_rate": 5.2094402887235805e-06, |
| "loss": 0.3658, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.6176, |
| "grad_norm": 0.22416198718928304, |
| "learning_rate": 5.200136921722919e-06, |
| "loss": 0.3543, |
| "step": 1011 |
| }, |
| { |
| "epoch": 1.6192, |
| "grad_norm": 0.24252134717667231, |
| "learning_rate": 5.1908328606628114e-06, |
| "loss": 0.3698, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.6208, |
| "grad_norm": 0.2296914741832035, |
| "learning_rate": 5.181528137809023e-06, |
| "loss": 0.3633, |
| "step": 1013 |
| }, |
| { |
| "epoch": 1.6223999999999998, |
| "grad_norm": 0.22837486902464868, |
| "learning_rate": 5.1722227854296195e-06, |
| "loss": 0.3547, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.624, |
| "grad_norm": 0.22564836692430765, |
| "learning_rate": 5.162916835794843e-06, |
| "loss": 0.3603, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.6256, |
| "grad_norm": 0.26976383115594205, |
| "learning_rate": 5.1536103211770135e-06, |
| "loss": 0.3827, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.6272, |
| "grad_norm": 0.2220045490636461, |
| "learning_rate": 5.14430327385041e-06, |
| "loss": 0.356, |
| "step": 1017 |
| }, |
| { |
| "epoch": 1.6288, |
| "grad_norm": 0.21036437082923123, |
| "learning_rate": 5.134995726091152e-06, |
| "loss": 0.3432, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.6303999999999998, |
| "grad_norm": 0.2338572252012131, |
| "learning_rate": 5.1256877101771015e-06, |
| "loss": 0.3779, |
| "step": 1019 |
| }, |
| { |
| "epoch": 1.6320000000000001, |
| "grad_norm": 0.21303006575783548, |
| "learning_rate": 5.116379258387742e-06, |
| "loss": 0.3206, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.6336, |
| "grad_norm": 0.23704077773379204, |
| "learning_rate": 5.1070704030040675e-06, |
| "loss": 0.3965, |
| "step": 1021 |
| }, |
| { |
| "epoch": 1.6352, |
| "grad_norm": 0.233647869957559, |
| "learning_rate": 5.097761176308471e-06, |
| "loss": 0.3645, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.6368, |
| "grad_norm": 0.2284949656555198, |
| "learning_rate": 5.088451610584638e-06, |
| "loss": 0.385, |
| "step": 1023 |
| }, |
| { |
| "epoch": 1.6383999999999999, |
| "grad_norm": 0.27169194251321555, |
| "learning_rate": 5.079141738117423e-06, |
| "loss": 0.3693, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.6400000000000001, |
| "grad_norm": 0.21919913489455534, |
| "learning_rate": 5.06983159119275e-06, |
| "loss": 0.349, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.6416, |
| "grad_norm": 0.23607597710606382, |
| "learning_rate": 5.060521202097491e-06, |
| "loss": 0.3613, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.6432, |
| "grad_norm": 0.22026855920149682, |
| "learning_rate": 5.051210603119358e-06, |
| "loss": 0.3525, |
| "step": 1027 |
| }, |
| { |
| "epoch": 1.6448, |
| "grad_norm": 0.22511435660732762, |
| "learning_rate": 5.041899826546791e-06, |
| "loss": 0.3664, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.6463999999999999, |
| "grad_norm": 0.22805599618904998, |
| "learning_rate": 5.032588904668851e-06, |
| "loss": 0.3675, |
| "step": 1029 |
| }, |
| { |
| "epoch": 1.6480000000000001, |
| "grad_norm": 0.2235114392595547, |
| "learning_rate": 5.023277869775097e-06, |
| "loss": 0.3353, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.6496, |
| "grad_norm": 0.2200783989094173, |
| "learning_rate": 5.013966754155482e-06, |
| "loss": 0.36, |
| "step": 1031 |
| }, |
| { |
| "epoch": 1.6512, |
| "grad_norm": 0.2239296062950381, |
| "learning_rate": 5.004655590100238e-06, |
| "loss": 0.348, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.6528, |
| "grad_norm": 0.24259355181486578, |
| "learning_rate": 4.995344409899764e-06, |
| "loss": 0.3642, |
| "step": 1033 |
| }, |
| { |
| "epoch": 1.6543999999999999, |
| "grad_norm": 0.21651499879249725, |
| "learning_rate": 4.986033245844519e-06, |
| "loss": 0.3396, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.6560000000000001, |
| "grad_norm": 0.2190115782829853, |
| "learning_rate": 4.976722130224904e-06, |
| "loss": 0.3632, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.6576, |
| "grad_norm": 0.23752972081658758, |
| "learning_rate": 4.967411095331149e-06, |
| "loss": 0.3357, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.6592, |
| "grad_norm": 0.2237152197971954, |
| "learning_rate": 4.95810017345321e-06, |
| "loss": 0.3486, |
| "step": 1037 |
| }, |
| { |
| "epoch": 1.6608, |
| "grad_norm": 0.21674153672640126, |
| "learning_rate": 4.948789396880644e-06, |
| "loss": 0.3573, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.6623999999999999, |
| "grad_norm": 0.20705527204887764, |
| "learning_rate": 4.939478797902512e-06, |
| "loss": 0.3407, |
| "step": 1039 |
| }, |
| { |
| "epoch": 1.6640000000000001, |
| "grad_norm": 0.24143410207428773, |
| "learning_rate": 4.930168408807252e-06, |
| "loss": 0.3897, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.6656, |
| "grad_norm": 0.24205536642995848, |
| "learning_rate": 4.920858261882578e-06, |
| "loss": 0.3987, |
| "step": 1041 |
| }, |
| { |
| "epoch": 1.6672, |
| "grad_norm": 0.23864306260795018, |
| "learning_rate": 4.911548389415363e-06, |
| "loss": 0.3868, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.6688, |
| "grad_norm": 0.22279242776539246, |
| "learning_rate": 4.9022388236915306e-06, |
| "loss": 0.3642, |
| "step": 1043 |
| }, |
| { |
| "epoch": 1.6703999999999999, |
| "grad_norm": 0.2224163617633667, |
| "learning_rate": 4.892929596995934e-06, |
| "loss": 0.3654, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.6720000000000002, |
| "grad_norm": 0.2177377522038297, |
| "learning_rate": 4.883620741612259e-06, |
| "loss": 0.349, |
| "step": 1045 |
| }, |
| { |
| "epoch": 1.6736, |
| "grad_norm": 0.23164301953583896, |
| "learning_rate": 4.8743122898229e-06, |
| "loss": 0.3724, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.6752, |
| "grad_norm": 0.2250483949984091, |
| "learning_rate": 4.865004273908851e-06, |
| "loss": 0.3499, |
| "step": 1047 |
| }, |
| { |
| "epoch": 1.6768, |
| "grad_norm": 0.23112836304213952, |
| "learning_rate": 4.855696726149593e-06, |
| "loss": 0.37, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.6784, |
| "grad_norm": 0.2153432149115245, |
| "learning_rate": 4.846389678822987e-06, |
| "loss": 0.3466, |
| "step": 1049 |
| }, |
| { |
| "epoch": 1.6800000000000002, |
| "grad_norm": 0.21959381013841067, |
| "learning_rate": 4.837083164205159e-06, |
| "loss": 0.3701, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.6816, |
| "grad_norm": 0.23806876889330827, |
| "learning_rate": 4.827777214570384e-06, |
| "loss": 0.3671, |
| "step": 1051 |
| }, |
| { |
| "epoch": 1.6832, |
| "grad_norm": 0.25162286456110666, |
| "learning_rate": 4.818471862190979e-06, |
| "loss": 0.3581, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.6848, |
| "grad_norm": 0.25490479770613106, |
| "learning_rate": 4.809167139337191e-06, |
| "loss": 0.3542, |
| "step": 1053 |
| }, |
| { |
| "epoch": 1.6864, |
| "grad_norm": 0.21636919847438957, |
| "learning_rate": 4.799863078277082e-06, |
| "loss": 0.3519, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.688, |
| "grad_norm": 0.22191562588740787, |
| "learning_rate": 4.790559711276422e-06, |
| "loss": 0.3565, |
| "step": 1055 |
| }, |
| { |
| "epoch": 1.6896, |
| "grad_norm": 0.23812709297571816, |
| "learning_rate": 4.781257070598571e-06, |
| "loss": 0.3589, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.6912, |
| "grad_norm": 0.2188456024541258, |
| "learning_rate": 4.771955188504371e-06, |
| "loss": 0.3619, |
| "step": 1057 |
| }, |
| { |
| "epoch": 1.6928, |
| "grad_norm": 0.2247760496075181, |
| "learning_rate": 4.762654097252033e-06, |
| "loss": 0.3585, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.6944, |
| "grad_norm": 0.23060035275656893, |
| "learning_rate": 4.753353829097025e-06, |
| "loss": 0.3714, |
| "step": 1059 |
| }, |
| { |
| "epoch": 1.696, |
| "grad_norm": 0.23452643996431813, |
| "learning_rate": 4.7440544162919645e-06, |
| "loss": 0.3441, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.6976, |
| "grad_norm": 0.2286655448843364, |
| "learning_rate": 4.734755891086498e-06, |
| "loss": 0.3531, |
| "step": 1061 |
| }, |
| { |
| "epoch": 1.6992, |
| "grad_norm": 0.23950943107257192, |
| "learning_rate": 4.725458285727195e-06, |
| "loss": 0.3768, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.7008, |
| "grad_norm": 0.21910441400347028, |
| "learning_rate": 4.716161632457438e-06, |
| "loss": 0.3369, |
| "step": 1063 |
| }, |
| { |
| "epoch": 1.7024, |
| "grad_norm": 0.21677608074236288, |
| "learning_rate": 4.7068659635173034e-06, |
| "loss": 0.3492, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.704, |
| "grad_norm": 0.21895630366747554, |
| "learning_rate": 4.6975713111434556e-06, |
| "loss": 0.3479, |
| "step": 1065 |
| }, |
| { |
| "epoch": 1.7056, |
| "grad_norm": 0.2360635739282523, |
| "learning_rate": 4.688277707569035e-06, |
| "loss": 0.388, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.7072, |
| "grad_norm": 0.23134593118084898, |
| "learning_rate": 4.678985185023542e-06, |
| "loss": 0.3775, |
| "step": 1067 |
| }, |
| { |
| "epoch": 1.7088, |
| "grad_norm": 0.229609828969421, |
| "learning_rate": 4.669693775732733e-06, |
| "loss": 0.3801, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.7104, |
| "grad_norm": 0.2328227265542956, |
| "learning_rate": 4.660403511918499e-06, |
| "loss": 0.3793, |
| "step": 1069 |
| }, |
| { |
| "epoch": 1.712, |
| "grad_norm": 0.21967637969412696, |
| "learning_rate": 4.65111442579876e-06, |
| "loss": 0.3513, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.7136, |
| "grad_norm": 0.23227694714803193, |
| "learning_rate": 4.641826549587352e-06, |
| "loss": 0.3822, |
| "step": 1071 |
| }, |
| { |
| "epoch": 1.7151999999999998, |
| "grad_norm": 0.3283372998798774, |
| "learning_rate": 4.632539915493915e-06, |
| "loss": 0.3563, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.7168, |
| "grad_norm": 0.22414037903901304, |
| "learning_rate": 4.623254555723783e-06, |
| "loss": 0.3655, |
| "step": 1073 |
| }, |
| { |
| "epoch": 1.7184, |
| "grad_norm": 0.22884865370278523, |
| "learning_rate": 4.613970502477867e-06, |
| "loss": 0.3784, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 0.21307320550132505, |
| "learning_rate": 4.604687787952552e-06, |
| "loss": 0.3378, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.7216, |
| "grad_norm": 0.20821644700962327, |
| "learning_rate": 4.5954064443395765e-06, |
| "loss": 0.341, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.7231999999999998, |
| "grad_norm": 0.22650708821344656, |
| "learning_rate": 4.586126503825925e-06, |
| "loss": 0.3806, |
| "step": 1077 |
| }, |
| { |
| "epoch": 1.7248, |
| "grad_norm": 0.234825498328115, |
| "learning_rate": 4.57684799859372e-06, |
| "loss": 0.3771, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.7264, |
| "grad_norm": 0.23048381638814078, |
| "learning_rate": 4.567570960820101e-06, |
| "loss": 0.3421, |
| "step": 1079 |
| }, |
| { |
| "epoch": 1.728, |
| "grad_norm": 0.22505541227311865, |
| "learning_rate": 4.558295422677124e-06, |
| "loss": 0.3589, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.7296, |
| "grad_norm": 0.22802218970021382, |
| "learning_rate": 4.54902141633164e-06, |
| "loss": 0.3478, |
| "step": 1081 |
| }, |
| { |
| "epoch": 1.7311999999999999, |
| "grad_norm": 0.21921858651414675, |
| "learning_rate": 4.539748973945191e-06, |
| "loss": 0.3599, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.7328000000000001, |
| "grad_norm": 0.22408220037910923, |
| "learning_rate": 4.53047812767389e-06, |
| "loss": 0.3666, |
| "step": 1083 |
| }, |
| { |
| "epoch": 1.7344, |
| "grad_norm": 0.2384187475314328, |
| "learning_rate": 4.5212089096683234e-06, |
| "loss": 0.3763, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.736, |
| "grad_norm": 0.2287373020446219, |
| "learning_rate": 4.511941352073424e-06, |
| "loss": 0.369, |
| "step": 1085 |
| }, |
| { |
| "epoch": 1.7376, |
| "grad_norm": 0.21714400498021044, |
| "learning_rate": 4.5026754870283695e-06, |
| "loss": 0.3451, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.7391999999999999, |
| "grad_norm": 0.24021282357971432, |
| "learning_rate": 4.493411346666465e-06, |
| "loss": 0.364, |
| "step": 1087 |
| }, |
| { |
| "epoch": 1.7408000000000001, |
| "grad_norm": 0.21550093793191907, |
| "learning_rate": 4.484148963115038e-06, |
| "loss": 0.3644, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.7424, |
| "grad_norm": 0.22271068404213717, |
| "learning_rate": 4.474888368495322e-06, |
| "loss": 0.3606, |
| "step": 1089 |
| }, |
| { |
| "epoch": 1.744, |
| "grad_norm": 0.22761710470461724, |
| "learning_rate": 4.465629594922348e-06, |
| "loss": 0.3782, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.7456, |
| "grad_norm": 0.21040580505532472, |
| "learning_rate": 4.456372674504828e-06, |
| "loss": 0.3528, |
| "step": 1091 |
| }, |
| { |
| "epoch": 1.7471999999999999, |
| "grad_norm": 0.218438741206348, |
| "learning_rate": 4.447117639345052e-06, |
| "loss": 0.3427, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.7488000000000001, |
| "grad_norm": 0.2111218210165627, |
| "learning_rate": 4.437864521538768e-06, |
| "loss": 0.3445, |
| "step": 1093 |
| }, |
| { |
| "epoch": 1.7504, |
| "grad_norm": 0.2320253181871985, |
| "learning_rate": 4.428613353175078e-06, |
| "loss": 0.3695, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.752, |
| "grad_norm": 0.2466323205459218, |
| "learning_rate": 4.4193641663363214e-06, |
| "loss": 0.3541, |
| "step": 1095 |
| }, |
| { |
| "epoch": 1.7536, |
| "grad_norm": 0.26812931918253086, |
| "learning_rate": 4.410116993097968e-06, |
| "loss": 0.3905, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.7551999999999999, |
| "grad_norm": 0.23106476080283783, |
| "learning_rate": 4.400871865528502e-06, |
| "loss": 0.3546, |
| "step": 1097 |
| }, |
| { |
| "epoch": 1.7568000000000001, |
| "grad_norm": 0.23261347685318606, |
| "learning_rate": 4.391628815689314e-06, |
| "loss": 0.3894, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.7584, |
| "grad_norm": 0.22732737502184266, |
| "learning_rate": 4.382387875634592e-06, |
| "loss": 0.333, |
| "step": 1099 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 0.22784303499997524, |
| "learning_rate": 4.373149077411203e-06, |
| "loss": 0.373, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.7616, |
| "grad_norm": 0.21892623073279657, |
| "learning_rate": 4.363912453058589e-06, |
| "loss": 0.3568, |
| "step": 1101 |
| }, |
| { |
| "epoch": 1.7631999999999999, |
| "grad_norm": 0.22909270634272916, |
| "learning_rate": 4.354678034608654e-06, |
| "loss": 0.3762, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.7648000000000001, |
| "grad_norm": 0.22541528242689915, |
| "learning_rate": 4.345445854085649e-06, |
| "loss": 0.365, |
| "step": 1103 |
| }, |
| { |
| "epoch": 1.7664, |
| "grad_norm": 0.23018918091059853, |
| "learning_rate": 4.336215943506066e-06, |
| "loss": 0.3701, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.768, |
| "grad_norm": 0.33688595973244456, |
| "learning_rate": 4.326988334878526e-06, |
| "loss": 0.3721, |
| "step": 1105 |
| }, |
| { |
| "epoch": 1.7696, |
| "grad_norm": 0.23248376128224, |
| "learning_rate": 4.317763060203665e-06, |
| "loss": 0.3688, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.7711999999999999, |
| "grad_norm": 0.22348615362356153, |
| "learning_rate": 4.308540151474027e-06, |
| "loss": 0.3639, |
| "step": 1107 |
| }, |
| { |
| "epoch": 1.7728000000000002, |
| "grad_norm": 0.23864921682154716, |
| "learning_rate": 4.299319640673948e-06, |
| "loss": 0.3767, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.7744, |
| "grad_norm": 0.21193054515476886, |
| "learning_rate": 4.290101559779451e-06, |
| "loss": 0.3311, |
| "step": 1109 |
| }, |
| { |
| "epoch": 1.776, |
| "grad_norm": 0.22396143420182207, |
| "learning_rate": 4.280885940758131e-06, |
| "loss": 0.3773, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.7776, |
| "grad_norm": 0.21253095542541528, |
| "learning_rate": 4.271672815569047e-06, |
| "loss": 0.3449, |
| "step": 1111 |
| }, |
| { |
| "epoch": 1.7792, |
| "grad_norm": 0.27188551658882243, |
| "learning_rate": 4.262462216162606e-06, |
| "loss": 0.3736, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.7808000000000002, |
| "grad_norm": 0.22443596719558226, |
| "learning_rate": 4.253254174480462e-06, |
| "loss": 0.3596, |
| "step": 1113 |
| }, |
| { |
| "epoch": 1.7824, |
| "grad_norm": 0.23466889450658973, |
| "learning_rate": 4.244048722455393e-06, |
| "loss": 0.3682, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.784, |
| "grad_norm": 0.2256108412732194, |
| "learning_rate": 4.234845892011198e-06, |
| "loss": 0.3614, |
| "step": 1115 |
| }, |
| { |
| "epoch": 1.7856, |
| "grad_norm": 0.23551932414988708, |
| "learning_rate": 4.225645715062585e-06, |
| "loss": 0.3536, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.7872, |
| "grad_norm": 0.22890026402600988, |
| "learning_rate": 4.216448223515061e-06, |
| "loss": 0.3835, |
| "step": 1117 |
| }, |
| { |
| "epoch": 1.7888, |
| "grad_norm": 0.21498706833903172, |
| "learning_rate": 4.2072534492648184e-06, |
| "loss": 0.3401, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.7904, |
| "grad_norm": 0.23566405213744998, |
| "learning_rate": 4.198061424198627e-06, |
| "loss": 0.3802, |
| "step": 1119 |
| }, |
| { |
| "epoch": 1.792, |
| "grad_norm": 0.2276543350049926, |
| "learning_rate": 4.188872180193723e-06, |
| "loss": 0.3668, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.7936, |
| "grad_norm": 0.2315572018946117, |
| "learning_rate": 4.179685749117698e-06, |
| "loss": 0.3865, |
| "step": 1121 |
| }, |
| { |
| "epoch": 1.7952, |
| "grad_norm": 0.2340434164082974, |
| "learning_rate": 4.170502162828385e-06, |
| "loss": 0.3493, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.7968, |
| "grad_norm": 0.21833151893362848, |
| "learning_rate": 4.161321453173759e-06, |
| "loss": 0.34, |
| "step": 1123 |
| }, |
| { |
| "epoch": 1.7984, |
| "grad_norm": 0.2132103421086988, |
| "learning_rate": 4.152143651991812e-06, |
| "loss": 0.3398, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.20719353643236527, |
| "learning_rate": 4.142968791110455e-06, |
| "loss": 0.3376, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.8016, |
| "grad_norm": 0.21491856137955953, |
| "learning_rate": 4.133796902347397e-06, |
| "loss": 0.3389, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.8032, |
| "grad_norm": 0.2305197269214596, |
| "learning_rate": 4.124628017510043e-06, |
| "loss": 0.3818, |
| "step": 1127 |
| }, |
| { |
| "epoch": 1.8048, |
| "grad_norm": 0.22713600154562352, |
| "learning_rate": 4.115462168395382e-06, |
| "loss": 0.3453, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.8064, |
| "grad_norm": 0.2192635875890699, |
| "learning_rate": 4.106299386789873e-06, |
| "loss": 0.3487, |
| "step": 1129 |
| }, |
| { |
| "epoch": 1.808, |
| "grad_norm": 0.2183158864390212, |
| "learning_rate": 4.097139704469337e-06, |
| "loss": 0.3737, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.8096, |
| "grad_norm": 0.2135448887212981, |
| "learning_rate": 4.0879831531988485e-06, |
| "loss": 0.3437, |
| "step": 1131 |
| }, |
| { |
| "epoch": 1.8112, |
| "grad_norm": 0.22942063071971094, |
| "learning_rate": 4.078829764732621e-06, |
| "loss": 0.3682, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.8128, |
| "grad_norm": 0.21664694865434705, |
| "learning_rate": 4.069679570813903e-06, |
| "loss": 0.3635, |
| "step": 1133 |
| }, |
| { |
| "epoch": 1.8144, |
| "grad_norm": 0.2132656909881387, |
| "learning_rate": 4.060532603174865e-06, |
| "loss": 0.3548, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.8159999999999998, |
| "grad_norm": 0.22611817805403897, |
| "learning_rate": 4.051388893536484e-06, |
| "loss": 0.3746, |
| "step": 1135 |
| }, |
| { |
| "epoch": 1.8176, |
| "grad_norm": 0.22865237002573302, |
| "learning_rate": 4.042248473608442e-06, |
| "loss": 0.3756, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.8192, |
| "grad_norm": 0.22282005215303108, |
| "learning_rate": 4.033111375089013e-06, |
| "loss": 0.3614, |
| "step": 1137 |
| }, |
| { |
| "epoch": 1.8208, |
| "grad_norm": 0.23208436102373642, |
| "learning_rate": 4.023977629664951e-06, |
| "loss": 0.3628, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.8224, |
| "grad_norm": 0.21887819975559739, |
| "learning_rate": 4.0148472690113845e-06, |
| "loss": 0.3552, |
| "step": 1139 |
| }, |
| { |
| "epoch": 1.8239999999999998, |
| "grad_norm": 0.2223476472822128, |
| "learning_rate": 4.0057203247917e-06, |
| "loss": 0.3632, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.8256000000000001, |
| "grad_norm": 0.22733253781244972, |
| "learning_rate": 3.996596828657437e-06, |
| "loss": 0.3546, |
| "step": 1141 |
| }, |
| { |
| "epoch": 1.8272, |
| "grad_norm": 0.22793946343148225, |
| "learning_rate": 3.987476812248181e-06, |
| "loss": 0.3531, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.8288, |
| "grad_norm": 0.24101260189330714, |
| "learning_rate": 3.978360307191444e-06, |
| "loss": 0.3627, |
| "step": 1143 |
| }, |
| { |
| "epoch": 1.8304, |
| "grad_norm": 0.20841630849723938, |
| "learning_rate": 3.969247345102567e-06, |
| "loss": 0.3456, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.8319999999999999, |
| "grad_norm": 0.22507138051620873, |
| "learning_rate": 3.960137957584601e-06, |
| "loss": 0.3779, |
| "step": 1145 |
| }, |
| { |
| "epoch": 1.8336000000000001, |
| "grad_norm": 0.22805048356690227, |
| "learning_rate": 3.9510321762282e-06, |
| "loss": 0.3615, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.8352, |
| "grad_norm": 0.22572528716946597, |
| "learning_rate": 3.941930032611513e-06, |
| "loss": 0.368, |
| "step": 1147 |
| }, |
| { |
| "epoch": 1.8368, |
| "grad_norm": 0.24822744293392016, |
| "learning_rate": 3.932831558300074e-06, |
| "loss": 0.3872, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.8384, |
| "grad_norm": 0.2281654113943852, |
| "learning_rate": 3.923736784846693e-06, |
| "loss": 0.3502, |
| "step": 1149 |
| }, |
| { |
| "epoch": 1.8399999999999999, |
| "grad_norm": 0.2213049124478571, |
| "learning_rate": 3.914645743791346e-06, |
| "loss": 0.3652, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.8416000000000001, |
| "grad_norm": 0.217329937600233, |
| "learning_rate": 3.90555846666106e-06, |
| "loss": 0.3659, |
| "step": 1151 |
| }, |
| { |
| "epoch": 1.8432, |
| "grad_norm": 0.22580179844231127, |
| "learning_rate": 3.896474984969817e-06, |
| "loss": 0.3761, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.8448, |
| "grad_norm": 0.22262784960113502, |
| "learning_rate": 3.887395330218429e-06, |
| "loss": 0.3675, |
| "step": 1153 |
| }, |
| { |
| "epoch": 1.8464, |
| "grad_norm": 0.23906164346393327, |
| "learning_rate": 3.878319533894443e-06, |
| "loss": 0.3971, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.8479999999999999, |
| "grad_norm": 0.2196815397631805, |
| "learning_rate": 3.869247627472021e-06, |
| "loss": 0.3783, |
| "step": 1155 |
| }, |
| { |
| "epoch": 1.8496000000000001, |
| "grad_norm": 0.22758429871542085, |
| "learning_rate": 3.860179642411838e-06, |
| "loss": 0.3668, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.8512, |
| "grad_norm": 0.2049467157281507, |
| "learning_rate": 3.851115610160967e-06, |
| "loss": 0.3352, |
| "step": 1157 |
| }, |
| { |
| "epoch": 1.8528, |
| "grad_norm": 0.2147614491753076, |
| "learning_rate": 3.842055562152775e-06, |
| "loss": 0.3633, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.8544, |
| "grad_norm": 0.2204867108284294, |
| "learning_rate": 3.8329995298068114e-06, |
| "loss": 0.357, |
| "step": 1159 |
| }, |
| { |
| "epoch": 1.8559999999999999, |
| "grad_norm": 0.22998111237125693, |
| "learning_rate": 3.8239475445287015e-06, |
| "loss": 0.3583, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.8576000000000001, |
| "grad_norm": 0.2298585480895593, |
| "learning_rate": 3.814899637710031e-06, |
| "loss": 0.3422, |
| "step": 1161 |
| }, |
| { |
| "epoch": 1.8592, |
| "grad_norm": 0.21114713881668376, |
| "learning_rate": 3.8058558407282465e-06, |
| "loss": 0.3781, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.8608, |
| "grad_norm": 0.2924962721636566, |
| "learning_rate": 3.7968161849465395e-06, |
| "loss": 0.356, |
| "step": 1163 |
| }, |
| { |
| "epoch": 1.8624, |
| "grad_norm": 0.22269680561351224, |
| "learning_rate": 3.78778070171374e-06, |
| "loss": 0.372, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.8639999999999999, |
| "grad_norm": 0.2119746838295717, |
| "learning_rate": 3.7787494223642096e-06, |
| "loss": 0.3625, |
| "step": 1165 |
| }, |
| { |
| "epoch": 1.8656000000000001, |
| "grad_norm": 0.20447842018564277, |
| "learning_rate": 3.7697223782177304e-06, |
| "loss": 0.3529, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.8672, |
| "grad_norm": 0.21873613008881324, |
| "learning_rate": 3.760699600579399e-06, |
| "loss": 0.3593, |
| "step": 1167 |
| }, |
| { |
| "epoch": 1.8688, |
| "grad_norm": 0.22661519728826032, |
| "learning_rate": 3.7516811207395116e-06, |
| "loss": 0.3727, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.8704, |
| "grad_norm": 0.2067234621443979, |
| "learning_rate": 3.742666969973463e-06, |
| "loss": 0.3608, |
| "step": 1169 |
| }, |
| { |
| "epoch": 1.8719999999999999, |
| "grad_norm": 0.22088652121542035, |
| "learning_rate": 3.733657179541635e-06, |
| "loss": 0.3432, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.8736000000000002, |
| "grad_norm": 0.21928409508020336, |
| "learning_rate": 3.724651780689286e-06, |
| "loss": 0.359, |
| "step": 1171 |
| }, |
| { |
| "epoch": 1.8752, |
| "grad_norm": 0.22244936389933476, |
| "learning_rate": 3.715650804646449e-06, |
| "loss": 0.3464, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.8768, |
| "grad_norm": 0.22248241269810634, |
| "learning_rate": 3.7066542826278153e-06, |
| "loss": 0.3661, |
| "step": 1173 |
| }, |
| { |
| "epoch": 1.8784, |
| "grad_norm": 0.2302524390014508, |
| "learning_rate": 3.6976622458326308e-06, |
| "loss": 0.3683, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.88, |
| "grad_norm": 0.20951208393919515, |
| "learning_rate": 3.6886747254445877e-06, |
| "loss": 0.3582, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.8816000000000002, |
| "grad_norm": 0.2283999105426131, |
| "learning_rate": 3.6796917526317153e-06, |
| "loss": 0.3587, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.8832, |
| "grad_norm": 0.2233053573727507, |
| "learning_rate": 3.6707133585462713e-06, |
| "loss": 0.3705, |
| "step": 1177 |
| }, |
| { |
| "epoch": 1.8848, |
| "grad_norm": 0.21356216550023108, |
| "learning_rate": 3.6617395743246375e-06, |
| "loss": 0.36, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.8864, |
| "grad_norm": 0.22158853572314477, |
| "learning_rate": 3.652770431087206e-06, |
| "loss": 0.3682, |
| "step": 1179 |
| }, |
| { |
| "epoch": 1.888, |
| "grad_norm": 0.21345316130597658, |
| "learning_rate": 3.6438059599382765e-06, |
| "loss": 0.367, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.8896, |
| "grad_norm": 0.23205324769634803, |
| "learning_rate": 3.634846191965944e-06, |
| "loss": 0.3773, |
| "step": 1181 |
| }, |
| { |
| "epoch": 1.8912, |
| "grad_norm": 0.2176407875502435, |
| "learning_rate": 3.625891158241994e-06, |
| "loss": 0.3545, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.8928, |
| "grad_norm": 0.21589999391277886, |
| "learning_rate": 3.6169408898217973e-06, |
| "loss": 0.357, |
| "step": 1183 |
| }, |
| { |
| "epoch": 1.8944, |
| "grad_norm": 0.21961746711824684, |
| "learning_rate": 3.6079954177441945e-06, |
| "loss": 0.3714, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.896, |
| "grad_norm": 0.2165582928709385, |
| "learning_rate": 3.599054773031394e-06, |
| "loss": 0.374, |
| "step": 1185 |
| }, |
| { |
| "epoch": 1.8976, |
| "grad_norm": 0.20690186530477075, |
| "learning_rate": 3.5901189866888654e-06, |
| "loss": 0.3376, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.8992, |
| "grad_norm": 0.2214499929255314, |
| "learning_rate": 3.581188089705226e-06, |
| "loss": 0.3652, |
| "step": 1187 |
| }, |
| { |
| "epoch": 1.9008, |
| "grad_norm": 0.21303532364136288, |
| "learning_rate": 3.572262113052142e-06, |
| "loss": 0.3495, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.9024, |
| "grad_norm": 0.2151151470646037, |
| "learning_rate": 3.563341087684213e-06, |
| "loss": 0.3531, |
| "step": 1189 |
| }, |
| { |
| "epoch": 1.904, |
| "grad_norm": 0.23221947889737432, |
| "learning_rate": 3.554425044538868e-06, |
| "loss": 0.3522, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.9056, |
| "grad_norm": 0.2627996452421008, |
| "learning_rate": 3.5455140145362587e-06, |
| "loss": 0.3552, |
| "step": 1191 |
| }, |
| { |
| "epoch": 1.9072, |
| "grad_norm": 0.2208841023265633, |
| "learning_rate": 3.5366080285791516e-06, |
| "loss": 0.3491, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.9088, |
| "grad_norm": 0.23151652749208435, |
| "learning_rate": 3.527707117552822e-06, |
| "loss": 0.3753, |
| "step": 1193 |
| }, |
| { |
| "epoch": 1.9104, |
| "grad_norm": 0.21615879576442829, |
| "learning_rate": 3.5188113123249435e-06, |
| "loss": 0.3401, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.912, |
| "grad_norm": 0.26885645942033615, |
| "learning_rate": 3.5099206437454852e-06, |
| "loss": 0.3883, |
| "step": 1195 |
| }, |
| { |
| "epoch": 1.9136, |
| "grad_norm": 0.2376378896018881, |
| "learning_rate": 3.5010351426466006e-06, |
| "loss": 0.3428, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.9152, |
| "grad_norm": 0.217377438962957, |
| "learning_rate": 3.4921548398425246e-06, |
| "loss": 0.3417, |
| "step": 1197 |
| }, |
| { |
| "epoch": 1.9167999999999998, |
| "grad_norm": 0.23786656619882812, |
| "learning_rate": 3.4832797661294633e-06, |
| "loss": 0.3704, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.9184, |
| "grad_norm": 0.22221503384957303, |
| "learning_rate": 3.4744099522854914e-06, |
| "loss": 0.3488, |
| "step": 1199 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 0.22512991707982286, |
| "learning_rate": 3.4655454290704393e-06, |
| "loss": 0.3734, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.9216, |
| "grad_norm": 0.21660263432390536, |
| "learning_rate": 3.4566862272257923e-06, |
| "loss": 0.367, |
| "step": 1201 |
| }, |
| { |
| "epoch": 1.9232, |
| "grad_norm": 0.2119471483223747, |
| "learning_rate": 3.44783237747458e-06, |
| "loss": 0.3355, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.9247999999999998, |
| "grad_norm": 0.20311475117262084, |
| "learning_rate": 3.438983910521273e-06, |
| "loss": 0.3346, |
| "step": 1203 |
| }, |
| { |
| "epoch": 1.9264000000000001, |
| "grad_norm": 0.21410533708947724, |
| "learning_rate": 3.430140857051675e-06, |
| "loss": 0.3688, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.928, |
| "grad_norm": 0.22332680316127393, |
| "learning_rate": 3.421303247732813e-06, |
| "loss": 0.3531, |
| "step": 1205 |
| }, |
| { |
| "epoch": 1.9296, |
| "grad_norm": 0.2085857623224285, |
| "learning_rate": 3.4124711132128374e-06, |
| "loss": 0.3389, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.9312, |
| "grad_norm": 0.26571832696528896, |
| "learning_rate": 3.4036444841209113e-06, |
| "loss": 0.3756, |
| "step": 1207 |
| }, |
| { |
| "epoch": 1.9327999999999999, |
| "grad_norm": 0.2130287654297381, |
| "learning_rate": 3.3948233910671036e-06, |
| "loss": 0.3589, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.9344000000000001, |
| "grad_norm": 0.21446924880206542, |
| "learning_rate": 3.3860078646422894e-06, |
| "loss": 0.3511, |
| "step": 1209 |
| }, |
| { |
| "epoch": 1.936, |
| "grad_norm": 0.2093640450149012, |
| "learning_rate": 3.3771979354180343e-06, |
| "loss": 0.3514, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.9376, |
| "grad_norm": 0.22492407475854545, |
| "learning_rate": 3.3683936339464957e-06, |
| "loss": 0.376, |
| "step": 1211 |
| }, |
| { |
| "epoch": 1.9392, |
| "grad_norm": 0.2463678517545241, |
| "learning_rate": 3.359594990760313e-06, |
| "loss": 0.3505, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.9407999999999999, |
| "grad_norm": 0.2067986016966478, |
| "learning_rate": 3.3508020363725043e-06, |
| "loss": 0.3341, |
| "step": 1213 |
| }, |
| { |
| "epoch": 1.9424000000000001, |
| "grad_norm": 0.21579598196071736, |
| "learning_rate": 3.34201480127636e-06, |
| "loss": 0.3414, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.944, |
| "grad_norm": 0.22652277437415505, |
| "learning_rate": 3.333233315945333e-06, |
| "loss": 0.3777, |
| "step": 1215 |
| }, |
| { |
| "epoch": 1.9456, |
| "grad_norm": 0.21357643024734985, |
| "learning_rate": 3.324457610832942e-06, |
| "loss": 0.3377, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.9472, |
| "grad_norm": 0.21393005346471314, |
| "learning_rate": 3.315687716372655e-06, |
| "loss": 0.3619, |
| "step": 1217 |
| }, |
| { |
| "epoch": 1.9487999999999999, |
| "grad_norm": 0.2253913033170178, |
| "learning_rate": 3.306923662977789e-06, |
| "loss": 0.3351, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.9504000000000001, |
| "grad_norm": 0.24620338571328337, |
| "learning_rate": 3.2981654810414128e-06, |
| "loss": 0.397, |
| "step": 1219 |
| }, |
| { |
| "epoch": 1.952, |
| "grad_norm": 0.20925535521897962, |
| "learning_rate": 3.2894132009362245e-06, |
| "loss": 0.3552, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.9536, |
| "grad_norm": 0.2112799846667196, |
| "learning_rate": 3.280666853014457e-06, |
| "loss": 0.347, |
| "step": 1221 |
| }, |
| { |
| "epoch": 1.9552, |
| "grad_norm": 0.23207842293618677, |
| "learning_rate": 3.271926467607774e-06, |
| "loss": 0.3375, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.9567999999999999, |
| "grad_norm": 0.22270028720829269, |
| "learning_rate": 3.2631920750271594e-06, |
| "loss": 0.3562, |
| "step": 1223 |
| }, |
| { |
| "epoch": 1.9584000000000001, |
| "grad_norm": 0.22170831494071339, |
| "learning_rate": 3.2544637055628135e-06, |
| "loss": 0.3724, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 0.2292226828146242, |
| "learning_rate": 3.2457413894840516e-06, |
| "loss": 0.3625, |
| "step": 1225 |
| }, |
| { |
| "epoch": 1.9616, |
| "grad_norm": 0.22765941476933654, |
| "learning_rate": 3.2370251570391933e-06, |
| "loss": 0.3864, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.9632, |
| "grad_norm": 0.298984072042988, |
| "learning_rate": 3.2283150384554642e-06, |
| "loss": 0.3708, |
| "step": 1227 |
| }, |
| { |
| "epoch": 1.9647999999999999, |
| "grad_norm": 0.22999695809226445, |
| "learning_rate": 3.219611063938883e-06, |
| "loss": 0.3804, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.9664000000000001, |
| "grad_norm": 0.22823641204274234, |
| "learning_rate": 3.210913263674166e-06, |
| "loss": 0.3557, |
| "step": 1229 |
| }, |
| { |
| "epoch": 1.968, |
| "grad_norm": 0.22390470255411435, |
| "learning_rate": 3.2022216678246145e-06, |
| "loss": 0.364, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.9696, |
| "grad_norm": 0.22138136049821505, |
| "learning_rate": 3.193536306532013e-06, |
| "loss": 0.3602, |
| "step": 1231 |
| }, |
| { |
| "epoch": 1.9712, |
| "grad_norm": 0.21649210083168077, |
| "learning_rate": 3.184857209916528e-06, |
| "loss": 0.3432, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.9727999999999999, |
| "grad_norm": 0.22884157182692041, |
| "learning_rate": 3.1761844080765993e-06, |
| "loss": 0.3717, |
| "step": 1233 |
| }, |
| { |
| "epoch": 1.9744000000000002, |
| "grad_norm": 0.2161274024734588, |
| "learning_rate": 3.1675179310888344e-06, |
| "loss": 0.3496, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.976, |
| "grad_norm": 0.2147654204377238, |
| "learning_rate": 3.15885780900791e-06, |
| "loss": 0.3569, |
| "step": 1235 |
| }, |
| { |
| "epoch": 1.9776, |
| "grad_norm": 0.21578380439483977, |
| "learning_rate": 3.150204071866464e-06, |
| "loss": 0.3602, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.9792, |
| "grad_norm": 0.22727490236903292, |
| "learning_rate": 3.141556749674988e-06, |
| "loss": 0.3497, |
| "step": 1237 |
| }, |
| { |
| "epoch": 1.9808, |
| "grad_norm": 0.2098202941370039, |
| "learning_rate": 3.132915872421734e-06, |
| "loss": 0.3476, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.9824000000000002, |
| "grad_norm": 0.21288672787074664, |
| "learning_rate": 3.1242814700725977e-06, |
| "loss": 0.3405, |
| "step": 1239 |
| }, |
| { |
| "epoch": 1.984, |
| "grad_norm": 0.22713927780876544, |
| "learning_rate": 3.1156535725710224e-06, |
| "loss": 0.3627, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.9856, |
| "grad_norm": 0.21725193077250074, |
| "learning_rate": 3.1070322098378925e-06, |
| "loss": 0.3632, |
| "step": 1241 |
| }, |
| { |
| "epoch": 1.9872, |
| "grad_norm": 0.22085999558751024, |
| "learning_rate": 3.0984174117714306e-06, |
| "loss": 0.366, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.9888, |
| "grad_norm": 0.24401647542401939, |
| "learning_rate": 3.0898092082470943e-06, |
| "loss": 0.3993, |
| "step": 1243 |
| }, |
| { |
| "epoch": 1.9904, |
| "grad_norm": 0.23616145724412238, |
| "learning_rate": 3.081207629117472e-06, |
| "loss": 0.3857, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.992, |
| "grad_norm": 0.21704054601451872, |
| "learning_rate": 3.0726127042121766e-06, |
| "loss": 0.359, |
| "step": 1245 |
| }, |
| { |
| "epoch": 1.9936, |
| "grad_norm": 0.22325041381983032, |
| "learning_rate": 3.064024463337747e-06, |
| "loss": 0.371, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.9952, |
| "grad_norm": 0.22500249116494242, |
| "learning_rate": 3.0554429362775417e-06, |
| "loss": 0.3609, |
| "step": 1247 |
| }, |
| { |
| "epoch": 1.9968, |
| "grad_norm": 0.22591289751379648, |
| "learning_rate": 3.046868152791638e-06, |
| "loss": 0.3639, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.9984, |
| "grad_norm": 0.2371139627803822, |
| "learning_rate": 3.038300142616723e-06, |
| "loss": 0.3672, |
| "step": 1249 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.20652964586164127, |
| "learning_rate": 3.0297389354659984e-06, |
| "loss": 0.3242, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.0016, |
| "grad_norm": 0.2381408269946942, |
| "learning_rate": 3.021184561029071e-06, |
| "loss": 0.3232, |
| "step": 1251 |
| }, |
| { |
| "epoch": 2.0032, |
| "grad_norm": 0.24116531627155727, |
| "learning_rate": 3.0126370489718537e-06, |
| "loss": 0.3208, |
| "step": 1252 |
| }, |
| { |
| "epoch": 2.0048, |
| "grad_norm": 0.24160531683767364, |
| "learning_rate": 3.0040964289364618e-06, |
| "loss": 0.3411, |
| "step": 1253 |
| }, |
| { |
| "epoch": 2.0064, |
| "grad_norm": 0.20932314795573098, |
| "learning_rate": 2.9955627305411074e-06, |
| "loss": 0.2956, |
| "step": 1254 |
| }, |
| { |
| "epoch": 2.008, |
| "grad_norm": 0.23324083636464982, |
| "learning_rate": 2.9870359833799994e-06, |
| "loss": 0.3311, |
| "step": 1255 |
| }, |
| { |
| "epoch": 2.0096, |
| "grad_norm": 0.25727571347501993, |
| "learning_rate": 2.978516217023243e-06, |
| "loss": 0.3355, |
| "step": 1256 |
| }, |
| { |
| "epoch": 2.0112, |
| "grad_norm": 0.2354103544982331, |
| "learning_rate": 2.97000346101673e-06, |
| "loss": 0.321, |
| "step": 1257 |
| }, |
| { |
| "epoch": 2.0128, |
| "grad_norm": 0.24395290619479554, |
| "learning_rate": 2.9614977448820444e-06, |
| "loss": 0.3329, |
| "step": 1258 |
| }, |
| { |
| "epoch": 2.0144, |
| "grad_norm": 0.23272621075618968, |
| "learning_rate": 2.952999098116356e-06, |
| "loss": 0.3122, |
| "step": 1259 |
| }, |
| { |
| "epoch": 2.016, |
| "grad_norm": 0.24419786371215577, |
| "learning_rate": 2.944507550192318e-06, |
| "loss": 0.3356, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.0176, |
| "grad_norm": 0.24470680617346624, |
| "learning_rate": 2.9360231305579645e-06, |
| "loss": 0.345, |
| "step": 1261 |
| }, |
| { |
| "epoch": 2.0192, |
| "grad_norm": 0.23146566014395697, |
| "learning_rate": 2.9275458686366108e-06, |
| "loss": 0.3215, |
| "step": 1262 |
| }, |
| { |
| "epoch": 2.0208, |
| "grad_norm": 0.28479337224099127, |
| "learning_rate": 2.9190757938267477e-06, |
| "loss": 0.3069, |
| "step": 1263 |
| }, |
| { |
| "epoch": 2.0224, |
| "grad_norm": 0.23502733412245694, |
| "learning_rate": 2.9106129355019464e-06, |
| "loss": 0.315, |
| "step": 1264 |
| }, |
| { |
| "epoch": 2.024, |
| "grad_norm": 0.23447454690958286, |
| "learning_rate": 2.9021573230107436e-06, |
| "loss": 0.3229, |
| "step": 1265 |
| }, |
| { |
| "epoch": 2.0256, |
| "grad_norm": 0.238409543367168, |
| "learning_rate": 2.8937089856765564e-06, |
| "loss": 0.3117, |
| "step": 1266 |
| }, |
| { |
| "epoch": 2.0272, |
| "grad_norm": 0.2204669827859715, |
| "learning_rate": 2.885267952797569e-06, |
| "loss": 0.2952, |
| "step": 1267 |
| }, |
| { |
| "epoch": 2.0288, |
| "grad_norm": 0.22535885152662152, |
| "learning_rate": 2.876834253646631e-06, |
| "loss": 0.3018, |
| "step": 1268 |
| }, |
| { |
| "epoch": 2.0304, |
| "grad_norm": 0.2176494762200902, |
| "learning_rate": 2.8684079174711665e-06, |
| "loss": 0.2888, |
| "step": 1269 |
| }, |
| { |
| "epoch": 2.032, |
| "grad_norm": 0.22489296871873812, |
| "learning_rate": 2.8599889734930548e-06, |
| "loss": 0.3013, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.0336, |
| "grad_norm": 0.24396149437011996, |
| "learning_rate": 2.8515774509085535e-06, |
| "loss": 0.3405, |
| "step": 1271 |
| }, |
| { |
| "epoch": 2.0352, |
| "grad_norm": 0.23432149229696977, |
| "learning_rate": 2.8431733788881703e-06, |
| "loss": 0.3323, |
| "step": 1272 |
| }, |
| { |
| "epoch": 2.0368, |
| "grad_norm": 0.24124838768766604, |
| "learning_rate": 2.8347767865765828e-06, |
| "loss": 0.3123, |
| "step": 1273 |
| }, |
| { |
| "epoch": 2.0384, |
| "grad_norm": 0.2309713006648156, |
| "learning_rate": 2.826387703092528e-06, |
| "loss": 0.3305, |
| "step": 1274 |
| }, |
| { |
| "epoch": 2.04, |
| "grad_norm": 0.23586081757042907, |
| "learning_rate": 2.8180061575286995e-06, |
| "loss": 0.3333, |
| "step": 1275 |
| }, |
| { |
| "epoch": 2.0416, |
| "grad_norm": 0.2327008581898183, |
| "learning_rate": 2.8096321789516557e-06, |
| "loss": 0.344, |
| "step": 1276 |
| }, |
| { |
| "epoch": 2.0432, |
| "grad_norm": 0.23819176666638256, |
| "learning_rate": 2.8012657964017056e-06, |
| "loss": 0.336, |
| "step": 1277 |
| }, |
| { |
| "epoch": 2.0448, |
| "grad_norm": 0.2187227763990159, |
| "learning_rate": 2.792907038892823e-06, |
| "loss": 0.3184, |
| "step": 1278 |
| }, |
| { |
| "epoch": 2.0464, |
| "grad_norm": 0.21145131265998782, |
| "learning_rate": 2.784555935412538e-06, |
| "loss": 0.285, |
| "step": 1279 |
| }, |
| { |
| "epoch": 2.048, |
| "grad_norm": 0.23085790946303078, |
| "learning_rate": 2.77621251492183e-06, |
| "loss": 0.3288, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.0496, |
| "grad_norm": 0.22128401134570433, |
| "learning_rate": 2.7678768063550454e-06, |
| "loss": 0.3063, |
| "step": 1281 |
| }, |
| { |
| "epoch": 2.0512, |
| "grad_norm": 0.2379051826096784, |
| "learning_rate": 2.759548838619774e-06, |
| "loss": 0.3281, |
| "step": 1282 |
| }, |
| { |
| "epoch": 2.0528, |
| "grad_norm": 0.2307193630585529, |
| "learning_rate": 2.7512286405967726e-06, |
| "loss": 0.3206, |
| "step": 1283 |
| }, |
| { |
| "epoch": 2.0544, |
| "grad_norm": 0.23275221947927213, |
| "learning_rate": 2.742916241139843e-06, |
| "loss": 0.3085, |
| "step": 1284 |
| }, |
| { |
| "epoch": 2.056, |
| "grad_norm": 0.23459850183642791, |
| "learning_rate": 2.7346116690757496e-06, |
| "loss": 0.3285, |
| "step": 1285 |
| }, |
| { |
| "epoch": 2.0576, |
| "grad_norm": 0.2307107789992749, |
| "learning_rate": 2.726314953204111e-06, |
| "loss": 0.3123, |
| "step": 1286 |
| }, |
| { |
| "epoch": 2.0592, |
| "grad_norm": 0.2289309567318887, |
| "learning_rate": 2.718026122297297e-06, |
| "loss": 0.3273, |
| "step": 1287 |
| }, |
| { |
| "epoch": 2.0608, |
| "grad_norm": 0.2387467447336502, |
| "learning_rate": 2.7097452051003375e-06, |
| "loss": 0.3514, |
| "step": 1288 |
| }, |
| { |
| "epoch": 2.0624, |
| "grad_norm": 0.22247902236205225, |
| "learning_rate": 2.701472230330813e-06, |
| "loss": 0.3261, |
| "step": 1289 |
| }, |
| { |
| "epoch": 2.064, |
| "grad_norm": 0.2243380823257603, |
| "learning_rate": 2.693207226678767e-06, |
| "loss": 0.3285, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.0656, |
| "grad_norm": 0.22050075176231151, |
| "learning_rate": 2.684950222806596e-06, |
| "loss": 0.3301, |
| "step": 1291 |
| }, |
| { |
| "epoch": 2.0672, |
| "grad_norm": 0.28426237474190064, |
| "learning_rate": 2.676701247348951e-06, |
| "loss": 0.334, |
| "step": 1292 |
| }, |
| { |
| "epoch": 2.0688, |
| "grad_norm": 0.22560724697055137, |
| "learning_rate": 2.6684603289126492e-06, |
| "loss": 0.3288, |
| "step": 1293 |
| }, |
| { |
| "epoch": 2.0704, |
| "grad_norm": 0.23518027537446703, |
| "learning_rate": 2.660227496076557e-06, |
| "loss": 0.3309, |
| "step": 1294 |
| }, |
| { |
| "epoch": 2.072, |
| "grad_norm": 0.23148194402135147, |
| "learning_rate": 2.6520027773915075e-06, |
| "loss": 0.3452, |
| "step": 1295 |
| }, |
| { |
| "epoch": 2.0736, |
| "grad_norm": 0.2363649082231482, |
| "learning_rate": 2.643786201380194e-06, |
| "loss": 0.328, |
| "step": 1296 |
| }, |
| { |
| "epoch": 2.0752, |
| "grad_norm": 0.22102637508481368, |
| "learning_rate": 2.6355777965370665e-06, |
| "loss": 0.33, |
| "step": 1297 |
| }, |
| { |
| "epoch": 2.0768, |
| "grad_norm": 0.22230793136372923, |
| "learning_rate": 2.627377591328245e-06, |
| "loss": 0.3213, |
| "step": 1298 |
| }, |
| { |
| "epoch": 2.0784, |
| "grad_norm": 0.24758321652662485, |
| "learning_rate": 2.6191856141914074e-06, |
| "loss": 0.3253, |
| "step": 1299 |
| }, |
| { |
| "epoch": 2.08, |
| "grad_norm": 0.24493988486380852, |
| "learning_rate": 2.6110018935357005e-06, |
| "loss": 0.363, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.0816, |
| "grad_norm": 0.2870857774293416, |
| "learning_rate": 2.6028264577416418e-06, |
| "loss": 0.3476, |
| "step": 1301 |
| }, |
| { |
| "epoch": 2.0832, |
| "grad_norm": 0.228305119519321, |
| "learning_rate": 2.594659335161008e-06, |
| "loss": 0.3135, |
| "step": 1302 |
| }, |
| { |
| "epoch": 2.0848, |
| "grad_norm": 0.23140826010366367, |
| "learning_rate": 2.586500554116757e-06, |
| "loss": 0.3252, |
| "step": 1303 |
| }, |
| { |
| "epoch": 2.0864, |
| "grad_norm": 0.250970075734723, |
| "learning_rate": 2.578350142902909e-06, |
| "loss": 0.3429, |
| "step": 1304 |
| }, |
| { |
| "epoch": 2.088, |
| "grad_norm": 0.23549025084521302, |
| "learning_rate": 2.570208129784466e-06, |
| "loss": 0.3395, |
| "step": 1305 |
| }, |
| { |
| "epoch": 2.0896, |
| "grad_norm": 0.2213497917027268, |
| "learning_rate": 2.562074542997305e-06, |
| "loss": 0.3233, |
| "step": 1306 |
| }, |
| { |
| "epoch": 2.0912, |
| "grad_norm": 0.22820004183669304, |
| "learning_rate": 2.5539494107480746e-06, |
| "loss": 0.3264, |
| "step": 1307 |
| }, |
| { |
| "epoch": 2.0928, |
| "grad_norm": 0.2252316061807066, |
| "learning_rate": 2.545832761214112e-06, |
| "loss": 0.3221, |
| "step": 1308 |
| }, |
| { |
| "epoch": 2.0944, |
| "grad_norm": 0.22629007706108822, |
| "learning_rate": 2.5377246225433306e-06, |
| "loss": 0.3194, |
| "step": 1309 |
| }, |
| { |
| "epoch": 2.096, |
| "grad_norm": 0.21925332589204724, |
| "learning_rate": 2.529625022854133e-06, |
| "loss": 0.317, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.0976, |
| "grad_norm": 0.24420567519299283, |
| "learning_rate": 2.5215339902353097e-06, |
| "loss": 0.3039, |
| "step": 1311 |
| }, |
| { |
| "epoch": 2.0992, |
| "grad_norm": 0.22920397777414803, |
| "learning_rate": 2.513451552745936e-06, |
| "loss": 0.3247, |
| "step": 1312 |
| }, |
| { |
| "epoch": 2.1008, |
| "grad_norm": 0.24742834625853843, |
| "learning_rate": 2.505377738415286e-06, |
| "loss": 0.3377, |
| "step": 1313 |
| }, |
| { |
| "epoch": 2.1024, |
| "grad_norm": 0.24153973541218082, |
| "learning_rate": 2.4973125752427243e-06, |
| "loss": 0.3501, |
| "step": 1314 |
| }, |
| { |
| "epoch": 2.104, |
| "grad_norm": 0.23560513248089243, |
| "learning_rate": 2.4892560911976167e-06, |
| "loss": 0.3263, |
| "step": 1315 |
| }, |
| { |
| "epoch": 2.1056, |
| "grad_norm": 0.2353208266305718, |
| "learning_rate": 2.481208314219233e-06, |
| "loss": 0.3069, |
| "step": 1316 |
| }, |
| { |
| "epoch": 2.1072, |
| "grad_norm": 0.23138909874610297, |
| "learning_rate": 2.4731692722166408e-06, |
| "loss": 0.3207, |
| "step": 1317 |
| }, |
| { |
| "epoch": 2.1088, |
| "grad_norm": 0.22317666739777714, |
| "learning_rate": 2.4651389930686226e-06, |
| "loss": 0.3056, |
| "step": 1318 |
| }, |
| { |
| "epoch": 2.1104, |
| "grad_norm": 0.22106525546786102, |
| "learning_rate": 2.457117504623565e-06, |
| "loss": 0.3372, |
| "step": 1319 |
| }, |
| { |
| "epoch": 2.112, |
| "grad_norm": 0.22481925502590314, |
| "learning_rate": 2.4491048346993756e-06, |
| "loss": 0.3216, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.1136, |
| "grad_norm": 0.39488221048477495, |
| "learning_rate": 2.4411010110833783e-06, |
| "loss": 0.3287, |
| "step": 1321 |
| }, |
| { |
| "epoch": 2.1152, |
| "grad_norm": 0.23832989717536432, |
| "learning_rate": 2.433106061532214e-06, |
| "loss": 0.3287, |
| "step": 1322 |
| }, |
| { |
| "epoch": 2.1168, |
| "grad_norm": 0.22694471674758826, |
| "learning_rate": 2.4251200137717545e-06, |
| "loss": 0.3355, |
| "step": 1323 |
| }, |
| { |
| "epoch": 2.1184, |
| "grad_norm": 0.21704315018141437, |
| "learning_rate": 2.4171428954969995e-06, |
| "loss": 0.326, |
| "step": 1324 |
| }, |
| { |
| "epoch": 2.12, |
| "grad_norm": 0.2401203272506689, |
| "learning_rate": 2.4091747343719828e-06, |
| "loss": 0.31, |
| "step": 1325 |
| }, |
| { |
| "epoch": 2.1216, |
| "grad_norm": 0.23563685259687123, |
| "learning_rate": 2.401215558029671e-06, |
| "loss": 0.3109, |
| "step": 1326 |
| }, |
| { |
| "epoch": 2.1232, |
| "grad_norm": 0.2243757747086668, |
| "learning_rate": 2.3932653940718784e-06, |
| "loss": 0.3162, |
| "step": 1327 |
| }, |
| { |
| "epoch": 2.1248, |
| "grad_norm": 0.21814104588506608, |
| "learning_rate": 2.3853242700691594e-06, |
| "loss": 0.3126, |
| "step": 1328 |
| }, |
| { |
| "epoch": 2.1264, |
| "grad_norm": 0.23727009986159658, |
| "learning_rate": 2.3773922135607217e-06, |
| "loss": 0.3296, |
| "step": 1329 |
| }, |
| { |
| "epoch": 2.128, |
| "grad_norm": 0.24055774932814053, |
| "learning_rate": 2.3694692520543293e-06, |
| "loss": 0.3014, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.1296, |
| "grad_norm": 0.22246223642142215, |
| "learning_rate": 2.3615554130262003e-06, |
| "loss": 0.3352, |
| "step": 1331 |
| }, |
| { |
| "epoch": 2.1312, |
| "grad_norm": 0.22805307302382793, |
| "learning_rate": 2.3536507239209223e-06, |
| "loss": 0.3211, |
| "step": 1332 |
| }, |
| { |
| "epoch": 2.1328, |
| "grad_norm": 0.23357558053070468, |
| "learning_rate": 2.3457552121513455e-06, |
| "loss": 0.3187, |
| "step": 1333 |
| }, |
| { |
| "epoch": 2.1344, |
| "grad_norm": 0.2243389119974022, |
| "learning_rate": 2.337868905098499e-06, |
| "loss": 0.3072, |
| "step": 1334 |
| }, |
| { |
| "epoch": 2.136, |
| "grad_norm": 0.2373574579022331, |
| "learning_rate": 2.329991830111492e-06, |
| "loss": 0.3325, |
| "step": 1335 |
| }, |
| { |
| "epoch": 2.1376, |
| "grad_norm": 0.23331122020213832, |
| "learning_rate": 2.32212401450741e-06, |
| "loss": 0.3, |
| "step": 1336 |
| }, |
| { |
| "epoch": 2.1391999999999998, |
| "grad_norm": 0.23854473809909968, |
| "learning_rate": 2.3142654855712353e-06, |
| "loss": 0.3269, |
| "step": 1337 |
| }, |
| { |
| "epoch": 2.1408, |
| "grad_norm": 0.22853512447460314, |
| "learning_rate": 2.3064162705557387e-06, |
| "loss": 0.3161, |
| "step": 1338 |
| }, |
| { |
| "epoch": 2.1424, |
| "grad_norm": 0.23296190304986167, |
| "learning_rate": 2.2985763966813963e-06, |
| "loss": 0.3342, |
| "step": 1339 |
| }, |
| { |
| "epoch": 2.144, |
| "grad_norm": 0.24834562780988573, |
| "learning_rate": 2.2907458911362885e-06, |
| "loss": 0.3366, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.1456, |
| "grad_norm": 0.25433924570682526, |
| "learning_rate": 2.2829247810760023e-06, |
| "loss": 0.3471, |
| "step": 1341 |
| }, |
| { |
| "epoch": 2.1471999999999998, |
| "grad_norm": 0.22492583124628163, |
| "learning_rate": 2.275113093623551e-06, |
| "loss": 0.3139, |
| "step": 1342 |
| }, |
| { |
| "epoch": 2.1488, |
| "grad_norm": 0.2343628341986889, |
| "learning_rate": 2.2673108558692603e-06, |
| "loss": 0.3159, |
| "step": 1343 |
| }, |
| { |
| "epoch": 2.1504, |
| "grad_norm": 0.22573518172167267, |
| "learning_rate": 2.259518094870693e-06, |
| "loss": 0.3151, |
| "step": 1344 |
| }, |
| { |
| "epoch": 2.152, |
| "grad_norm": 0.23401747086505248, |
| "learning_rate": 2.251734837652547e-06, |
| "loss": 0.3527, |
| "step": 1345 |
| }, |
| { |
| "epoch": 2.1536, |
| "grad_norm": 0.2154175778690248, |
| "learning_rate": 2.243961111206555e-06, |
| "loss": 0.2956, |
| "step": 1346 |
| }, |
| { |
| "epoch": 2.1552, |
| "grad_norm": 0.2430165522849067, |
| "learning_rate": 2.236196942491407e-06, |
| "loss": 0.3174, |
| "step": 1347 |
| }, |
| { |
| "epoch": 2.1568, |
| "grad_norm": 0.23601240333106377, |
| "learning_rate": 2.228442358432638e-06, |
| "loss": 0.3102, |
| "step": 1348 |
| }, |
| { |
| "epoch": 2.1584, |
| "grad_norm": 0.2323596305984812, |
| "learning_rate": 2.2206973859225518e-06, |
| "loss": 0.3298, |
| "step": 1349 |
| }, |
| { |
| "epoch": 2.16, |
| "grad_norm": 0.21189773305084328, |
| "learning_rate": 2.2129620518201184e-06, |
| "loss": 0.3134, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.1616, |
| "grad_norm": 0.22735589386232072, |
| "learning_rate": 2.2052363829508776e-06, |
| "loss": 0.3294, |
| "step": 1351 |
| }, |
| { |
| "epoch": 2.1632, |
| "grad_norm": 0.22606396712369073, |
| "learning_rate": 2.1975204061068594e-06, |
| "loss": 0.3393, |
| "step": 1352 |
| }, |
| { |
| "epoch": 2.1648, |
| "grad_norm": 0.22905191723674118, |
| "learning_rate": 2.189814148046473e-06, |
| "loss": 0.3323, |
| "step": 1353 |
| }, |
| { |
| "epoch": 2.1664, |
| "grad_norm": 0.21723187141262634, |
| "learning_rate": 2.182117635494431e-06, |
| "loss": 0.308, |
| "step": 1354 |
| }, |
| { |
| "epoch": 2.168, |
| "grad_norm": 0.21414225207029255, |
| "learning_rate": 2.1744308951416483e-06, |
| "loss": 0.3181, |
| "step": 1355 |
| }, |
| { |
| "epoch": 2.1696, |
| "grad_norm": 0.2210654577774532, |
| "learning_rate": 2.1667539536451455e-06, |
| "loss": 0.3214, |
| "step": 1356 |
| }, |
| { |
| "epoch": 2.1712, |
| "grad_norm": 0.22464847644861796, |
| "learning_rate": 2.1590868376279693e-06, |
| "loss": 0.3248, |
| "step": 1357 |
| }, |
| { |
| "epoch": 2.1728, |
| "grad_norm": 0.21976106643273627, |
| "learning_rate": 2.151429573679084e-06, |
| "loss": 0.3312, |
| "step": 1358 |
| }, |
| { |
| "epoch": 2.1744, |
| "grad_norm": 0.22093060278403948, |
| "learning_rate": 2.1437821883532956e-06, |
| "loss": 0.2921, |
| "step": 1359 |
| }, |
| { |
| "epoch": 2.176, |
| "grad_norm": 0.2367489501252667, |
| "learning_rate": 2.136144708171145e-06, |
| "loss": 0.3273, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.1776, |
| "grad_norm": 0.2506619488598268, |
| "learning_rate": 2.128517159618827e-06, |
| "loss": 0.3388, |
| "step": 1361 |
| }, |
| { |
| "epoch": 2.1792, |
| "grad_norm": 0.22321368446621537, |
| "learning_rate": 2.1208995691480947e-06, |
| "loss": 0.3105, |
| "step": 1362 |
| }, |
| { |
| "epoch": 2.1808, |
| "grad_norm": 0.2094928155897176, |
| "learning_rate": 2.1132919631761637e-06, |
| "loss": 0.2974, |
| "step": 1363 |
| }, |
| { |
| "epoch": 2.1824, |
| "grad_norm": 0.217177065974996, |
| "learning_rate": 2.1056943680856286e-06, |
| "loss": 0.3196, |
| "step": 1364 |
| }, |
| { |
| "epoch": 2.184, |
| "grad_norm": 0.2261053763612296, |
| "learning_rate": 2.098106810224362e-06, |
| "loss": 0.3058, |
| "step": 1365 |
| }, |
| { |
| "epoch": 2.1856, |
| "grad_norm": 0.22901397841864793, |
| "learning_rate": 2.0905293159054315e-06, |
| "loss": 0.3208, |
| "step": 1366 |
| }, |
| { |
| "epoch": 2.1872, |
| "grad_norm": 0.25236378462495745, |
| "learning_rate": 2.0829619114070068e-06, |
| "loss": 0.314, |
| "step": 1367 |
| }, |
| { |
| "epoch": 2.1888, |
| "grad_norm": 0.2245620122339832, |
| "learning_rate": 2.075404622972261e-06, |
| "loss": 0.3184, |
| "step": 1368 |
| }, |
| { |
| "epoch": 2.1904, |
| "grad_norm": 0.20757081079637762, |
| "learning_rate": 2.0678574768092926e-06, |
| "loss": 0.3006, |
| "step": 1369 |
| }, |
| { |
| "epoch": 2.192, |
| "grad_norm": 0.22196666232516493, |
| "learning_rate": 2.0603204990910195e-06, |
| "loss": 0.3021, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.1936, |
| "grad_norm": 0.23300716836685179, |
| "learning_rate": 2.0527937159551044e-06, |
| "loss": 0.33, |
| "step": 1371 |
| }, |
| { |
| "epoch": 2.1952, |
| "grad_norm": 0.22162466894296373, |
| "learning_rate": 2.0452771535038518e-06, |
| "loss": 0.3301, |
| "step": 1372 |
| }, |
| { |
| "epoch": 2.1968, |
| "grad_norm": 0.23033884780072156, |
| "learning_rate": 2.03777083780412e-06, |
| "loss": 0.3318, |
| "step": 1373 |
| }, |
| { |
| "epoch": 2.1984, |
| "grad_norm": 0.22451435916815873, |
| "learning_rate": 2.030274794887237e-06, |
| "loss": 0.3248, |
| "step": 1374 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 0.216423464190513, |
| "learning_rate": 2.0227890507488993e-06, |
| "loss": 0.299, |
| "step": 1375 |
| }, |
| { |
| "epoch": 2.2016, |
| "grad_norm": 0.22124556013917196, |
| "learning_rate": 2.0153136313490945e-06, |
| "loss": 0.2948, |
| "step": 1376 |
| }, |
| { |
| "epoch": 2.2032, |
| "grad_norm": 0.2336545100462845, |
| "learning_rate": 2.0078485626120015e-06, |
| "loss": 0.3152, |
| "step": 1377 |
| }, |
| { |
| "epoch": 2.2048, |
| "grad_norm": 0.22511204299495455, |
| "learning_rate": 2.000393870425904e-06, |
| "loss": 0.3292, |
| "step": 1378 |
| }, |
| { |
| "epoch": 2.2064, |
| "grad_norm": 0.23475428261778314, |
| "learning_rate": 1.9929495806431024e-06, |
| "loss": 0.3263, |
| "step": 1379 |
| }, |
| { |
| "epoch": 2.208, |
| "grad_norm": 0.23136490129641113, |
| "learning_rate": 1.985515719079819e-06, |
| "loss": 0.3296, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.2096, |
| "grad_norm": 0.21869302405407526, |
| "learning_rate": 1.978092311516116e-06, |
| "loss": 0.3311, |
| "step": 1381 |
| }, |
| { |
| "epoch": 2.2112, |
| "grad_norm": 0.2170630427438648, |
| "learning_rate": 1.9706793836957964e-06, |
| "loss": 0.3187, |
| "step": 1382 |
| }, |
| { |
| "epoch": 2.2128, |
| "grad_norm": 0.251986491832162, |
| "learning_rate": 1.963276961326326e-06, |
| "loss": 0.3304, |
| "step": 1383 |
| }, |
| { |
| "epoch": 2.2144, |
| "grad_norm": 0.21937945489073496, |
| "learning_rate": 1.955885070078737e-06, |
| "loss": 0.3239, |
| "step": 1384 |
| }, |
| { |
| "epoch": 2.216, |
| "grad_norm": 0.23471871490893748, |
| "learning_rate": 1.948503735587537e-06, |
| "loss": 0.3334, |
| "step": 1385 |
| }, |
| { |
| "epoch": 2.2176, |
| "grad_norm": 0.2154762296863616, |
| "learning_rate": 1.9411329834506286e-06, |
| "loss": 0.2925, |
| "step": 1386 |
| }, |
| { |
| "epoch": 2.2192, |
| "grad_norm": 0.22068563909697667, |
| "learning_rate": 1.9337728392292104e-06, |
| "loss": 0.3085, |
| "step": 1387 |
| }, |
| { |
| "epoch": 2.2208, |
| "grad_norm": 0.21701644675175985, |
| "learning_rate": 1.926423328447698e-06, |
| "loss": 0.3076, |
| "step": 1388 |
| }, |
| { |
| "epoch": 2.2224, |
| "grad_norm": 0.2276024670857963, |
| "learning_rate": 1.919084476593631e-06, |
| "loss": 0.3153, |
| "step": 1389 |
| }, |
| { |
| "epoch": 2.224, |
| "grad_norm": 0.21353276098175183, |
| "learning_rate": 1.9117563091175795e-06, |
| "loss": 0.2986, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.2256, |
| "grad_norm": 0.22929476819926245, |
| "learning_rate": 1.904438851433068e-06, |
| "loss": 0.33, |
| "step": 1391 |
| }, |
| { |
| "epoch": 2.2272, |
| "grad_norm": 0.2297200561235875, |
| "learning_rate": 1.897132128916474e-06, |
| "loss": 0.3323, |
| "step": 1392 |
| }, |
| { |
| "epoch": 2.2288, |
| "grad_norm": 0.25318275533651696, |
| "learning_rate": 1.8898361669069497e-06, |
| "loss": 0.3295, |
| "step": 1393 |
| }, |
| { |
| "epoch": 2.2304, |
| "grad_norm": 0.2305951003066183, |
| "learning_rate": 1.8825509907063328e-06, |
| "loss": 0.3146, |
| "step": 1394 |
| }, |
| { |
| "epoch": 2.232, |
| "grad_norm": 0.22532833476558872, |
| "learning_rate": 1.87527662557905e-06, |
| "loss": 0.3381, |
| "step": 1395 |
| }, |
| { |
| "epoch": 2.2336, |
| "grad_norm": 0.21933040543940893, |
| "learning_rate": 1.8680130967520433e-06, |
| "loss": 0.3242, |
| "step": 1396 |
| }, |
| { |
| "epoch": 2.2352, |
| "grad_norm": 0.2168283320881497, |
| "learning_rate": 1.8607604294146685e-06, |
| "loss": 0.314, |
| "step": 1397 |
| }, |
| { |
| "epoch": 2.2368, |
| "grad_norm": 0.2230066332576956, |
| "learning_rate": 1.8535186487186213e-06, |
| "loss": 0.3156, |
| "step": 1398 |
| }, |
| { |
| "epoch": 2.2384, |
| "grad_norm": 0.22763395143246054, |
| "learning_rate": 1.8462877797778367e-06, |
| "loss": 0.329, |
| "step": 1399 |
| }, |
| { |
| "epoch": 2.24, |
| "grad_norm": 0.21677785466373853, |
| "learning_rate": 1.8390678476684143e-06, |
| "loss": 0.3106, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.2416, |
| "grad_norm": 0.22307940809041352, |
| "learning_rate": 1.831858877428524e-06, |
| "loss": 0.3164, |
| "step": 1401 |
| }, |
| { |
| "epoch": 2.2432, |
| "grad_norm": 0.21963817089148155, |
| "learning_rate": 1.8246608940583166e-06, |
| "loss": 0.3104, |
| "step": 1402 |
| }, |
| { |
| "epoch": 2.2448, |
| "grad_norm": 0.2215758955804988, |
| "learning_rate": 1.8174739225198485e-06, |
| "loss": 0.3245, |
| "step": 1403 |
| }, |
| { |
| "epoch": 2.2464, |
| "grad_norm": 0.24022324081720273, |
| "learning_rate": 1.8102979877369808e-06, |
| "loss": 0.3312, |
| "step": 1404 |
| }, |
| { |
| "epoch": 2.248, |
| "grad_norm": 0.21584959994626818, |
| "learning_rate": 1.8031331145953047e-06, |
| "loss": 0.3075, |
| "step": 1405 |
| }, |
| { |
| "epoch": 2.2496, |
| "grad_norm": 0.2255865614658856, |
| "learning_rate": 1.7959793279420507e-06, |
| "loss": 0.317, |
| "step": 1406 |
| }, |
| { |
| "epoch": 2.2512, |
| "grad_norm": 0.22182094329440188, |
| "learning_rate": 1.7888366525859968e-06, |
| "loss": 0.309, |
| "step": 1407 |
| }, |
| { |
| "epoch": 2.2528, |
| "grad_norm": 0.22619389697900474, |
| "learning_rate": 1.781705113297396e-06, |
| "loss": 0.3274, |
| "step": 1408 |
| }, |
| { |
| "epoch": 2.2544, |
| "grad_norm": 0.23229801618980359, |
| "learning_rate": 1.7745847348078742e-06, |
| "loss": 0.3077, |
| "step": 1409 |
| }, |
| { |
| "epoch": 2.2560000000000002, |
| "grad_norm": 0.23344138484096072, |
| "learning_rate": 1.7674755418103578e-06, |
| "loss": 0.3316, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.2576, |
| "grad_norm": 0.22436114708874488, |
| "learning_rate": 1.7603775589589821e-06, |
| "loss": 0.3058, |
| "step": 1411 |
| }, |
| { |
| "epoch": 2.2592, |
| "grad_norm": 0.22931155534593328, |
| "learning_rate": 1.7532908108690038e-06, |
| "loss": 0.328, |
| "step": 1412 |
| }, |
| { |
| "epoch": 2.2608, |
| "grad_norm": 0.22131178208719418, |
| "learning_rate": 1.7462153221167222e-06, |
| "loss": 0.3174, |
| "step": 1413 |
| }, |
| { |
| "epoch": 2.2624, |
| "grad_norm": 0.22983402286450955, |
| "learning_rate": 1.7391511172393849e-06, |
| "loss": 0.2927, |
| "step": 1414 |
| }, |
| { |
| "epoch": 2.2640000000000002, |
| "grad_norm": 0.22218155846230692, |
| "learning_rate": 1.7320982207351128e-06, |
| "loss": 0.3221, |
| "step": 1415 |
| }, |
| { |
| "epoch": 2.2656, |
| "grad_norm": 0.25789612888289554, |
| "learning_rate": 1.7250566570628103e-06, |
| "loss": 0.3225, |
| "step": 1416 |
| }, |
| { |
| "epoch": 2.2672, |
| "grad_norm": 0.22688159815866837, |
| "learning_rate": 1.7180264506420746e-06, |
| "loss": 0.325, |
| "step": 1417 |
| }, |
| { |
| "epoch": 2.2688, |
| "grad_norm": 0.24156552429110179, |
| "learning_rate": 1.7110076258531244e-06, |
| "loss": 0.3198, |
| "step": 1418 |
| }, |
| { |
| "epoch": 2.2704, |
| "grad_norm": 0.23844492182489876, |
| "learning_rate": 1.7040002070367006e-06, |
| "loss": 0.3214, |
| "step": 1419 |
| }, |
| { |
| "epoch": 2.2720000000000002, |
| "grad_norm": 0.22703303439891687, |
| "learning_rate": 1.6970042184939943e-06, |
| "loss": 0.334, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.2736, |
| "grad_norm": 0.21795242762102673, |
| "learning_rate": 1.6900196844865575e-06, |
| "loss": 0.2961, |
| "step": 1421 |
| }, |
| { |
| "epoch": 2.2752, |
| "grad_norm": 0.24950555345478417, |
| "learning_rate": 1.683046629236213e-06, |
| "loss": 0.301, |
| "step": 1422 |
| }, |
| { |
| "epoch": 2.2768, |
| "grad_norm": 0.22848184287089418, |
| "learning_rate": 1.6760850769249837e-06, |
| "loss": 0.3136, |
| "step": 1423 |
| }, |
| { |
| "epoch": 2.2784, |
| "grad_norm": 0.22942194607369032, |
| "learning_rate": 1.669135051694994e-06, |
| "loss": 0.3093, |
| "step": 1424 |
| }, |
| { |
| "epoch": 2.2800000000000002, |
| "grad_norm": 0.21915432852322136, |
| "learning_rate": 1.662196577648398e-06, |
| "loss": 0.2982, |
| "step": 1425 |
| }, |
| { |
| "epoch": 2.2816, |
| "grad_norm": 0.2237697198087632, |
| "learning_rate": 1.6552696788472921e-06, |
| "loss": 0.3172, |
| "step": 1426 |
| }, |
| { |
| "epoch": 2.2832, |
| "grad_norm": 0.24117837879923246, |
| "learning_rate": 1.6483543793136247e-06, |
| "loss": 0.3187, |
| "step": 1427 |
| }, |
| { |
| "epoch": 2.2848, |
| "grad_norm": 0.22431560342283713, |
| "learning_rate": 1.6414507030291249e-06, |
| "loss": 0.3239, |
| "step": 1428 |
| }, |
| { |
| "epoch": 2.2864, |
| "grad_norm": 0.23676405192865027, |
| "learning_rate": 1.6345586739352105e-06, |
| "loss": 0.3116, |
| "step": 1429 |
| }, |
| { |
| "epoch": 2.288, |
| "grad_norm": 0.2451281685102598, |
| "learning_rate": 1.6276783159329095e-06, |
| "loss": 0.3133, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.2896, |
| "grad_norm": 0.24120740666514667, |
| "learning_rate": 1.6208096528827717e-06, |
| "loss": 0.3264, |
| "step": 1431 |
| }, |
| { |
| "epoch": 2.2912, |
| "grad_norm": 0.24528012239799285, |
| "learning_rate": 1.6139527086047929e-06, |
| "loss": 0.3493, |
| "step": 1432 |
| }, |
| { |
| "epoch": 2.2928, |
| "grad_norm": 0.2556210653183845, |
| "learning_rate": 1.6071075068783303e-06, |
| "loss": 0.3045, |
| "step": 1433 |
| }, |
| { |
| "epoch": 2.2944, |
| "grad_norm": 0.2287100523215139, |
| "learning_rate": 1.600274071442014e-06, |
| "loss": 0.338, |
| "step": 1434 |
| }, |
| { |
| "epoch": 2.296, |
| "grad_norm": 0.21906480407182, |
| "learning_rate": 1.5934524259936757e-06, |
| "loss": 0.3286, |
| "step": 1435 |
| }, |
| { |
| "epoch": 2.2976, |
| "grad_norm": 0.21929769109738279, |
| "learning_rate": 1.5866425941902524e-06, |
| "loss": 0.329, |
| "step": 1436 |
| }, |
| { |
| "epoch": 2.2992, |
| "grad_norm": 0.23363696349663418, |
| "learning_rate": 1.5798445996477219e-06, |
| "loss": 0.3549, |
| "step": 1437 |
| }, |
| { |
| "epoch": 2.3008, |
| "grad_norm": 0.216140553035713, |
| "learning_rate": 1.573058465941002e-06, |
| "loss": 0.3067, |
| "step": 1438 |
| }, |
| { |
| "epoch": 2.3024, |
| "grad_norm": 0.2374732834670114, |
| "learning_rate": 1.5662842166038844e-06, |
| "loss": 0.3379, |
| "step": 1439 |
| }, |
| { |
| "epoch": 2.304, |
| "grad_norm": 0.2432191674689953, |
| "learning_rate": 1.5595218751289465e-06, |
| "loss": 0.3286, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.3056, |
| "grad_norm": 0.20938741159504984, |
| "learning_rate": 1.5527714649674641e-06, |
| "loss": 0.3019, |
| "step": 1441 |
| }, |
| { |
| "epoch": 2.3072, |
| "grad_norm": 0.2208048308162257, |
| "learning_rate": 1.5460330095293447e-06, |
| "loss": 0.3436, |
| "step": 1442 |
| }, |
| { |
| "epoch": 2.3088, |
| "grad_norm": 0.2250399077592474, |
| "learning_rate": 1.5393065321830292e-06, |
| "loss": 0.338, |
| "step": 1443 |
| }, |
| { |
| "epoch": 2.3104, |
| "grad_norm": 0.21722229965056644, |
| "learning_rate": 1.5325920562554259e-06, |
| "loss": 0.3273, |
| "step": 1444 |
| }, |
| { |
| "epoch": 2.312, |
| "grad_norm": 0.21409224041501637, |
| "learning_rate": 1.5258896050318217e-06, |
| "loss": 0.2925, |
| "step": 1445 |
| }, |
| { |
| "epoch": 2.3136, |
| "grad_norm": 0.22042386355025897, |
| "learning_rate": 1.5191992017557994e-06, |
| "loss": 0.3289, |
| "step": 1446 |
| }, |
| { |
| "epoch": 2.3152, |
| "grad_norm": 0.2752662336672433, |
| "learning_rate": 1.512520869629165e-06, |
| "loss": 0.3235, |
| "step": 1447 |
| }, |
| { |
| "epoch": 2.3168, |
| "grad_norm": 0.24608400971801628, |
| "learning_rate": 1.5058546318118583e-06, |
| "loss": 0.3264, |
| "step": 1448 |
| }, |
| { |
| "epoch": 2.3184, |
| "grad_norm": 0.23076364816199196, |
| "learning_rate": 1.4992005114218805e-06, |
| "loss": 0.3105, |
| "step": 1449 |
| }, |
| { |
| "epoch": 2.32, |
| "grad_norm": 0.2430527676739228, |
| "learning_rate": 1.4925585315352108e-06, |
| "loss": 0.3268, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.3216, |
| "grad_norm": 2.0905507353266004, |
| "learning_rate": 1.485928715185721e-06, |
| "loss": 0.3363, |
| "step": 1451 |
| }, |
| { |
| "epoch": 2.3232, |
| "grad_norm": 0.2258521679269875, |
| "learning_rate": 1.4793110853651077e-06, |
| "loss": 0.3253, |
| "step": 1452 |
| }, |
| { |
| "epoch": 2.3247999999999998, |
| "grad_norm": 0.21855671227331058, |
| "learning_rate": 1.472705665022799e-06, |
| "loss": 0.3062, |
| "step": 1453 |
| }, |
| { |
| "epoch": 2.3264, |
| "grad_norm": 0.21878510530670894, |
| "learning_rate": 1.4661124770658857e-06, |
| "loss": 0.3235, |
| "step": 1454 |
| }, |
| { |
| "epoch": 2.328, |
| "grad_norm": 0.22611409439810762, |
| "learning_rate": 1.459531544359038e-06, |
| "loss": 0.3209, |
| "step": 1455 |
| }, |
| { |
| "epoch": 2.3296, |
| "grad_norm": 0.21262270605830358, |
| "learning_rate": 1.4529628897244214e-06, |
| "loss": 0.3102, |
| "step": 1456 |
| }, |
| { |
| "epoch": 2.3312, |
| "grad_norm": 0.22549445503657056, |
| "learning_rate": 1.4464065359416274e-06, |
| "loss": 0.3177, |
| "step": 1457 |
| }, |
| { |
| "epoch": 2.3327999999999998, |
| "grad_norm": 0.21425311204633085, |
| "learning_rate": 1.4398625057475845e-06, |
| "loss": 0.3085, |
| "step": 1458 |
| }, |
| { |
| "epoch": 2.3344, |
| "grad_norm": 0.21443766855600835, |
| "learning_rate": 1.4333308218364861e-06, |
| "loss": 0.3002, |
| "step": 1459 |
| }, |
| { |
| "epoch": 2.336, |
| "grad_norm": 0.2112500496864178, |
| "learning_rate": 1.4268115068597122e-06, |
| "loss": 0.2868, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.3376, |
| "grad_norm": 0.2324504144092052, |
| "learning_rate": 1.4203045834257418e-06, |
| "loss": 0.3261, |
| "step": 1461 |
| }, |
| { |
| "epoch": 2.3392, |
| "grad_norm": 0.22202801476661635, |
| "learning_rate": 1.4138100741000888e-06, |
| "loss": 0.3176, |
| "step": 1462 |
| }, |
| { |
| "epoch": 2.3407999999999998, |
| "grad_norm": 0.2154375102242997, |
| "learning_rate": 1.4073280014052077e-06, |
| "loss": 0.3203, |
| "step": 1463 |
| }, |
| { |
| "epoch": 2.3424, |
| "grad_norm": 0.22382614003356263, |
| "learning_rate": 1.4008583878204297e-06, |
| "loss": 0.3103, |
| "step": 1464 |
| }, |
| { |
| "epoch": 2.344, |
| "grad_norm": 0.22891968831581852, |
| "learning_rate": 1.3944012557818793e-06, |
| "loss": 0.2978, |
| "step": 1465 |
| }, |
| { |
| "epoch": 2.3456, |
| "grad_norm": 0.2248236680319697, |
| "learning_rate": 1.3879566276823896e-06, |
| "loss": 0.3155, |
| "step": 1466 |
| }, |
| { |
| "epoch": 2.3472, |
| "grad_norm": 0.24482159411922377, |
| "learning_rate": 1.3815245258714393e-06, |
| "loss": 0.3102, |
| "step": 1467 |
| }, |
| { |
| "epoch": 2.3487999999999998, |
| "grad_norm": 0.21851728809474935, |
| "learning_rate": 1.3751049726550587e-06, |
| "loss": 0.3042, |
| "step": 1468 |
| }, |
| { |
| "epoch": 2.3504, |
| "grad_norm": 0.23098727892198803, |
| "learning_rate": 1.368697990295766e-06, |
| "loss": 0.329, |
| "step": 1469 |
| }, |
| { |
| "epoch": 2.352, |
| "grad_norm": 0.23823102305917201, |
| "learning_rate": 1.3623036010124845e-06, |
| "loss": 0.3321, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.3536, |
| "grad_norm": 0.2536461938583511, |
| "learning_rate": 1.3559218269804624e-06, |
| "loss": 0.3074, |
| "step": 1471 |
| }, |
| { |
| "epoch": 2.3552, |
| "grad_norm": 0.22691489401401113, |
| "learning_rate": 1.3495526903312029e-06, |
| "loss": 0.3218, |
| "step": 1472 |
| }, |
| { |
| "epoch": 2.3568, |
| "grad_norm": 0.21602163700393537, |
| "learning_rate": 1.3431962131523796e-06, |
| "loss": 0.3126, |
| "step": 1473 |
| }, |
| { |
| "epoch": 2.3584, |
| "grad_norm": 0.23150889028472832, |
| "learning_rate": 1.3368524174877679e-06, |
| "loss": 0.3034, |
| "step": 1474 |
| }, |
| { |
| "epoch": 2.36, |
| "grad_norm": 0.2220191352373736, |
| "learning_rate": 1.330521325337164e-06, |
| "loss": 0.3188, |
| "step": 1475 |
| }, |
| { |
| "epoch": 2.3616, |
| "grad_norm": 0.21982050277050807, |
| "learning_rate": 1.3242029586563054e-06, |
| "loss": 0.3116, |
| "step": 1476 |
| }, |
| { |
| "epoch": 2.3632, |
| "grad_norm": 0.3246406203790508, |
| "learning_rate": 1.3178973393568055e-06, |
| "loss": 0.3103, |
| "step": 1477 |
| }, |
| { |
| "epoch": 2.3648, |
| "grad_norm": 0.23703866735272489, |
| "learning_rate": 1.3116044893060637e-06, |
| "loss": 0.3242, |
| "step": 1478 |
| }, |
| { |
| "epoch": 2.3664, |
| "grad_norm": 0.21059879125288178, |
| "learning_rate": 1.3053244303272022e-06, |
| "loss": 0.3039, |
| "step": 1479 |
| }, |
| { |
| "epoch": 2.368, |
| "grad_norm": 0.22987497036214377, |
| "learning_rate": 1.2990571841989796e-06, |
| "loss": 0.3353, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.3696, |
| "grad_norm": 0.25354708495451167, |
| "learning_rate": 1.2928027726557257e-06, |
| "loss": 0.3393, |
| "step": 1481 |
| }, |
| { |
| "epoch": 2.3712, |
| "grad_norm": 0.21090776681659268, |
| "learning_rate": 1.2865612173872577e-06, |
| "loss": 0.3144, |
| "step": 1482 |
| }, |
| { |
| "epoch": 2.3728, |
| "grad_norm": 0.23575362109101178, |
| "learning_rate": 1.2803325400388095e-06, |
| "loss": 0.326, |
| "step": 1483 |
| }, |
| { |
| "epoch": 2.3744, |
| "grad_norm": 0.22636627165040638, |
| "learning_rate": 1.2741167622109557e-06, |
| "loss": 0.3239, |
| "step": 1484 |
| }, |
| { |
| "epoch": 2.376, |
| "grad_norm": 0.21964235258652148, |
| "learning_rate": 1.2679139054595335e-06, |
| "loss": 0.3021, |
| "step": 1485 |
| }, |
| { |
| "epoch": 2.3776, |
| "grad_norm": 0.22998511275558411, |
| "learning_rate": 1.261723991295576e-06, |
| "loss": 0.3234, |
| "step": 1486 |
| }, |
| { |
| "epoch": 2.3792, |
| "grad_norm": 0.2162756595508079, |
| "learning_rate": 1.2555470411852262e-06, |
| "loss": 0.3127, |
| "step": 1487 |
| }, |
| { |
| "epoch": 2.3808, |
| "grad_norm": 0.22612907087302397, |
| "learning_rate": 1.2493830765496724e-06, |
| "loss": 0.3255, |
| "step": 1488 |
| }, |
| { |
| "epoch": 2.3824, |
| "grad_norm": 0.22697196296879035, |
| "learning_rate": 1.2432321187650726e-06, |
| "loss": 0.314, |
| "step": 1489 |
| }, |
| { |
| "epoch": 2.384, |
| "grad_norm": 0.21489495611332063, |
| "learning_rate": 1.237094189162471e-06, |
| "loss": 0.3195, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.3856, |
| "grad_norm": 0.22355481771136526, |
| "learning_rate": 1.2309693090277392e-06, |
| "loss": 0.3157, |
| "step": 1491 |
| }, |
| { |
| "epoch": 2.3872, |
| "grad_norm": 0.22022297510416242, |
| "learning_rate": 1.2248574996014872e-06, |
| "loss": 0.3051, |
| "step": 1492 |
| }, |
| { |
| "epoch": 2.3888, |
| "grad_norm": 0.2122757471002646, |
| "learning_rate": 1.218758782079001e-06, |
| "loss": 0.3246, |
| "step": 1493 |
| }, |
| { |
| "epoch": 2.3904, |
| "grad_norm": 0.22389635501886337, |
| "learning_rate": 1.2126731776101657e-06, |
| "loss": 0.3099, |
| "step": 1494 |
| }, |
| { |
| "epoch": 2.392, |
| "grad_norm": 0.2343054150891001, |
| "learning_rate": 1.2066007072993856e-06, |
| "loss": 0.3018, |
| "step": 1495 |
| }, |
| { |
| "epoch": 2.3936, |
| "grad_norm": 0.23598454099515714, |
| "learning_rate": 1.2005413922055247e-06, |
| "loss": 0.3525, |
| "step": 1496 |
| }, |
| { |
| "epoch": 2.3952, |
| "grad_norm": 0.34377862023442746, |
| "learning_rate": 1.194495253341818e-06, |
| "loss": 0.3029, |
| "step": 1497 |
| }, |
| { |
| "epoch": 2.3968, |
| "grad_norm": 0.22633647029877546, |
| "learning_rate": 1.1884623116758121e-06, |
| "loss": 0.3139, |
| "step": 1498 |
| }, |
| { |
| "epoch": 2.3984, |
| "grad_norm": 0.23184791045116174, |
| "learning_rate": 1.1824425881292846e-06, |
| "loss": 0.3379, |
| "step": 1499 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.21052080950477306, |
| "learning_rate": 1.1764361035781718e-06, |
| "loss": 0.2954, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.4016, |
| "grad_norm": 0.22058864902129705, |
| "learning_rate": 1.170442878852503e-06, |
| "loss": 0.3172, |
| "step": 1501 |
| }, |
| { |
| "epoch": 2.4032, |
| "grad_norm": 0.22749876156285595, |
| "learning_rate": 1.1644629347363173e-06, |
| "loss": 0.3158, |
| "step": 1502 |
| }, |
| { |
| "epoch": 2.4048, |
| "grad_norm": 0.21074831867828683, |
| "learning_rate": 1.1584962919676024e-06, |
| "loss": 0.2982, |
| "step": 1503 |
| }, |
| { |
| "epoch": 2.4064, |
| "grad_norm": 0.2325002421099622, |
| "learning_rate": 1.1525429712382175e-06, |
| "loss": 0.3169, |
| "step": 1504 |
| }, |
| { |
| "epoch": 2.408, |
| "grad_norm": 0.21560819844632, |
| "learning_rate": 1.1466029931938182e-06, |
| "loss": 0.3018, |
| "step": 1505 |
| }, |
| { |
| "epoch": 2.4096, |
| "grad_norm": 0.22854683338553725, |
| "learning_rate": 1.1406763784337948e-06, |
| "loss": 0.3047, |
| "step": 1506 |
| }, |
| { |
| "epoch": 2.4112, |
| "grad_norm": 0.2281732304697868, |
| "learning_rate": 1.1347631475111882e-06, |
| "loss": 0.3305, |
| "step": 1507 |
| }, |
| { |
| "epoch": 2.4128, |
| "grad_norm": 0.22571847509626305, |
| "learning_rate": 1.1288633209326288e-06, |
| "loss": 0.3148, |
| "step": 1508 |
| }, |
| { |
| "epoch": 2.4144, |
| "grad_norm": 0.2293333505452255, |
| "learning_rate": 1.122976919158264e-06, |
| "loss": 0.3021, |
| "step": 1509 |
| }, |
| { |
| "epoch": 2.416, |
| "grad_norm": 0.22545814763023236, |
| "learning_rate": 1.1171039626016789e-06, |
| "loss": 0.3189, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.4176, |
| "grad_norm": 0.22079449730050107, |
| "learning_rate": 1.1112444716298381e-06, |
| "loss": 0.3203, |
| "step": 1511 |
| }, |
| { |
| "epoch": 2.4192, |
| "grad_norm": 0.21718721844353098, |
| "learning_rate": 1.1053984665630025e-06, |
| "loss": 0.3026, |
| "step": 1512 |
| }, |
| { |
| "epoch": 2.4208, |
| "grad_norm": 0.22888178516233906, |
| "learning_rate": 1.0995659676746706e-06, |
| "loss": 0.3293, |
| "step": 1513 |
| }, |
| { |
| "epoch": 2.4224, |
| "grad_norm": 0.25954008800949735, |
| "learning_rate": 1.093746995191497e-06, |
| "loss": 0.3283, |
| "step": 1514 |
| }, |
| { |
| "epoch": 2.424, |
| "grad_norm": 0.2203143313339214, |
| "learning_rate": 1.0879415692932328e-06, |
| "loss": 0.3, |
| "step": 1515 |
| }, |
| { |
| "epoch": 2.4256, |
| "grad_norm": 0.22967404483242407, |
| "learning_rate": 1.0821497101126487e-06, |
| "loss": 0.3205, |
| "step": 1516 |
| }, |
| { |
| "epoch": 2.4272, |
| "grad_norm": 0.2130711170070916, |
| "learning_rate": 1.076371437735465e-06, |
| "loss": 0.3322, |
| "step": 1517 |
| }, |
| { |
| "epoch": 2.4288, |
| "grad_norm": 0.2401047276118849, |
| "learning_rate": 1.0706067722002877e-06, |
| "loss": 0.3218, |
| "step": 1518 |
| }, |
| { |
| "epoch": 2.4304, |
| "grad_norm": 0.3640445645362196, |
| "learning_rate": 1.064855733498531e-06, |
| "loss": 0.3294, |
| "step": 1519 |
| }, |
| { |
| "epoch": 2.432, |
| "grad_norm": 0.22808935678021308, |
| "learning_rate": 1.0591183415743562e-06, |
| "loss": 0.3237, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.4336, |
| "grad_norm": 0.22093782303612183, |
| "learning_rate": 1.0533946163245984e-06, |
| "loss": 0.3338, |
| "step": 1521 |
| }, |
| { |
| "epoch": 2.4352, |
| "grad_norm": 0.2141789945132993, |
| "learning_rate": 1.047684577598694e-06, |
| "loss": 0.3141, |
| "step": 1522 |
| }, |
| { |
| "epoch": 2.4368, |
| "grad_norm": 0.21325298211614038, |
| "learning_rate": 1.0419882451986197e-06, |
| "loss": 0.3167, |
| "step": 1523 |
| }, |
| { |
| "epoch": 2.4384, |
| "grad_norm": 0.23178856415651408, |
| "learning_rate": 1.0363056388788162e-06, |
| "loss": 0.3438, |
| "step": 1524 |
| }, |
| { |
| "epoch": 2.44, |
| "grad_norm": 0.21661114795366332, |
| "learning_rate": 1.0306367783461258e-06, |
| "loss": 0.3053, |
| "step": 1525 |
| }, |
| { |
| "epoch": 2.4416, |
| "grad_norm": 0.2149509149492646, |
| "learning_rate": 1.024981683259723e-06, |
| "loss": 0.324, |
| "step": 1526 |
| }, |
| { |
| "epoch": 2.4432, |
| "grad_norm": 0.2440196428912136, |
| "learning_rate": 1.0193403732310392e-06, |
| "loss": 0.3021, |
| "step": 1527 |
| }, |
| { |
| "epoch": 2.4448, |
| "grad_norm": 0.22656665819569757, |
| "learning_rate": 1.0137128678237062e-06, |
| "loss": 0.3156, |
| "step": 1528 |
| }, |
| { |
| "epoch": 2.4464, |
| "grad_norm": 0.21700232085077029, |
| "learning_rate": 1.0080991865534773e-06, |
| "loss": 0.3237, |
| "step": 1529 |
| }, |
| { |
| "epoch": 2.448, |
| "grad_norm": 0.2161866882925346, |
| "learning_rate": 1.002499348888169e-06, |
| "loss": 0.3257, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.4496, |
| "grad_norm": 0.2160396400537397, |
| "learning_rate": 9.969133742475883e-07, |
| "loss": 0.3029, |
| "step": 1531 |
| }, |
| { |
| "epoch": 2.4512, |
| "grad_norm": 0.20813676892875227, |
| "learning_rate": 9.913412820034629e-07, |
| "loss": 0.3069, |
| "step": 1532 |
| }, |
| { |
| "epoch": 2.4528, |
| "grad_norm": 0.2316592426819985, |
| "learning_rate": 9.857830914793827e-07, |
| "loss": 0.331, |
| "step": 1533 |
| }, |
| { |
| "epoch": 2.4544, |
| "grad_norm": 0.2143622379037617, |
| "learning_rate": 9.802388219507215e-07, |
| "loss": 0.2968, |
| "step": 1534 |
| }, |
| { |
| "epoch": 2.456, |
| "grad_norm": 0.23873982175621922, |
| "learning_rate": 9.747084926445839e-07, |
| "loss": 0.3459, |
| "step": 1535 |
| }, |
| { |
| "epoch": 2.4576000000000002, |
| "grad_norm": 0.2220306401841575, |
| "learning_rate": 9.691921227397227e-07, |
| "loss": 0.3008, |
| "step": 1536 |
| }, |
| { |
| "epoch": 2.4592, |
| "grad_norm": 0.22073013864033278, |
| "learning_rate": 9.63689731366486e-07, |
| "loss": 0.3216, |
| "step": 1537 |
| }, |
| { |
| "epoch": 2.4608, |
| "grad_norm": 0.22198779891277223, |
| "learning_rate": 9.58201337606745e-07, |
| "loss": 0.3043, |
| "step": 1538 |
| }, |
| { |
| "epoch": 2.4624, |
| "grad_norm": 0.2129038616615824, |
| "learning_rate": 9.527269604938249e-07, |
| "loss": 0.3113, |
| "step": 1539 |
| }, |
| { |
| "epoch": 2.464, |
| "grad_norm": 0.20936132436385652, |
| "learning_rate": 9.472666190124457e-07, |
| "loss": 0.3105, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.4656000000000002, |
| "grad_norm": 0.2140131016897714, |
| "learning_rate": 9.418203320986502e-07, |
| "loss": 0.312, |
| "step": 1541 |
| }, |
| { |
| "epoch": 2.4672, |
| "grad_norm": 0.21211683054670685, |
| "learning_rate": 9.363881186397434e-07, |
| "loss": 0.3025, |
| "step": 1542 |
| }, |
| { |
| "epoch": 2.4688, |
| "grad_norm": 0.2246243245497385, |
| "learning_rate": 9.309699974742243e-07, |
| "loss": 0.3269, |
| "step": 1543 |
| }, |
| { |
| "epoch": 2.4704, |
| "grad_norm": 0.21987082166491043, |
| "learning_rate": 9.255659873917183e-07, |
| "loss": 0.3102, |
| "step": 1544 |
| }, |
| { |
| "epoch": 2.472, |
| "grad_norm": 0.21997374784314677, |
| "learning_rate": 9.201761071329196e-07, |
| "loss": 0.3099, |
| "step": 1545 |
| }, |
| { |
| "epoch": 2.4736000000000002, |
| "grad_norm": 0.2326004285645294, |
| "learning_rate": 9.148003753895146e-07, |
| "loss": 0.3561, |
| "step": 1546 |
| }, |
| { |
| "epoch": 2.4752, |
| "grad_norm": 0.21134552444753293, |
| "learning_rate": 9.094388108041302e-07, |
| "loss": 0.3305, |
| "step": 1547 |
| }, |
| { |
| "epoch": 2.4768, |
| "grad_norm": 0.22553935820465043, |
| "learning_rate": 9.040914319702598e-07, |
| "loss": 0.315, |
| "step": 1548 |
| }, |
| { |
| "epoch": 2.4784, |
| "grad_norm": 0.22413605227953656, |
| "learning_rate": 8.987582574321996e-07, |
| "loss": 0.3133, |
| "step": 1549 |
| }, |
| { |
| "epoch": 2.48, |
| "grad_norm": 0.2158910097093097, |
| "learning_rate": 8.934393056849921e-07, |
| "loss": 0.304, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.4816, |
| "grad_norm": 0.21388444715654087, |
| "learning_rate": 8.881345951743486e-07, |
| "loss": 0.305, |
| "step": 1551 |
| }, |
| { |
| "epoch": 2.4832, |
| "grad_norm": 0.23164093519051748, |
| "learning_rate": 8.828441442966013e-07, |
| "loss": 0.324, |
| "step": 1552 |
| }, |
| { |
| "epoch": 2.4848, |
| "grad_norm": 0.22108076927907855, |
| "learning_rate": 8.775679713986235e-07, |
| "loss": 0.3324, |
| "step": 1553 |
| }, |
| { |
| "epoch": 2.4864, |
| "grad_norm": 0.22594157361015668, |
| "learning_rate": 8.723060947777778e-07, |
| "loss": 0.3258, |
| "step": 1554 |
| }, |
| { |
| "epoch": 2.488, |
| "grad_norm": 0.22709108506375286, |
| "learning_rate": 8.670585326818493e-07, |
| "loss": 0.3268, |
| "step": 1555 |
| }, |
| { |
| "epoch": 2.4896, |
| "grad_norm": 0.21950140961810133, |
| "learning_rate": 8.618253033089768e-07, |
| "loss": 0.3214, |
| "step": 1556 |
| }, |
| { |
| "epoch": 2.4912, |
| "grad_norm": 0.3646327768161505, |
| "learning_rate": 8.566064248076001e-07, |
| "loss": 0.325, |
| "step": 1557 |
| }, |
| { |
| "epoch": 2.4928, |
| "grad_norm": 0.22582657284236898, |
| "learning_rate": 8.514019152763852e-07, |
| "loss": 0.3256, |
| "step": 1558 |
| }, |
| { |
| "epoch": 2.4944, |
| "grad_norm": 0.20865918225179006, |
| "learning_rate": 8.462117927641733e-07, |
| "loss": 0.2938, |
| "step": 1559 |
| }, |
| { |
| "epoch": 2.496, |
| "grad_norm": 0.22351061142918452, |
| "learning_rate": 8.410360752699099e-07, |
| "loss": 0.3184, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.4976, |
| "grad_norm": 0.22829597223946557, |
| "learning_rate": 8.358747807425827e-07, |
| "loss": 0.3349, |
| "step": 1561 |
| }, |
| { |
| "epoch": 2.4992, |
| "grad_norm": 0.22043904368277428, |
| "learning_rate": 8.307279270811675e-07, |
| "loss": 0.315, |
| "step": 1562 |
| }, |
| { |
| "epoch": 2.5008, |
| "grad_norm": 0.20803305508880285, |
| "learning_rate": 8.255955321345533e-07, |
| "loss": 0.3012, |
| "step": 1563 |
| }, |
| { |
| "epoch": 2.5023999999999997, |
| "grad_norm": 0.22102819063439694, |
| "learning_rate": 8.20477613701493e-07, |
| "loss": 0.335, |
| "step": 1564 |
| }, |
| { |
| "epoch": 2.504, |
| "grad_norm": 0.22338189659667868, |
| "learning_rate": 8.153741895305351e-07, |
| "loss": 0.3194, |
| "step": 1565 |
| }, |
| { |
| "epoch": 2.5056000000000003, |
| "grad_norm": 0.2173671905166322, |
| "learning_rate": 8.102852773199588e-07, |
| "loss": 0.3133, |
| "step": 1566 |
| }, |
| { |
| "epoch": 2.5072, |
| "grad_norm": 0.21378941041331856, |
| "learning_rate": 8.052108947177234e-07, |
| "loss": 0.314, |
| "step": 1567 |
| }, |
| { |
| "epoch": 2.5088, |
| "grad_norm": 0.21898809072779002, |
| "learning_rate": 8.001510593213946e-07, |
| "loss": 0.3191, |
| "step": 1568 |
| }, |
| { |
| "epoch": 2.5103999999999997, |
| "grad_norm": 0.2427351810588726, |
| "learning_rate": 7.951057886780939e-07, |
| "loss": 0.3362, |
| "step": 1569 |
| }, |
| { |
| "epoch": 2.512, |
| "grad_norm": 0.2242308181650998, |
| "learning_rate": 7.900751002844326e-07, |
| "loss": 0.3209, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.5136, |
| "grad_norm": 0.21906292494279603, |
| "learning_rate": 7.850590115864481e-07, |
| "loss": 0.3225, |
| "step": 1571 |
| }, |
| { |
| "epoch": 2.5152, |
| "grad_norm": 0.25316663438879455, |
| "learning_rate": 7.80057539979554e-07, |
| "loss": 0.3072, |
| "step": 1572 |
| }, |
| { |
| "epoch": 2.5168, |
| "grad_norm": 0.21581705027789447, |
| "learning_rate": 7.750707028084653e-07, |
| "loss": 0.3028, |
| "step": 1573 |
| }, |
| { |
| "epoch": 2.5183999999999997, |
| "grad_norm": 0.2299674219103037, |
| "learning_rate": 7.70098517367151e-07, |
| "loss": 0.3201, |
| "step": 1574 |
| }, |
| { |
| "epoch": 2.52, |
| "grad_norm": 0.22008094378219809, |
| "learning_rate": 7.651410008987698e-07, |
| "loss": 0.3376, |
| "step": 1575 |
| }, |
| { |
| "epoch": 2.5216, |
| "grad_norm": 0.21953937858229505, |
| "learning_rate": 7.601981705956041e-07, |
| "loss": 0.3257, |
| "step": 1576 |
| }, |
| { |
| "epoch": 2.5232, |
| "grad_norm": 0.2102214058633889, |
| "learning_rate": 7.552700435990123e-07, |
| "loss": 0.3073, |
| "step": 1577 |
| }, |
| { |
| "epoch": 2.5248, |
| "grad_norm": 0.20877655621303168, |
| "learning_rate": 7.503566369993564e-07, |
| "loss": 0.313, |
| "step": 1578 |
| }, |
| { |
| "epoch": 2.5263999999999998, |
| "grad_norm": 0.22492810735973112, |
| "learning_rate": 7.454579678359547e-07, |
| "loss": 0.3077, |
| "step": 1579 |
| }, |
| { |
| "epoch": 2.528, |
| "grad_norm": 0.21808260581024697, |
| "learning_rate": 7.405740530970157e-07, |
| "loss": 0.3117, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.5296, |
| "grad_norm": 0.22079598559411423, |
| "learning_rate": 7.357049097195773e-07, |
| "loss": 0.3227, |
| "step": 1581 |
| }, |
| { |
| "epoch": 2.5312, |
| "grad_norm": 0.22207965591574816, |
| "learning_rate": 7.308505545894567e-07, |
| "loss": 0.3221, |
| "step": 1582 |
| }, |
| { |
| "epoch": 2.5328, |
| "grad_norm": 0.2136067946157453, |
| "learning_rate": 7.260110045411816e-07, |
| "loss": 0.3096, |
| "step": 1583 |
| }, |
| { |
| "epoch": 2.5343999999999998, |
| "grad_norm": 0.21520718171036624, |
| "learning_rate": 7.211862763579414e-07, |
| "loss": 0.3101, |
| "step": 1584 |
| }, |
| { |
| "epoch": 2.536, |
| "grad_norm": 0.2563107489737204, |
| "learning_rate": 7.163763867715218e-07, |
| "loss": 0.3424, |
| "step": 1585 |
| }, |
| { |
| "epoch": 2.5376, |
| "grad_norm": 0.231094286892828, |
| "learning_rate": 7.115813524622489e-07, |
| "loss": 0.3456, |
| "step": 1586 |
| }, |
| { |
| "epoch": 2.5392, |
| "grad_norm": 0.2373216077001464, |
| "learning_rate": 7.068011900589333e-07, |
| "loss": 0.3381, |
| "step": 1587 |
| }, |
| { |
| "epoch": 2.5408, |
| "grad_norm": 0.2103714717942439, |
| "learning_rate": 7.020359161388108e-07, |
| "loss": 0.2911, |
| "step": 1588 |
| }, |
| { |
| "epoch": 2.5423999999999998, |
| "grad_norm": 0.23642737631151128, |
| "learning_rate": 6.972855472274853e-07, |
| "loss": 0.3505, |
| "step": 1589 |
| }, |
| { |
| "epoch": 2.544, |
| "grad_norm": 0.23009771549587837, |
| "learning_rate": 6.925500997988694e-07, |
| "loss": 0.3389, |
| "step": 1590 |
| }, |
| { |
| "epoch": 2.5456, |
| "grad_norm": 0.2109867468985298, |
| "learning_rate": 6.87829590275132e-07, |
| "loss": 0.3144, |
| "step": 1591 |
| }, |
| { |
| "epoch": 2.5472, |
| "grad_norm": 0.23628349557796502, |
| "learning_rate": 6.83124035026635e-07, |
| "loss": 0.3339, |
| "step": 1592 |
| }, |
| { |
| "epoch": 2.5488, |
| "grad_norm": 0.23266979930702628, |
| "learning_rate": 6.784334503718826e-07, |
| "loss": 0.3248, |
| "step": 1593 |
| }, |
| { |
| "epoch": 2.5504, |
| "grad_norm": 0.21625716488709032, |
| "learning_rate": 6.737578525774636e-07, |
| "loss": 0.2992, |
| "step": 1594 |
| }, |
| { |
| "epoch": 2.552, |
| "grad_norm": 0.25021466672179055, |
| "learning_rate": 6.690972578579886e-07, |
| "loss": 0.3191, |
| "step": 1595 |
| }, |
| { |
| "epoch": 2.5536, |
| "grad_norm": 0.25379982118128663, |
| "learning_rate": 6.644516823760439e-07, |
| "loss": 0.3277, |
| "step": 1596 |
| }, |
| { |
| "epoch": 2.5552, |
| "grad_norm": 0.22014903911679898, |
| "learning_rate": 6.598211422421258e-07, |
| "loss": 0.3051, |
| "step": 1597 |
| }, |
| { |
| "epoch": 2.5568, |
| "grad_norm": 0.21932818238764878, |
| "learning_rate": 6.552056535145917e-07, |
| "loss": 0.3212, |
| "step": 1598 |
| }, |
| { |
| "epoch": 2.5584, |
| "grad_norm": 0.22231747820590972, |
| "learning_rate": 6.506052321996037e-07, |
| "loss": 0.3316, |
| "step": 1599 |
| }, |
| { |
| "epoch": 2.56, |
| "grad_norm": 0.27145673606546655, |
| "learning_rate": 6.46019894251066e-07, |
| "loss": 0.3333, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.5616, |
| "grad_norm": 0.2158218046515406, |
| "learning_rate": 6.414496555705802e-07, |
| "loss": 0.3206, |
| "step": 1601 |
| }, |
| { |
| "epoch": 2.5632, |
| "grad_norm": 0.2224469858651559, |
| "learning_rate": 6.368945320073799e-07, |
| "loss": 0.3215, |
| "step": 1602 |
| }, |
| { |
| "epoch": 2.5648, |
| "grad_norm": 0.22754126364639723, |
| "learning_rate": 6.323545393582847e-07, |
| "loss": 0.3245, |
| "step": 1603 |
| }, |
| { |
| "epoch": 2.5664, |
| "grad_norm": 0.2114387430598465, |
| "learning_rate": 6.278296933676414e-07, |
| "loss": 0.3113, |
| "step": 1604 |
| }, |
| { |
| "epoch": 2.568, |
| "grad_norm": 0.21046099377157837, |
| "learning_rate": 6.233200097272646e-07, |
| "loss": 0.3088, |
| "step": 1605 |
| }, |
| { |
| "epoch": 2.5696, |
| "grad_norm": 0.22255146221916106, |
| "learning_rate": 6.188255040763929e-07, |
| "loss": 0.3084, |
| "step": 1606 |
| }, |
| { |
| "epoch": 2.5712, |
| "grad_norm": 0.34295717043925944, |
| "learning_rate": 6.143461920016247e-07, |
| "loss": 0.3159, |
| "step": 1607 |
| }, |
| { |
| "epoch": 2.5728, |
| "grad_norm": 0.21926817387464947, |
| "learning_rate": 6.098820890368696e-07, |
| "loss": 0.3159, |
| "step": 1608 |
| }, |
| { |
| "epoch": 2.5744, |
| "grad_norm": 0.40596415191561397, |
| "learning_rate": 6.054332106632943e-07, |
| "loss": 0.3352, |
| "step": 1609 |
| }, |
| { |
| "epoch": 2.576, |
| "grad_norm": 0.23086663122800052, |
| "learning_rate": 6.009995723092655e-07, |
| "loss": 0.3194, |
| "step": 1610 |
| }, |
| { |
| "epoch": 2.5776, |
| "grad_norm": 0.22799949136579734, |
| "learning_rate": 5.965811893503015e-07, |
| "loss": 0.3481, |
| "step": 1611 |
| }, |
| { |
| "epoch": 2.5792, |
| "grad_norm": 0.22142301849147178, |
| "learning_rate": 5.921780771090124e-07, |
| "loss": 0.3201, |
| "step": 1612 |
| }, |
| { |
| "epoch": 2.5808, |
| "grad_norm": 0.20477866361720679, |
| "learning_rate": 5.877902508550542e-07, |
| "loss": 0.3079, |
| "step": 1613 |
| }, |
| { |
| "epoch": 2.5824, |
| "grad_norm": 0.22555546374481633, |
| "learning_rate": 5.834177258050711e-07, |
| "loss": 0.3121, |
| "step": 1614 |
| }, |
| { |
| "epoch": 2.584, |
| "grad_norm": 0.22827370267922675, |
| "learning_rate": 5.790605171226421e-07, |
| "loss": 0.3004, |
| "step": 1615 |
| }, |
| { |
| "epoch": 2.5856, |
| "grad_norm": 0.21930767977879323, |
| "learning_rate": 5.747186399182336e-07, |
| "loss": 0.3246, |
| "step": 1616 |
| }, |
| { |
| "epoch": 2.5872, |
| "grad_norm": 0.2146943691623156, |
| "learning_rate": 5.703921092491393e-07, |
| "loss": 0.328, |
| "step": 1617 |
| }, |
| { |
| "epoch": 2.5888, |
| "grad_norm": 0.218997676239078, |
| "learning_rate": 5.660809401194362e-07, |
| "loss": 0.31, |
| "step": 1618 |
| }, |
| { |
| "epoch": 2.5904, |
| "grad_norm": 0.21416697272695773, |
| "learning_rate": 5.617851474799285e-07, |
| "loss": 0.3214, |
| "step": 1619 |
| }, |
| { |
| "epoch": 2.592, |
| "grad_norm": 0.22560833283236575, |
| "learning_rate": 5.575047462280919e-07, |
| "loss": 0.3433, |
| "step": 1620 |
| }, |
| { |
| "epoch": 2.5936, |
| "grad_norm": 0.20713521707437463, |
| "learning_rate": 5.532397512080306e-07, |
| "loss": 0.3068, |
| "step": 1621 |
| }, |
| { |
| "epoch": 2.5952, |
| "grad_norm": 0.23462995676096135, |
| "learning_rate": 5.489901772104178e-07, |
| "loss": 0.3327, |
| "step": 1622 |
| }, |
| { |
| "epoch": 2.5968, |
| "grad_norm": 0.2273942776366499, |
| "learning_rate": 5.447560389724499e-07, |
| "loss": 0.3095, |
| "step": 1623 |
| }, |
| { |
| "epoch": 2.5984, |
| "grad_norm": 0.22699637005133178, |
| "learning_rate": 5.405373511777939e-07, |
| "loss": 0.3233, |
| "step": 1624 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.2252879269556756, |
| "learning_rate": 5.363341284565316e-07, |
| "loss": 0.3128, |
| "step": 1625 |
| }, |
| { |
| "epoch": 2.6016, |
| "grad_norm": 0.21630081813021465, |
| "learning_rate": 5.321463853851189e-07, |
| "loss": 0.3336, |
| "step": 1626 |
| }, |
| { |
| "epoch": 2.6032, |
| "grad_norm": 0.2226002526132289, |
| "learning_rate": 5.279741364863244e-07, |
| "loss": 0.3063, |
| "step": 1627 |
| }, |
| { |
| "epoch": 2.6048, |
| "grad_norm": 0.23156995989833012, |
| "learning_rate": 5.238173962291881e-07, |
| "loss": 0.3226, |
| "step": 1628 |
| }, |
| { |
| "epoch": 2.6064, |
| "grad_norm": 0.23316158119297886, |
| "learning_rate": 5.196761790289639e-07, |
| "loss": 0.3263, |
| "step": 1629 |
| }, |
| { |
| "epoch": 2.608, |
| "grad_norm": 0.24385352556745676, |
| "learning_rate": 5.155504992470751e-07, |
| "loss": 0.3337, |
| "step": 1630 |
| }, |
| { |
| "epoch": 2.6096, |
| "grad_norm": 0.21480637034215733, |
| "learning_rate": 5.114403711910631e-07, |
| "loss": 0.3034, |
| "step": 1631 |
| }, |
| { |
| "epoch": 2.6112, |
| "grad_norm": 0.21681385154495136, |
| "learning_rate": 5.073458091145328e-07, |
| "loss": 0.3006, |
| "step": 1632 |
| }, |
| { |
| "epoch": 2.6128, |
| "grad_norm": 0.21523241663148254, |
| "learning_rate": 5.032668272171138e-07, |
| "loss": 0.3111, |
| "step": 1633 |
| }, |
| { |
| "epoch": 2.6144, |
| "grad_norm": 0.22875246105777908, |
| "learning_rate": 4.99203439644399e-07, |
| "loss": 0.3272, |
| "step": 1634 |
| }, |
| { |
| "epoch": 2.616, |
| "grad_norm": 0.21794409689318378, |
| "learning_rate": 4.951556604879049e-07, |
| "loss": 0.3229, |
| "step": 1635 |
| }, |
| { |
| "epoch": 2.6176, |
| "grad_norm": 0.22839234407809852, |
| "learning_rate": 4.911235037850187e-07, |
| "loss": 0.3374, |
| "step": 1636 |
| }, |
| { |
| "epoch": 2.6192, |
| "grad_norm": 0.22712513195605358, |
| "learning_rate": 4.871069835189485e-07, |
| "loss": 0.3422, |
| "step": 1637 |
| }, |
| { |
| "epoch": 2.6208, |
| "grad_norm": 0.22263354934683377, |
| "learning_rate": 4.831061136186787e-07, |
| "loss": 0.3183, |
| "step": 1638 |
| }, |
| { |
| "epoch": 2.6224, |
| "grad_norm": 0.22954836093150902, |
| "learning_rate": 4.791209079589165e-07, |
| "loss": 0.3058, |
| "step": 1639 |
| }, |
| { |
| "epoch": 2.624, |
| "grad_norm": 0.2259392755047876, |
| "learning_rate": 4.7515138036005157e-07, |
| "loss": 0.3206, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.6256, |
| "grad_norm": 0.22456023355823607, |
| "learning_rate": 4.7119754458809727e-07, |
| "loss": 0.3009, |
| "step": 1641 |
| }, |
| { |
| "epoch": 2.6272, |
| "grad_norm": 0.21878986012295162, |
| "learning_rate": 4.672594143546538e-07, |
| "loss": 0.3023, |
| "step": 1642 |
| }, |
| { |
| "epoch": 2.6288, |
| "grad_norm": 0.23253127090124176, |
| "learning_rate": 4.6333700331685385e-07, |
| "loss": 0.3292, |
| "step": 1643 |
| }, |
| { |
| "epoch": 2.6304, |
| "grad_norm": 0.21285111057378994, |
| "learning_rate": 4.594303250773152e-07, |
| "loss": 0.3179, |
| "step": 1644 |
| }, |
| { |
| "epoch": 2.632, |
| "grad_norm": 0.24025499565445355, |
| "learning_rate": 4.555393931841001e-07, |
| "loss": 0.3149, |
| "step": 1645 |
| }, |
| { |
| "epoch": 2.6336, |
| "grad_norm": 0.23581040371096915, |
| "learning_rate": 4.5166422113065877e-07, |
| "loss": 0.3456, |
| "step": 1646 |
| }, |
| { |
| "epoch": 2.6352, |
| "grad_norm": 0.22135280659971393, |
| "learning_rate": 4.478048223557907e-07, |
| "loss": 0.3289, |
| "step": 1647 |
| }, |
| { |
| "epoch": 2.6368, |
| "grad_norm": 0.22291545674379645, |
| "learning_rate": 4.439612102435942e-07, |
| "loss": 0.3103, |
| "step": 1648 |
| }, |
| { |
| "epoch": 2.6384, |
| "grad_norm": 0.22572640097644955, |
| "learning_rate": 4.401333981234196e-07, |
| "loss": 0.3399, |
| "step": 1649 |
| }, |
| { |
| "epoch": 2.64, |
| "grad_norm": 0.21980341311837404, |
| "learning_rate": 4.3632139926982676e-07, |
| "loss": 0.3274, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.6416, |
| "grad_norm": 0.22596791603140695, |
| "learning_rate": 4.325252269025315e-07, |
| "loss": 0.3315, |
| "step": 1651 |
| }, |
| { |
| "epoch": 2.6432, |
| "grad_norm": 0.23418339836741597, |
| "learning_rate": 4.287448941863692e-07, |
| "loss": 0.3085, |
| "step": 1652 |
| }, |
| { |
| "epoch": 2.6448, |
| "grad_norm": 0.20876659971580724, |
| "learning_rate": 4.249804142312436e-07, |
| "loss": 0.3066, |
| "step": 1653 |
| }, |
| { |
| "epoch": 2.6464, |
| "grad_norm": 0.23545368804636532, |
| "learning_rate": 4.2123180009207956e-07, |
| "loss": 0.3301, |
| "step": 1654 |
| }, |
| { |
| "epoch": 2.648, |
| "grad_norm": 0.21556770475918471, |
| "learning_rate": 4.1749906476878486e-07, |
| "loss": 0.3131, |
| "step": 1655 |
| }, |
| { |
| "epoch": 2.6496, |
| "grad_norm": 0.20528769381309137, |
| "learning_rate": 4.137822212061965e-07, |
| "loss": 0.2916, |
| "step": 1656 |
| }, |
| { |
| "epoch": 2.6512000000000002, |
| "grad_norm": 0.23547179538968283, |
| "learning_rate": 4.100812822940431e-07, |
| "loss": 0.3166, |
| "step": 1657 |
| }, |
| { |
| "epoch": 2.6528, |
| "grad_norm": 0.2912356602531298, |
| "learning_rate": 4.063962608668959e-07, |
| "loss": 0.3388, |
| "step": 1658 |
| }, |
| { |
| "epoch": 2.6544, |
| "grad_norm": 0.21504768253407588, |
| "learning_rate": 4.0272716970412516e-07, |
| "loss": 0.3075, |
| "step": 1659 |
| }, |
| { |
| "epoch": 2.656, |
| "grad_norm": 0.22014096632702726, |
| "learning_rate": 3.990740215298583e-07, |
| "loss": 0.3134, |
| "step": 1660 |
| }, |
| { |
| "epoch": 2.6576, |
| "grad_norm": 0.2447994791955817, |
| "learning_rate": 3.954368290129301e-07, |
| "loss": 0.3283, |
| "step": 1661 |
| }, |
| { |
| "epoch": 2.6592000000000002, |
| "grad_norm": 0.23151371358278952, |
| "learning_rate": 3.918156047668453e-07, |
| "loss": 0.3194, |
| "step": 1662 |
| }, |
| { |
| "epoch": 2.6608, |
| "grad_norm": 0.231004532474009, |
| "learning_rate": 3.882103613497318e-07, |
| "loss": 0.3056, |
| "step": 1663 |
| }, |
| { |
| "epoch": 2.6624, |
| "grad_norm": 0.21650364261057706, |
| "learning_rate": 3.84621111264295e-07, |
| "loss": 0.3276, |
| "step": 1664 |
| }, |
| { |
| "epoch": 2.664, |
| "grad_norm": 0.2206733184318087, |
| "learning_rate": 3.810478669577794e-07, |
| "loss": 0.3353, |
| "step": 1665 |
| }, |
| { |
| "epoch": 2.6656, |
| "grad_norm": 0.23080740637797909, |
| "learning_rate": 3.7749064082191976e-07, |
| "loss": 0.3358, |
| "step": 1666 |
| }, |
| { |
| "epoch": 2.6672000000000002, |
| "grad_norm": 0.2240545478084365, |
| "learning_rate": 3.739494451929049e-07, |
| "loss": 0.3282, |
| "step": 1667 |
| }, |
| { |
| "epoch": 2.6688, |
| "grad_norm": 0.2364653683675628, |
| "learning_rate": 3.7042429235132625e-07, |
| "loss": 0.3069, |
| "step": 1668 |
| }, |
| { |
| "epoch": 2.6704, |
| "grad_norm": 0.21247556461064918, |
| "learning_rate": 3.6691519452214387e-07, |
| "loss": 0.3094, |
| "step": 1669 |
| }, |
| { |
| "epoch": 2.672, |
| "grad_norm": 0.23091317918086066, |
| "learning_rate": 3.6342216387464047e-07, |
| "loss": 0.3216, |
| "step": 1670 |
| }, |
| { |
| "epoch": 2.6736, |
| "grad_norm": 0.21762899339075653, |
| "learning_rate": 3.5994521252237516e-07, |
| "loss": 0.3125, |
| "step": 1671 |
| }, |
| { |
| "epoch": 2.6752000000000002, |
| "grad_norm": 0.21807526225632623, |
| "learning_rate": 3.564843525231498e-07, |
| "loss": 0.317, |
| "step": 1672 |
| }, |
| { |
| "epoch": 2.6768, |
| "grad_norm": 0.2238101348891608, |
| "learning_rate": 3.53039595878959e-07, |
| "loss": 0.3135, |
| "step": 1673 |
| }, |
| { |
| "epoch": 2.6784, |
| "grad_norm": 0.21761774052660368, |
| "learning_rate": 3.496109545359544e-07, |
| "loss": 0.3, |
| "step": 1674 |
| }, |
| { |
| "epoch": 2.68, |
| "grad_norm": 0.23462795631214092, |
| "learning_rate": 3.461984403844015e-07, |
| "loss": 0.3334, |
| "step": 1675 |
| }, |
| { |
| "epoch": 2.6816, |
| "grad_norm": 0.20672094646727748, |
| "learning_rate": 3.42802065258635e-07, |
| "loss": 0.3133, |
| "step": 1676 |
| }, |
| { |
| "epoch": 2.6832000000000003, |
| "grad_norm": 0.21321874397568766, |
| "learning_rate": 3.394218409370242e-07, |
| "loss": 0.3153, |
| "step": 1677 |
| }, |
| { |
| "epoch": 2.6848, |
| "grad_norm": 0.22024216391050636, |
| "learning_rate": 3.360577791419256e-07, |
| "loss": 0.32, |
| "step": 1678 |
| }, |
| { |
| "epoch": 2.6864, |
| "grad_norm": 0.22969420576200103, |
| "learning_rate": 3.3270989153964707e-07, |
| "loss": 0.3254, |
| "step": 1679 |
| }, |
| { |
| "epoch": 2.6879999999999997, |
| "grad_norm": 0.21460412310864807, |
| "learning_rate": 3.2937818974040637e-07, |
| "loss": 0.3227, |
| "step": 1680 |
| }, |
| { |
| "epoch": 2.6896, |
| "grad_norm": 0.22272073450405586, |
| "learning_rate": 3.260626852982873e-07, |
| "loss": 0.3254, |
| "step": 1681 |
| }, |
| { |
| "epoch": 2.6912000000000003, |
| "grad_norm": 0.20930385233046075, |
| "learning_rate": 3.227633897112059e-07, |
| "loss": 0.3152, |
| "step": 1682 |
| }, |
| { |
| "epoch": 2.6928, |
| "grad_norm": 0.21569312405373325, |
| "learning_rate": 3.194803144208636e-07, |
| "loss": 0.3168, |
| "step": 1683 |
| }, |
| { |
| "epoch": 2.6944, |
| "grad_norm": 0.2235058971576791, |
| "learning_rate": 3.16213470812714e-07, |
| "loss": 0.3201, |
| "step": 1684 |
| }, |
| { |
| "epoch": 2.6959999999999997, |
| "grad_norm": 0.21101281296989963, |
| "learning_rate": 3.129628702159204e-07, |
| "loss": 0.2924, |
| "step": 1685 |
| }, |
| { |
| "epoch": 2.6976, |
| "grad_norm": 0.2342792761034275, |
| "learning_rate": 3.097285239033138e-07, |
| "loss": 0.3103, |
| "step": 1686 |
| }, |
| { |
| "epoch": 2.6992000000000003, |
| "grad_norm": 0.22493491912061747, |
| "learning_rate": 3.0651044309136016e-07, |
| "loss": 0.305, |
| "step": 1687 |
| }, |
| { |
| "epoch": 2.7008, |
| "grad_norm": 0.2818012634685806, |
| "learning_rate": 3.033086389401141e-07, |
| "loss": 0.3289, |
| "step": 1688 |
| }, |
| { |
| "epoch": 2.7024, |
| "grad_norm": 0.21935867678526613, |
| "learning_rate": 3.0012312255318696e-07, |
| "loss": 0.3131, |
| "step": 1689 |
| }, |
| { |
| "epoch": 2.7039999999999997, |
| "grad_norm": 0.234982667092687, |
| "learning_rate": 2.9695390497770535e-07, |
| "loss": 0.3443, |
| "step": 1690 |
| }, |
| { |
| "epoch": 2.7056, |
| "grad_norm": 0.22992666945992266, |
| "learning_rate": 2.93800997204271e-07, |
| "loss": 0.3268, |
| "step": 1691 |
| }, |
| { |
| "epoch": 2.7072000000000003, |
| "grad_norm": 0.21554938327967788, |
| "learning_rate": 2.9066441016692594e-07, |
| "loss": 0.2986, |
| "step": 1692 |
| }, |
| { |
| "epoch": 2.7088, |
| "grad_norm": 0.21236452084529875, |
| "learning_rate": 2.8754415474311235e-07, |
| "loss": 0.3216, |
| "step": 1693 |
| }, |
| { |
| "epoch": 2.7104, |
| "grad_norm": 0.21487640805944336, |
| "learning_rate": 2.844402417536374e-07, |
| "loss": 0.3115, |
| "step": 1694 |
| }, |
| { |
| "epoch": 2.7119999999999997, |
| "grad_norm": 0.22571270468410864, |
| "learning_rate": 2.8135268196263055e-07, |
| "loss": 0.3296, |
| "step": 1695 |
| }, |
| { |
| "epoch": 2.7136, |
| "grad_norm": 0.2102353139409989, |
| "learning_rate": 2.782814860775124e-07, |
| "loss": 0.3091, |
| "step": 1696 |
| }, |
| { |
| "epoch": 2.7152, |
| "grad_norm": 0.2391610738630848, |
| "learning_rate": 2.752266647489549e-07, |
| "loss": 0.3342, |
| "step": 1697 |
| }, |
| { |
| "epoch": 2.7168, |
| "grad_norm": 0.23898584746380053, |
| "learning_rate": 2.7218822857084217e-07, |
| "loss": 0.3557, |
| "step": 1698 |
| }, |
| { |
| "epoch": 2.7184, |
| "grad_norm": 0.2127448766777578, |
| "learning_rate": 2.691661880802382e-07, |
| "loss": 0.3119, |
| "step": 1699 |
| }, |
| { |
| "epoch": 2.7199999999999998, |
| "grad_norm": 0.22466730577179067, |
| "learning_rate": 2.661605537573453e-07, |
| "loss": 0.3184, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.7216, |
| "grad_norm": 0.21788815155689836, |
| "learning_rate": 2.631713360254734e-07, |
| "loss": 0.3229, |
| "step": 1701 |
| }, |
| { |
| "epoch": 2.7232, |
| "grad_norm": 0.20964330900789924, |
| "learning_rate": 2.6019854525099977e-07, |
| "loss": 0.3182, |
| "step": 1702 |
| }, |
| { |
| "epoch": 2.7248, |
| "grad_norm": 0.20885660454025992, |
| "learning_rate": 2.572421917433332e-07, |
| "loss": 0.2883, |
| "step": 1703 |
| }, |
| { |
| "epoch": 2.7264, |
| "grad_norm": 0.2123665468648309, |
| "learning_rate": 2.5430228575488156e-07, |
| "loss": 0.3204, |
| "step": 1704 |
| }, |
| { |
| "epoch": 2.7279999999999998, |
| "grad_norm": 0.2172638266476259, |
| "learning_rate": 2.513788374810111e-07, |
| "loss": 0.3173, |
| "step": 1705 |
| }, |
| { |
| "epoch": 2.7296, |
| "grad_norm": 0.2269068947056191, |
| "learning_rate": 2.4847185706001643e-07, |
| "loss": 0.3229, |
| "step": 1706 |
| }, |
| { |
| "epoch": 2.7312, |
| "grad_norm": 0.2107051834961778, |
| "learning_rate": 2.455813545730812e-07, |
| "loss": 0.2996, |
| "step": 1707 |
| }, |
| { |
| "epoch": 2.7328, |
| "grad_norm": 0.2172804646950499, |
| "learning_rate": 2.4270734004424643e-07, |
| "loss": 0.3232, |
| "step": 1708 |
| }, |
| { |
| "epoch": 2.7344, |
| "grad_norm": 0.20588407159429323, |
| "learning_rate": 2.39849823440374e-07, |
| "loss": 0.3155, |
| "step": 1709 |
| }, |
| { |
| "epoch": 2.7359999999999998, |
| "grad_norm": 0.2148781028366536, |
| "learning_rate": 2.3700881467111025e-07, |
| "loss": 0.3218, |
| "step": 1710 |
| }, |
| { |
| "epoch": 2.7376, |
| "grad_norm": 0.22890759963093985, |
| "learning_rate": 2.3418432358885633e-07, |
| "loss": 0.3231, |
| "step": 1711 |
| }, |
| { |
| "epoch": 2.7392, |
| "grad_norm": 0.22036924731655955, |
| "learning_rate": 2.3137635998872808e-07, |
| "loss": 0.3202, |
| "step": 1712 |
| }, |
| { |
| "epoch": 2.7408, |
| "grad_norm": 0.2376002300957189, |
| "learning_rate": 2.285849336085294e-07, |
| "loss": 0.3389, |
| "step": 1713 |
| }, |
| { |
| "epoch": 2.7424, |
| "grad_norm": 0.22578503840245648, |
| "learning_rate": 2.258100541287117e-07, |
| "loss": 0.3056, |
| "step": 1714 |
| }, |
| { |
| "epoch": 2.7439999999999998, |
| "grad_norm": 0.22307814665239128, |
| "learning_rate": 2.2305173117234236e-07, |
| "loss": 0.3218, |
| "step": 1715 |
| }, |
| { |
| "epoch": 2.7456, |
| "grad_norm": 0.22447114348902894, |
| "learning_rate": 2.2030997430507462e-07, |
| "loss": 0.3107, |
| "step": 1716 |
| }, |
| { |
| "epoch": 2.7472, |
| "grad_norm": 0.23762630161241177, |
| "learning_rate": 2.1758479303510937e-07, |
| "loss": 0.3135, |
| "step": 1717 |
| }, |
| { |
| "epoch": 2.7488, |
| "grad_norm": 0.22795099515542164, |
| "learning_rate": 2.148761968131663e-07, |
| "loss": 0.3183, |
| "step": 1718 |
| }, |
| { |
| "epoch": 2.7504, |
| "grad_norm": 0.2112123618091967, |
| "learning_rate": 2.121841950324488e-07, |
| "loss": 0.3161, |
| "step": 1719 |
| }, |
| { |
| "epoch": 2.752, |
| "grad_norm": 0.22793142978799352, |
| "learning_rate": 2.0950879702861082e-07, |
| "loss": 0.3329, |
| "step": 1720 |
| }, |
| { |
| "epoch": 2.7536, |
| "grad_norm": 0.21797476405869493, |
| "learning_rate": 2.0685001207972843e-07, |
| "loss": 0.3228, |
| "step": 1721 |
| }, |
| { |
| "epoch": 2.7552, |
| "grad_norm": 0.2123517286518798, |
| "learning_rate": 2.042078494062616e-07, |
| "loss": 0.2818, |
| "step": 1722 |
| }, |
| { |
| "epoch": 2.7568, |
| "grad_norm": 0.22019999045448357, |
| "learning_rate": 2.0158231817102858e-07, |
| "loss": 0.325, |
| "step": 1723 |
| }, |
| { |
| "epoch": 2.7584, |
| "grad_norm": 0.21490463091607434, |
| "learning_rate": 1.9897342747916938e-07, |
| "loss": 0.3281, |
| "step": 1724 |
| }, |
| { |
| "epoch": 2.76, |
| "grad_norm": 0.22444029318440836, |
| "learning_rate": 1.9638118637811564e-07, |
| "loss": 0.3289, |
| "step": 1725 |
| }, |
| { |
| "epoch": 2.7616, |
| "grad_norm": 0.24644517397473797, |
| "learning_rate": 1.9380560385756088e-07, |
| "loss": 0.3394, |
| "step": 1726 |
| }, |
| { |
| "epoch": 2.7632, |
| "grad_norm": 0.21335108724366536, |
| "learning_rate": 1.9124668884942632e-07, |
| "loss": 0.3049, |
| "step": 1727 |
| }, |
| { |
| "epoch": 2.7648, |
| "grad_norm": 0.2256104363369049, |
| "learning_rate": 1.8870445022783234e-07, |
| "loss": 0.3452, |
| "step": 1728 |
| }, |
| { |
| "epoch": 2.7664, |
| "grad_norm": 0.22127958274105763, |
| "learning_rate": 1.861788968090683e-07, |
| "loss": 0.3224, |
| "step": 1729 |
| }, |
| { |
| "epoch": 2.768, |
| "grad_norm": 0.21471700839401023, |
| "learning_rate": 1.8367003735155764e-07, |
| "loss": 0.3054, |
| "step": 1730 |
| }, |
| { |
| "epoch": 2.7696, |
| "grad_norm": 0.214873360668454, |
| "learning_rate": 1.8117788055583286e-07, |
| "loss": 0.3021, |
| "step": 1731 |
| }, |
| { |
| "epoch": 2.7712, |
| "grad_norm": 0.2132969172912175, |
| "learning_rate": 1.7870243506450113e-07, |
| "loss": 0.3092, |
| "step": 1732 |
| }, |
| { |
| "epoch": 2.7728, |
| "grad_norm": 0.22595052819810668, |
| "learning_rate": 1.762437094622177e-07, |
| "loss": 0.313, |
| "step": 1733 |
| }, |
| { |
| "epoch": 2.7744, |
| "grad_norm": 0.2080951189763034, |
| "learning_rate": 1.738017122756541e-07, |
| "loss": 0.3098, |
| "step": 1734 |
| }, |
| { |
| "epoch": 2.776, |
| "grad_norm": 0.2158704022316432, |
| "learning_rate": 1.713764519734673e-07, |
| "loss": 0.32, |
| "step": 1735 |
| }, |
| { |
| "epoch": 2.7776, |
| "grad_norm": 0.22167179315236246, |
| "learning_rate": 1.68967936966275e-07, |
| "loss": 0.321, |
| "step": 1736 |
| }, |
| { |
| "epoch": 2.7792, |
| "grad_norm": 0.21221610825296142, |
| "learning_rate": 1.6657617560662088e-07, |
| "loss": 0.3028, |
| "step": 1737 |
| }, |
| { |
| "epoch": 2.7808, |
| "grad_norm": 0.22810433810310513, |
| "learning_rate": 1.6420117618895003e-07, |
| "loss": 0.3386, |
| "step": 1738 |
| }, |
| { |
| "epoch": 2.7824, |
| "grad_norm": 0.21539064465367852, |
| "learning_rate": 1.6184294694957747e-07, |
| "loss": 0.312, |
| "step": 1739 |
| }, |
| { |
| "epoch": 2.784, |
| "grad_norm": 0.22044564624184998, |
| "learning_rate": 1.5950149606666077e-07, |
| "loss": 0.325, |
| "step": 1740 |
| }, |
| { |
| "epoch": 2.7856, |
| "grad_norm": 0.22874098055064906, |
| "learning_rate": 1.5717683166017184e-07, |
| "loss": 0.3208, |
| "step": 1741 |
| }, |
| { |
| "epoch": 2.7872, |
| "grad_norm": 0.22676331241502895, |
| "learning_rate": 1.5486896179186693e-07, |
| "loss": 0.3208, |
| "step": 1742 |
| }, |
| { |
| "epoch": 2.7888, |
| "grad_norm": 0.22096435050475383, |
| "learning_rate": 1.5257789446526172e-07, |
| "loss": 0.3028, |
| "step": 1743 |
| }, |
| { |
| "epoch": 2.7904, |
| "grad_norm": 0.2255381292138752, |
| "learning_rate": 1.5030363762560228e-07, |
| "loss": 0.339, |
| "step": 1744 |
| }, |
| { |
| "epoch": 2.792, |
| "grad_norm": 0.24460489533212315, |
| "learning_rate": 1.480461991598353e-07, |
| "loss": 0.3277, |
| "step": 1745 |
| }, |
| { |
| "epoch": 2.7936, |
| "grad_norm": 0.22444851150742803, |
| "learning_rate": 1.458055868965841e-07, |
| "loss": 0.3237, |
| "step": 1746 |
| }, |
| { |
| "epoch": 2.7952, |
| "grad_norm": 0.2109938878552334, |
| "learning_rate": 1.4358180860611913e-07, |
| "loss": 0.3062, |
| "step": 1747 |
| }, |
| { |
| "epoch": 2.7968, |
| "grad_norm": 0.2303115742065436, |
| "learning_rate": 1.4137487200033383e-07, |
| "loss": 0.3431, |
| "step": 1748 |
| }, |
| { |
| "epoch": 2.7984, |
| "grad_norm": 0.24441957148020693, |
| "learning_rate": 1.3918478473271325e-07, |
| "loss": 0.3319, |
| "step": 1749 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.21489841000647453, |
| "learning_rate": 1.3701155439831249e-07, |
| "loss": 0.3088, |
| "step": 1750 |
| }, |
| { |
| "epoch": 2.8016, |
| "grad_norm": 0.21745383306340557, |
| "learning_rate": 1.3485518853372625e-07, |
| "loss": 0.3107, |
| "step": 1751 |
| }, |
| { |
| "epoch": 2.8032, |
| "grad_norm": 0.22888230137256607, |
| "learning_rate": 1.3271569461706547e-07, |
| "loss": 0.3195, |
| "step": 1752 |
| }, |
| { |
| "epoch": 2.8048, |
| "grad_norm": 0.2208935610827272, |
| "learning_rate": 1.305930800679317e-07, |
| "loss": 0.3235, |
| "step": 1753 |
| }, |
| { |
| "epoch": 2.8064, |
| "grad_norm": 0.22202537042231485, |
| "learning_rate": 1.2848735224738729e-07, |
| "loss": 0.326, |
| "step": 1754 |
| }, |
| { |
| "epoch": 2.808, |
| "grad_norm": 0.2136769710760529, |
| "learning_rate": 1.2639851845793583e-07, |
| "loss": 0.2991, |
| "step": 1755 |
| }, |
| { |
| "epoch": 2.8096, |
| "grad_norm": 0.2195404812635347, |
| "learning_rate": 1.2432658594349113e-07, |
| "loss": 0.3112, |
| "step": 1756 |
| }, |
| { |
| "epoch": 2.8112, |
| "grad_norm": 0.21936096425580714, |
| "learning_rate": 1.2227156188935552e-07, |
| "loss": 0.3412, |
| "step": 1757 |
| }, |
| { |
| "epoch": 2.8128, |
| "grad_norm": 0.2176519209839612, |
| "learning_rate": 1.202334534221955e-07, |
| "loss": 0.3102, |
| "step": 1758 |
| }, |
| { |
| "epoch": 2.8144, |
| "grad_norm": 0.20964777126549455, |
| "learning_rate": 1.1821226761001391e-07, |
| "loss": 0.3148, |
| "step": 1759 |
| }, |
| { |
| "epoch": 2.816, |
| "grad_norm": 0.21308054473299776, |
| "learning_rate": 1.1620801146212723e-07, |
| "loss": 0.3031, |
| "step": 1760 |
| }, |
| { |
| "epoch": 2.8176, |
| "grad_norm": 0.2376445151177635, |
| "learning_rate": 1.1422069192914221e-07, |
| "loss": 0.3401, |
| "step": 1761 |
| }, |
| { |
| "epoch": 2.8192, |
| "grad_norm": 0.3236157648808624, |
| "learning_rate": 1.1225031590292923e-07, |
| "loss": 0.3305, |
| "step": 1762 |
| }, |
| { |
| "epoch": 2.8208, |
| "grad_norm": 0.24814199716246266, |
| "learning_rate": 1.1029689021660183e-07, |
| "loss": 0.3301, |
| "step": 1763 |
| }, |
| { |
| "epoch": 2.8224, |
| "grad_norm": 0.21254895975917093, |
| "learning_rate": 1.0836042164448945e-07, |
| "loss": 0.3099, |
| "step": 1764 |
| }, |
| { |
| "epoch": 2.824, |
| "grad_norm": 0.21024059254273658, |
| "learning_rate": 1.0644091690211633e-07, |
| "loss": 0.2973, |
| "step": 1765 |
| }, |
| { |
| "epoch": 2.8256, |
| "grad_norm": 0.2177812570768171, |
| "learning_rate": 1.0453838264617711e-07, |
| "loss": 0.3251, |
| "step": 1766 |
| }, |
| { |
| "epoch": 2.8272, |
| "grad_norm": 0.22653325487911705, |
| "learning_rate": 1.0265282547451405e-07, |
| "loss": 0.3231, |
| "step": 1767 |
| }, |
| { |
| "epoch": 2.8288, |
| "grad_norm": 0.22281534436039488, |
| "learning_rate": 1.0078425192609487e-07, |
| "loss": 0.324, |
| "step": 1768 |
| }, |
| { |
| "epoch": 2.8304, |
| "grad_norm": 0.27445733761740576, |
| "learning_rate": 9.893266848098826e-08, |
| "loss": 0.3405, |
| "step": 1769 |
| }, |
| { |
| "epoch": 2.832, |
| "grad_norm": 0.21719086759418046, |
| "learning_rate": 9.709808156034394e-08, |
| "loss": 0.3259, |
| "step": 1770 |
| }, |
| { |
| "epoch": 2.8336, |
| "grad_norm": 0.22490178530224567, |
| "learning_rate": 9.528049752636714e-08, |
| "loss": 0.2998, |
| "step": 1771 |
| }, |
| { |
| "epoch": 2.8352, |
| "grad_norm": 0.22423143548678948, |
| "learning_rate": 9.347992268230022e-08, |
| "loss": 0.32, |
| "step": 1772 |
| }, |
| { |
| "epoch": 2.8368, |
| "grad_norm": 0.22427520226672237, |
| "learning_rate": 9.169636327239883e-08, |
| "loss": 0.3331, |
| "step": 1773 |
| }, |
| { |
| "epoch": 2.8384, |
| "grad_norm": 0.21065924937423897, |
| "learning_rate": 8.992982548190809e-08, |
| "loss": 0.3099, |
| "step": 1774 |
| }, |
| { |
| "epoch": 2.84, |
| "grad_norm": 0.25062026950079414, |
| "learning_rate": 8.818031543704641e-08, |
| "loss": 0.3381, |
| "step": 1775 |
| }, |
| { |
| "epoch": 2.8416, |
| "grad_norm": 0.21740561726351526, |
| "learning_rate": 8.644783920498001e-08, |
| "loss": 0.3283, |
| "step": 1776 |
| }, |
| { |
| "epoch": 2.8432, |
| "grad_norm": 0.22475950118988236, |
| "learning_rate": 8.473240279380235e-08, |
| "loss": 0.3269, |
| "step": 1777 |
| }, |
| { |
| "epoch": 2.8448, |
| "grad_norm": 0.21254210022931266, |
| "learning_rate": 8.303401215251583e-08, |
| "loss": 0.315, |
| "step": 1778 |
| }, |
| { |
| "epoch": 2.8464, |
| "grad_norm": 0.2623851325098313, |
| "learning_rate": 8.135267317100792e-08, |
| "loss": 0.3277, |
| "step": 1779 |
| }, |
| { |
| "epoch": 2.848, |
| "grad_norm": 0.22146889766509964, |
| "learning_rate": 7.968839168003395e-08, |
| "loss": 0.3201, |
| "step": 1780 |
| }, |
| { |
| "epoch": 2.8496, |
| "grad_norm": 0.22845807125190054, |
| "learning_rate": 7.804117345119266e-08, |
| "loss": 0.3444, |
| "step": 1781 |
| }, |
| { |
| "epoch": 2.8512, |
| "grad_norm": 0.2158605531875498, |
| "learning_rate": 7.64110241969107e-08, |
| "loss": 0.3099, |
| "step": 1782 |
| }, |
| { |
| "epoch": 2.8528000000000002, |
| "grad_norm": 0.2184699356957489, |
| "learning_rate": 7.479794957042041e-08, |
| "loss": 0.304, |
| "step": 1783 |
| }, |
| { |
| "epoch": 2.8544, |
| "grad_norm": 0.218936748641115, |
| "learning_rate": 7.320195516574036e-08, |
| "loss": 0.3248, |
| "step": 1784 |
| }, |
| { |
| "epoch": 2.856, |
| "grad_norm": 0.2227962983381659, |
| "learning_rate": 7.16230465176565e-08, |
| "loss": 0.3127, |
| "step": 1785 |
| }, |
| { |
| "epoch": 2.8576, |
| "grad_norm": 0.21256127298231967, |
| "learning_rate": 7.00612291017022e-08, |
| "loss": 0.3082, |
| "step": 1786 |
| }, |
| { |
| "epoch": 2.8592, |
| "grad_norm": 0.23639661388900624, |
| "learning_rate": 6.851650833414103e-08, |
| "loss": 0.3093, |
| "step": 1787 |
| }, |
| { |
| "epoch": 2.8608000000000002, |
| "grad_norm": 0.2255316007704173, |
| "learning_rate": 6.698888957194505e-08, |
| "loss": 0.3498, |
| "step": 1788 |
| }, |
| { |
| "epoch": 2.8624, |
| "grad_norm": 0.2270856100309182, |
| "learning_rate": 6.547837811277824e-08, |
| "loss": 0.3202, |
| "step": 1789 |
| }, |
| { |
| "epoch": 2.864, |
| "grad_norm": 0.21454337991629174, |
| "learning_rate": 6.39849791949787e-08, |
| "loss": 0.3066, |
| "step": 1790 |
| }, |
| { |
| "epoch": 2.8656, |
| "grad_norm": 0.2771968384507702, |
| "learning_rate": 6.250869799753866e-08, |
| "loss": 0.3224, |
| "step": 1791 |
| }, |
| { |
| "epoch": 2.8672, |
| "grad_norm": 0.22519688776608196, |
| "learning_rate": 6.104953964008897e-08, |
| "loss": 0.3224, |
| "step": 1792 |
| }, |
| { |
| "epoch": 2.8688000000000002, |
| "grad_norm": 0.2169444829878765, |
| "learning_rate": 5.960750918287627e-08, |
| "loss": 0.3201, |
| "step": 1793 |
| }, |
| { |
| "epoch": 2.8704, |
| "grad_norm": 0.2182601387983194, |
| "learning_rate": 5.818261162675309e-08, |
| "loss": 0.3002, |
| "step": 1794 |
| }, |
| { |
| "epoch": 2.872, |
| "grad_norm": 0.22335441645440832, |
| "learning_rate": 5.677485191315391e-08, |
| "loss": 0.327, |
| "step": 1795 |
| }, |
| { |
| "epoch": 2.8736, |
| "grad_norm": 0.21320665900271804, |
| "learning_rate": 5.538423492408129e-08, |
| "loss": 0.3309, |
| "step": 1796 |
| }, |
| { |
| "epoch": 2.8752, |
| "grad_norm": 0.21124484626736206, |
| "learning_rate": 5.401076548208761e-08, |
| "loss": 0.3095, |
| "step": 1797 |
| }, |
| { |
| "epoch": 2.8768000000000002, |
| "grad_norm": 0.209789209001171, |
| "learning_rate": 5.265444835025946e-08, |
| "loss": 0.308, |
| "step": 1798 |
| }, |
| { |
| "epoch": 2.8784, |
| "grad_norm": 0.21082703846136128, |
| "learning_rate": 5.1315288232201e-08, |
| "loss": 0.3027, |
| "step": 1799 |
| }, |
| { |
| "epoch": 2.88, |
| "grad_norm": 0.2241486577523718, |
| "learning_rate": 4.9993289772015116e-08, |
| "loss": 0.3046, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.8816, |
| "grad_norm": 0.22039426772862553, |
| "learning_rate": 4.8688457554291746e-08, |
| "loss": 0.3268, |
| "step": 1801 |
| }, |
| { |
| "epoch": 2.8832, |
| "grad_norm": 0.2266636115327991, |
| "learning_rate": 4.7400796104088434e-08, |
| "loss": 0.3118, |
| "step": 1802 |
| }, |
| { |
| "epoch": 2.8848000000000003, |
| "grad_norm": 0.21462101429308708, |
| "learning_rate": 4.613030988691536e-08, |
| "loss": 0.3221, |
| "step": 1803 |
| }, |
| { |
| "epoch": 2.8864, |
| "grad_norm": 0.2180338651345845, |
| "learning_rate": 4.4877003308722575e-08, |
| "loss": 0.3151, |
| "step": 1804 |
| }, |
| { |
| "epoch": 2.888, |
| "grad_norm": 0.21458515737298609, |
| "learning_rate": 4.364088071587891e-08, |
| "loss": 0.3133, |
| "step": 1805 |
| }, |
| { |
| "epoch": 2.8895999999999997, |
| "grad_norm": 0.21028077928110264, |
| "learning_rate": 4.2421946395164174e-08, |
| "loss": 0.3061, |
| "step": 1806 |
| }, |
| { |
| "epoch": 2.8912, |
| "grad_norm": 0.2283355864154111, |
| "learning_rate": 4.1220204573747534e-08, |
| "loss": 0.3293, |
| "step": 1807 |
| }, |
| { |
| "epoch": 2.8928000000000003, |
| "grad_norm": 0.24098823922096244, |
| "learning_rate": 4.0035659419178086e-08, |
| "loss": 0.3253, |
| "step": 1808 |
| }, |
| { |
| "epoch": 2.8944, |
| "grad_norm": 0.21674802287595166, |
| "learning_rate": 3.88683150393665e-08, |
| "loss": 0.3214, |
| "step": 1809 |
| }, |
| { |
| "epoch": 2.896, |
| "grad_norm": 0.21125888220036507, |
| "learning_rate": 3.771817548257395e-08, |
| "loss": 0.3278, |
| "step": 1810 |
| }, |
| { |
| "epoch": 2.8975999999999997, |
| "grad_norm": 0.2110110005107259, |
| "learning_rate": 3.658524473739544e-08, |
| "loss": 0.3152, |
| "step": 1811 |
| }, |
| { |
| "epoch": 2.8992, |
| "grad_norm": 0.2112810298089397, |
| "learning_rate": 3.546952673274817e-08, |
| "loss": 0.3122, |
| "step": 1812 |
| }, |
| { |
| "epoch": 2.9008000000000003, |
| "grad_norm": 0.21686459723808196, |
| "learning_rate": 3.437102533785541e-08, |
| "loss": 0.3035, |
| "step": 1813 |
| }, |
| { |
| "epoch": 2.9024, |
| "grad_norm": 0.23112467182421306, |
| "learning_rate": 3.328974436223709e-08, |
| "loss": 0.3531, |
| "step": 1814 |
| }, |
| { |
| "epoch": 2.904, |
| "grad_norm": 0.24485059349581315, |
| "learning_rate": 3.2225687555690886e-08, |
| "loss": 0.3093, |
| "step": 1815 |
| }, |
| { |
| "epoch": 2.9055999999999997, |
| "grad_norm": 0.2173972572172644, |
| "learning_rate": 3.117885860828396e-08, |
| "loss": 0.3198, |
| "step": 1816 |
| }, |
| { |
| "epoch": 2.9072, |
| "grad_norm": 0.21619327927651127, |
| "learning_rate": 3.014926115034012e-08, |
| "loss": 0.3289, |
| "step": 1817 |
| }, |
| { |
| "epoch": 2.9088000000000003, |
| "grad_norm": 0.22134338992482136, |
| "learning_rate": 2.9136898752422648e-08, |
| "loss": 0.335, |
| "step": 1818 |
| }, |
| { |
| "epoch": 2.9104, |
| "grad_norm": 0.2198419217011243, |
| "learning_rate": 2.8141774925327103e-08, |
| "loss": 0.3142, |
| "step": 1819 |
| }, |
| { |
| "epoch": 2.912, |
| "grad_norm": 0.2301999974872236, |
| "learning_rate": 2.7163893120066288e-08, |
| "loss": 0.3268, |
| "step": 1820 |
| }, |
| { |
| "epoch": 2.9135999999999997, |
| "grad_norm": 0.22750268375394597, |
| "learning_rate": 2.6203256727859172e-08, |
| "loss": 0.3332, |
| "step": 1821 |
| }, |
| { |
| "epoch": 2.9152, |
| "grad_norm": 0.22397299041754037, |
| "learning_rate": 2.5259869080118127e-08, |
| "loss": 0.3329, |
| "step": 1822 |
| }, |
| { |
| "epoch": 2.9168, |
| "grad_norm": 0.22242644370538295, |
| "learning_rate": 2.4333733448440033e-08, |
| "loss": 0.3225, |
| "step": 1823 |
| }, |
| { |
| "epoch": 2.9184, |
| "grad_norm": 0.21704950438296636, |
| "learning_rate": 2.34248530445913e-08, |
| "loss": 0.3047, |
| "step": 1824 |
| }, |
| { |
| "epoch": 2.92, |
| "grad_norm": 0.22354436573254682, |
| "learning_rate": 2.2533231020499536e-08, |
| "loss": 0.3192, |
| "step": 1825 |
| }, |
| { |
| "epoch": 2.9215999999999998, |
| "grad_norm": 0.22396399934180627, |
| "learning_rate": 2.1658870468241332e-08, |
| "loss": 0.3248, |
| "step": 1826 |
| }, |
| { |
| "epoch": 2.9232, |
| "grad_norm": 0.2161600594391088, |
| "learning_rate": 2.0801774420031172e-08, |
| "loss": 0.318, |
| "step": 1827 |
| }, |
| { |
| "epoch": 2.9248, |
| "grad_norm": 0.2247284799603974, |
| "learning_rate": 1.9961945848213092e-08, |
| "loss": 0.3321, |
| "step": 1828 |
| }, |
| { |
| "epoch": 2.9264, |
| "grad_norm": 0.2356035937878808, |
| "learning_rate": 1.9139387665247922e-08, |
| "loss": 0.3332, |
| "step": 1829 |
| }, |
| { |
| "epoch": 2.928, |
| "grad_norm": 0.21788694833083858, |
| "learning_rate": 1.8334102723703286e-08, |
| "loss": 0.3254, |
| "step": 1830 |
| }, |
| { |
| "epoch": 2.9295999999999998, |
| "grad_norm": 0.21483486857926812, |
| "learning_rate": 1.754609381624639e-08, |
| "loss": 0.312, |
| "step": 1831 |
| }, |
| { |
| "epoch": 2.9312, |
| "grad_norm": 0.21473960873155795, |
| "learning_rate": 1.677536367563126e-08, |
| "loss": 0.3078, |
| "step": 1832 |
| }, |
| { |
| "epoch": 2.9328, |
| "grad_norm": 0.2194043212864441, |
| "learning_rate": 1.6021914974690413e-08, |
| "loss": 0.3033, |
| "step": 1833 |
| }, |
| { |
| "epoch": 2.9344, |
| "grad_norm": 0.22461766430920085, |
| "learning_rate": 1.5285750326325953e-08, |
| "loss": 0.3157, |
| "step": 1834 |
| }, |
| { |
| "epoch": 2.936, |
| "grad_norm": 0.23058415028726756, |
| "learning_rate": 1.4566872283500733e-08, |
| "loss": 0.3212, |
| "step": 1835 |
| }, |
| { |
| "epoch": 2.9375999999999998, |
| "grad_norm": 0.22146061977655218, |
| "learning_rate": 1.3865283339228319e-08, |
| "loss": 0.318, |
| "step": 1836 |
| }, |
| { |
| "epoch": 2.9392, |
| "grad_norm": 0.21867608604796965, |
| "learning_rate": 1.3180985926564693e-08, |
| "loss": 0.3174, |
| "step": 1837 |
| }, |
| { |
| "epoch": 2.9408, |
| "grad_norm": 0.21466697149190353, |
| "learning_rate": 1.2513982418601024e-08, |
| "loss": 0.3196, |
| "step": 1838 |
| }, |
| { |
| "epoch": 2.9424, |
| "grad_norm": 0.24928317855830584, |
| "learning_rate": 1.1864275128454783e-08, |
| "loss": 0.3206, |
| "step": 1839 |
| }, |
| { |
| "epoch": 2.944, |
| "grad_norm": 0.21471395789275188, |
| "learning_rate": 1.1231866309259764e-08, |
| "loss": 0.3393, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.9455999999999998, |
| "grad_norm": 0.21799085773713728, |
| "learning_rate": 1.0616758154161633e-08, |
| "loss": 0.3118, |
| "step": 1841 |
| }, |
| { |
| "epoch": 2.9472, |
| "grad_norm": 0.21631015269736178, |
| "learning_rate": 1.0018952796307934e-08, |
| "loss": 0.3146, |
| "step": 1842 |
| }, |
| { |
| "epoch": 2.9488, |
| "grad_norm": 0.22115654698430673, |
| "learning_rate": 9.438452308841995e-09, |
| "loss": 0.3145, |
| "step": 1843 |
| }, |
| { |
| "epoch": 2.9504, |
| "grad_norm": 0.22246341385640273, |
| "learning_rate": 8.87525870489514e-09, |
| "loss": 0.3165, |
| "step": 1844 |
| }, |
| { |
| "epoch": 2.952, |
| "grad_norm": 0.2352539010258673, |
| "learning_rate": 8.329373937578378e-09, |
| "loss": 0.3161, |
| "step": 1845 |
| }, |
| { |
| "epoch": 2.9536, |
| "grad_norm": 0.2690725092681549, |
| "learning_rate": 7.800799899979061e-09, |
| "loss": 0.3085, |
| "step": 1846 |
| }, |
| { |
| "epoch": 2.9552, |
| "grad_norm": 0.24313418373468992, |
| "learning_rate": 7.289538425150899e-09, |
| "loss": 0.3241, |
| "step": 1847 |
| }, |
| { |
| "epoch": 2.9568, |
| "grad_norm": 0.2019140561461267, |
| "learning_rate": 6.7955912861095155e-09, |
| "loss": 0.3087, |
| "step": 1848 |
| }, |
| { |
| "epoch": 2.9584, |
| "grad_norm": 0.21705271011126567, |
| "learning_rate": 6.31896019582523e-09, |
| "loss": 0.3115, |
| "step": 1849 |
| }, |
| { |
| "epoch": 2.96, |
| "grad_norm": 0.21476782208157766, |
| "learning_rate": 5.8596468072180665e-09, |
| "loss": 0.3069, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.9616, |
| "grad_norm": 0.21467894968932114, |
| "learning_rate": 5.417652713152199e-09, |
| "loss": 0.3159, |
| "step": 1851 |
| }, |
| { |
| "epoch": 2.9632, |
| "grad_norm": 0.21263577895555094, |
| "learning_rate": 4.992979446428736e-09, |
| "loss": 0.3101, |
| "step": 1852 |
| }, |
| { |
| "epoch": 2.9648, |
| "grad_norm": 0.22034327616705476, |
| "learning_rate": 4.585628479781279e-09, |
| "loss": 0.3108, |
| "step": 1853 |
| }, |
| { |
| "epoch": 2.9664, |
| "grad_norm": 0.21503021549275733, |
| "learning_rate": 4.195601225872592e-09, |
| "loss": 0.3269, |
| "step": 1854 |
| }, |
| { |
| "epoch": 2.968, |
| "grad_norm": 0.2386268890930576, |
| "learning_rate": 3.822899037286276e-09, |
| "loss": 0.2894, |
| "step": 1855 |
| }, |
| { |
| "epoch": 2.9696, |
| "grad_norm": 0.22025291661505747, |
| "learning_rate": 3.4675232065256583e-09, |
| "loss": 0.3242, |
| "step": 1856 |
| }, |
| { |
| "epoch": 2.9712, |
| "grad_norm": 0.22204866649984245, |
| "learning_rate": 3.129474966006574e-09, |
| "loss": 0.2901, |
| "step": 1857 |
| }, |
| { |
| "epoch": 2.9728, |
| "grad_norm": 0.21865306622100167, |
| "learning_rate": 2.808755488054038e-09, |
| "loss": 0.3218, |
| "step": 1858 |
| }, |
| { |
| "epoch": 2.9744, |
| "grad_norm": 0.24604769722633632, |
| "learning_rate": 2.5053658848989137e-09, |
| "loss": 0.3101, |
| "step": 1859 |
| }, |
| { |
| "epoch": 2.976, |
| "grad_norm": 0.21707058323617917, |
| "learning_rate": 2.219307208672361e-09, |
| "loss": 0.3202, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.9776, |
| "grad_norm": 0.22282509949294219, |
| "learning_rate": 1.9505804514047266e-09, |
| "loss": 0.321, |
| "step": 1861 |
| }, |
| { |
| "epoch": 2.9792, |
| "grad_norm": 0.2266478424403301, |
| "learning_rate": 1.6991865450188827e-09, |
| "loss": 0.3114, |
| "step": 1862 |
| }, |
| { |
| "epoch": 2.9808, |
| "grad_norm": 0.21212532120279726, |
| "learning_rate": 1.465126361330227e-09, |
| "loss": 0.2931, |
| "step": 1863 |
| }, |
| { |
| "epoch": 2.9824, |
| "grad_norm": 0.22235746929647765, |
| "learning_rate": 1.2484007120411312e-09, |
| "loss": 0.3205, |
| "step": 1864 |
| }, |
| { |
| "epoch": 2.984, |
| "grad_norm": 0.22417093905338267, |
| "learning_rate": 1.0490103487392766e-09, |
| "loss": 0.3339, |
| "step": 1865 |
| }, |
| { |
| "epoch": 2.9856, |
| "grad_norm": 0.2668196996344108, |
| "learning_rate": 8.669559628954327e-10, |
| "loss": 0.3161, |
| "step": 1866 |
| }, |
| { |
| "epoch": 2.9872, |
| "grad_norm": 0.21542687183161902, |
| "learning_rate": 7.02238185860682e-10, |
| "loss": 0.3067, |
| "step": 1867 |
| }, |
| { |
| "epoch": 2.9888, |
| "grad_norm": 0.2191093924983881, |
| "learning_rate": 5.54857588862534e-10, |
| "loss": 0.3285, |
| "step": 1868 |
| }, |
| { |
| "epoch": 2.9904, |
| "grad_norm": 0.2232100843637184, |
| "learning_rate": 4.2481468300603625e-10, |
| "loss": 0.3346, |
| "step": 1869 |
| }, |
| { |
| "epoch": 2.992, |
| "grad_norm": 0.21208992936533716, |
| "learning_rate": 3.1210991927044244e-10, |
| "loss": 0.3148, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.9936, |
| "grad_norm": 0.21003007518746872, |
| "learning_rate": 2.167436885064378e-10, |
| "loss": 0.2988, |
| "step": 1871 |
| }, |
| { |
| "epoch": 2.9952, |
| "grad_norm": 0.2151423633204371, |
| "learning_rate": 1.387163214372489e-10, |
| "loss": 0.3164, |
| "step": 1872 |
| }, |
| { |
| "epoch": 2.9968, |
| "grad_norm": 0.2585107214211105, |
| "learning_rate": 7.80280886558682e-11, |
| "loss": 0.3382, |
| "step": 1873 |
| }, |
| { |
| "epoch": 2.9984, |
| "grad_norm": 0.20860176139573655, |
| "learning_rate": 3.467920062394381e-11, |
| "loss": 0.3121, |
| "step": 1874 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.21489732094155842, |
| "learning_rate": 8.669807672334606e-12, |
| "loss": 0.314, |
| "step": 1875 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1875, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1663721130819584.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|