{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.17038839087096866, "eval_steps": 1000, "global_step": 18000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 9.466021715053814e-06, "grad_norm": 1294.9527587890625, "learning_rate": 6.309148264984227e-10, "loss": 172.0, "step": 1 }, { "epoch": 1.893204343010763e-05, "grad_norm": 1150.3070068359375, "learning_rate": 1.2618296529968454e-09, "loss": 125.1875, "step": 2 }, { "epoch": 2.8398065145161443e-05, "grad_norm": 2059.051513671875, "learning_rate": 1.892744479495268e-09, "loss": 132.1875, "step": 3 }, { "epoch": 3.786408686021526e-05, "grad_norm": 3.2430968284606934, "learning_rate": 2.523659305993691e-09, "loss": 0.9551, "step": 4 }, { "epoch": 4.733010857526907e-05, "grad_norm": 2595.162109375, "learning_rate": 3.1545741324921134e-09, "loss": 178.25, "step": 5 }, { "epoch": 5.6796130290322886e-05, "grad_norm": 2045.851318359375, "learning_rate": 3.785488958990536e-09, "loss": 124.25, "step": 6 }, { "epoch": 6.626215200537671e-05, "grad_norm": 1506.7730712890625, "learning_rate": 4.4164037854889584e-09, "loss": 196.5, "step": 7 }, { "epoch": 7.572817372043051e-05, "grad_norm": 3.4059131145477295, "learning_rate": 5.047318611987382e-09, "loss": 0.8076, "step": 8 }, { "epoch": 8.519419543548434e-05, "grad_norm": 2373.244140625, "learning_rate": 5.678233438485804e-09, "loss": 171.25, "step": 9 }, { "epoch": 9.466021715053814e-05, "grad_norm": 2.5038576126098633, "learning_rate": 6.309148264984227e-09, "loss": 0.7388, "step": 10 }, { "epoch": 0.00010412623886559196, "grad_norm": 6842.2763671875, "learning_rate": 6.94006309148265e-09, "loss": 133.3125, "step": 11 }, { "epoch": 0.00011359226058064577, "grad_norm": 1099.3135986328125, "learning_rate": 7.570977917981072e-09, "loss": 126.0625, "step": 12 }, { "epoch": 0.0001230582822956996, "grad_norm": 1913.849609375, "learning_rate": 8.201892744479495e-09, "loss": 176.5625, "step": 13 }, { "epoch": 0.00013252430401075341, "grad_norm": 1012.0535278320312, "learning_rate": 8.832807570977917e-09, "loss": 126.9375, "step": 14 }, { "epoch": 0.0001419903257258072, "grad_norm": 3769.14453125, "learning_rate": 9.46372239747634e-09, "loss": 127.5625, "step": 15 }, { "epoch": 0.00015145634744086103, "grad_norm": 1569.410888671875, "learning_rate": 1.0094637223974764e-08, "loss": 129.6875, "step": 16 }, { "epoch": 0.00016092236915591485, "grad_norm": 2181.215087890625, "learning_rate": 1.0725552050473185e-08, "loss": 175.0, "step": 17 }, { "epoch": 0.00017038839087096867, "grad_norm": 1300.0615234375, "learning_rate": 1.1356466876971609e-08, "loss": 133.0, "step": 18 }, { "epoch": 0.00017985441258602247, "grad_norm": 1421.8985595703125, "learning_rate": 1.1987381703470032e-08, "loss": 115.375, "step": 19 }, { "epoch": 0.00018932043430107629, "grad_norm": 1994.2186279296875, "learning_rate": 1.2618296529968454e-08, "loss": 202.0, "step": 20 }, { "epoch": 0.0001987864560161301, "grad_norm": 7314.115234375, "learning_rate": 1.3249211356466877e-08, "loss": 136.75, "step": 21 }, { "epoch": 0.00020825247773118393, "grad_norm": 3190.08642578125, "learning_rate": 1.38801261829653e-08, "loss": 126.9375, "step": 22 }, { "epoch": 0.00021771849944623772, "grad_norm": 1441.97119140625, "learning_rate": 1.4511041009463722e-08, "loss": 176.875, "step": 23 }, { "epoch": 0.00022718452116129154, "grad_norm": 2326.063232421875, "learning_rate": 1.5141955835962144e-08, "loss": 153.5, "step": 24 }, { "epoch": 0.00023665054287634536, "grad_norm": 3041.300048828125, "learning_rate": 1.5772870662460567e-08, "loss": 183.8125, "step": 25 }, { "epoch": 0.0002461165645913992, "grad_norm": 1888.8056640625, "learning_rate": 1.640378548895899e-08, "loss": 138.5625, "step": 26 }, { "epoch": 0.000255582586306453, "grad_norm": 1124.2860107421875, "learning_rate": 1.7034700315457414e-08, "loss": 130.75, "step": 27 }, { "epoch": 0.00026504860802150683, "grad_norm": 4134.025390625, "learning_rate": 1.7665615141955834e-08, "loss": 158.4375, "step": 28 }, { "epoch": 0.0002745146297365606, "grad_norm": 1149.139892578125, "learning_rate": 1.829652996845426e-08, "loss": 118.9375, "step": 29 }, { "epoch": 0.0002839806514516144, "grad_norm": 980.4683837890625, "learning_rate": 1.892744479495268e-08, "loss": 122.6875, "step": 30 }, { "epoch": 0.00029344667316666824, "grad_norm": 2778.408447265625, "learning_rate": 1.9558359621451104e-08, "loss": 146.5625, "step": 31 }, { "epoch": 0.00030291269488172206, "grad_norm": 1037.8314208984375, "learning_rate": 2.0189274447949527e-08, "loss": 125.8125, "step": 32 }, { "epoch": 0.0003123787165967759, "grad_norm": 2846.587158203125, "learning_rate": 2.082018927444795e-08, "loss": 198.5, "step": 33 }, { "epoch": 0.0003218447383118297, "grad_norm": 1630.4580078125, "learning_rate": 2.145110410094637e-08, "loss": 195.875, "step": 34 }, { "epoch": 0.0003313107600268835, "grad_norm": 1266.4034423828125, "learning_rate": 2.2082018927444794e-08, "loss": 140.8125, "step": 35 }, { "epoch": 0.00034077678174193734, "grad_norm": 4139.9658203125, "learning_rate": 2.2712933753943217e-08, "loss": 132.875, "step": 36 }, { "epoch": 0.0003502428034569911, "grad_norm": 1141.658203125, "learning_rate": 2.3343848580441637e-08, "loss": 125.5625, "step": 37 }, { "epoch": 0.00035970882517204493, "grad_norm": 6168.24853515625, "learning_rate": 2.3974763406940064e-08, "loss": 123.375, "step": 38 }, { "epoch": 0.00036917484688709875, "grad_norm": 973.4960327148438, "learning_rate": 2.4605678233438484e-08, "loss": 121.25, "step": 39 }, { "epoch": 0.00037864086860215257, "grad_norm": 1568.877197265625, "learning_rate": 2.5236593059936907e-08, "loss": 140.125, "step": 40 }, { "epoch": 0.0003881068903172064, "grad_norm": 1471.1729736328125, "learning_rate": 2.586750788643533e-08, "loss": 180.125, "step": 41 }, { "epoch": 0.0003975729120322602, "grad_norm": 4013.028564453125, "learning_rate": 2.6498422712933754e-08, "loss": 175.5625, "step": 42 }, { "epoch": 0.00040703893374731404, "grad_norm": 1544.47216796875, "learning_rate": 2.7129337539432174e-08, "loss": 134.75, "step": 43 }, { "epoch": 0.00041650495546236786, "grad_norm": 1712.1092529296875, "learning_rate": 2.77602523659306e-08, "loss": 204.75, "step": 44 }, { "epoch": 0.0004259709771774216, "grad_norm": 1394.689697265625, "learning_rate": 2.839116719242902e-08, "loss": 150.875, "step": 45 }, { "epoch": 0.00043543699889247544, "grad_norm": 15979.23046875, "learning_rate": 2.9022082018927444e-08, "loss": 133.0, "step": 46 }, { "epoch": 0.00044490302060752927, "grad_norm": 1244.7691650390625, "learning_rate": 2.9652996845425867e-08, "loss": 137.5, "step": 47 }, { "epoch": 0.0004543690423225831, "grad_norm": 1501.9390869140625, "learning_rate": 3.028391167192429e-08, "loss": 123.8125, "step": 48 }, { "epoch": 0.0004638350640376369, "grad_norm": 1997.481201171875, "learning_rate": 3.0914826498422714e-08, "loss": 197.25, "step": 49 }, { "epoch": 0.00047330108575269073, "grad_norm": 1502.2030029296875, "learning_rate": 3.1545741324921134e-08, "loss": 128.3125, "step": 50 }, { "epoch": 0.00048276710746774455, "grad_norm": 1686.3980712890625, "learning_rate": 3.2176656151419554e-08, "loss": 123.1875, "step": 51 }, { "epoch": 0.0004922331291827984, "grad_norm": 1059.7200927734375, "learning_rate": 3.280757097791798e-08, "loss": 127.875, "step": 52 }, { "epoch": 0.0005016991508978521, "grad_norm": 1624.57666015625, "learning_rate": 3.34384858044164e-08, "loss": 181.0, "step": 53 }, { "epoch": 0.000511165172612906, "grad_norm": 5570.09228515625, "learning_rate": 3.406940063091483e-08, "loss": 139.1875, "step": 54 }, { "epoch": 0.0005206311943279598, "grad_norm": 3897.96240234375, "learning_rate": 3.470031545741325e-08, "loss": 162.6875, "step": 55 }, { "epoch": 0.0005300972160430137, "grad_norm": 2696.365234375, "learning_rate": 3.533123028391167e-08, "loss": 124.1875, "step": 56 }, { "epoch": 0.0005395632377580674, "grad_norm": 1353.2523193359375, "learning_rate": 3.596214511041009e-08, "loss": 123.875, "step": 57 }, { "epoch": 0.0005490292594731212, "grad_norm": 2294.989990234375, "learning_rate": 3.659305993690852e-08, "loss": 129.3125, "step": 58 }, { "epoch": 0.0005584952811881751, "grad_norm": 1437.30908203125, "learning_rate": 3.722397476340694e-08, "loss": 131.1875, "step": 59 }, { "epoch": 0.0005679613029032288, "grad_norm": 2410.67724609375, "learning_rate": 3.785488958990536e-08, "loss": 139.1875, "step": 60 }, { "epoch": 0.0005774273246182827, "grad_norm": 3.6927695274353027, "learning_rate": 3.848580441640378e-08, "loss": 0.8262, "step": 61 }, { "epoch": 0.0005868933463333365, "grad_norm": 1964.7734375, "learning_rate": 3.911671924290221e-08, "loss": 225.375, "step": 62 }, { "epoch": 0.0005963593680483903, "grad_norm": 1374.345947265625, "learning_rate": 3.974763406940063e-08, "loss": 129.6875, "step": 63 }, { "epoch": 0.0006058253897634441, "grad_norm": 3316.80078125, "learning_rate": 4.0378548895899054e-08, "loss": 124.375, "step": 64 }, { "epoch": 0.0006152914114784979, "grad_norm": 5896.333984375, "learning_rate": 4.1009463722397474e-08, "loss": 181.75, "step": 65 }, { "epoch": 0.0006247574331935518, "grad_norm": 1705.070068359375, "learning_rate": 4.16403785488959e-08, "loss": 136.875, "step": 66 }, { "epoch": 0.0006342234549086055, "grad_norm": 2275.2607421875, "learning_rate": 4.227129337539432e-08, "loss": 197.375, "step": 67 }, { "epoch": 0.0006436894766236594, "grad_norm": 3376.725830078125, "learning_rate": 4.290220820189274e-08, "loss": 137.9375, "step": 68 }, { "epoch": 0.0006531554983387132, "grad_norm": 2853.64501953125, "learning_rate": 4.353312302839116e-08, "loss": 134.5, "step": 69 }, { "epoch": 0.000662621520053767, "grad_norm": 2388.16845703125, "learning_rate": 4.416403785488959e-08, "loss": 211.375, "step": 70 }, { "epoch": 0.0006720875417688208, "grad_norm": 3.4987003803253174, "learning_rate": 4.4794952681388014e-08, "loss": 0.8208, "step": 71 }, { "epoch": 0.0006815535634838747, "grad_norm": 1166.2076416015625, "learning_rate": 4.5425867507886434e-08, "loss": 125.9375, "step": 72 }, { "epoch": 0.0006910195851989285, "grad_norm": 2768.10693359375, "learning_rate": 4.6056782334384854e-08, "loss": 143.375, "step": 73 }, { "epoch": 0.0007004856069139822, "grad_norm": 12647.724609375, "learning_rate": 4.6687697160883274e-08, "loss": 117.25, "step": 74 }, { "epoch": 0.0007099516286290361, "grad_norm": 1172.857177734375, "learning_rate": 4.73186119873817e-08, "loss": 126.6875, "step": 75 }, { "epoch": 0.0007194176503440899, "grad_norm": 3843.252685546875, "learning_rate": 4.794952681388013e-08, "loss": 190.0625, "step": 76 }, { "epoch": 0.0007288836720591437, "grad_norm": 1368.9404296875, "learning_rate": 4.858044164037855e-08, "loss": 123.875, "step": 77 }, { "epoch": 0.0007383496937741975, "grad_norm": 1601.910400390625, "learning_rate": 4.921135646687697e-08, "loss": 140.25, "step": 78 }, { "epoch": 0.0007478157154892514, "grad_norm": 1746.275634765625, "learning_rate": 4.9842271293375394e-08, "loss": 204.0, "step": 79 }, { "epoch": 0.0007572817372043051, "grad_norm": 1086.4010009765625, "learning_rate": 5.0473186119873814e-08, "loss": 126.375, "step": 80 }, { "epoch": 0.0007667477589193589, "grad_norm": 1508.918701171875, "learning_rate": 5.1104100946372234e-08, "loss": 194.75, "step": 81 }, { "epoch": 0.0007762137806344128, "grad_norm": 1799.4818115234375, "learning_rate": 5.173501577287066e-08, "loss": 163.125, "step": 82 }, { "epoch": 0.0007856798023494666, "grad_norm": 4171.857421875, "learning_rate": 5.236593059936909e-08, "loss": 168.8125, "step": 83 }, { "epoch": 0.0007951458240645204, "grad_norm": 2.688009023666382, "learning_rate": 5.299684542586751e-08, "loss": 0.8501, "step": 84 }, { "epoch": 0.0008046118457795742, "grad_norm": 943.6417846679688, "learning_rate": 5.362776025236593e-08, "loss": 123.0625, "step": 85 }, { "epoch": 0.0008140778674946281, "grad_norm": 7223.08984375, "learning_rate": 5.425867507886435e-08, "loss": 134.6875, "step": 86 }, { "epoch": 0.0008235438892096818, "grad_norm": 1965.09521484375, "learning_rate": 5.488958990536277e-08, "loss": 149.3125, "step": 87 }, { "epoch": 0.0008330099109247357, "grad_norm": 1159.4515380859375, "learning_rate": 5.55205047318612e-08, "loss": 181.0, "step": 88 }, { "epoch": 0.0008424759326397895, "grad_norm": 1556.3966064453125, "learning_rate": 5.615141955835962e-08, "loss": 122.4375, "step": 89 }, { "epoch": 0.0008519419543548432, "grad_norm": 1305.815673828125, "learning_rate": 5.678233438485804e-08, "loss": 136.8125, "step": 90 }, { "epoch": 0.0008614079760698971, "grad_norm": 1126.86572265625, "learning_rate": 5.741324921135646e-08, "loss": 140.5625, "step": 91 }, { "epoch": 0.0008708739977849509, "grad_norm": 1148.200927734375, "learning_rate": 5.804416403785489e-08, "loss": 128.0, "step": 92 }, { "epoch": 0.0008803400195000048, "grad_norm": 1909.9720458984375, "learning_rate": 5.867507886435331e-08, "loss": 119.625, "step": 93 }, { "epoch": 0.0008898060412150585, "grad_norm": 1243.28759765625, "learning_rate": 5.9305993690851735e-08, "loss": 121.625, "step": 94 }, { "epoch": 0.0008992720629301124, "grad_norm": 1465.4775390625, "learning_rate": 5.993690851735016e-08, "loss": 126.375, "step": 95 }, { "epoch": 0.0009087380846451662, "grad_norm": 1299.260498046875, "learning_rate": 6.056782334384857e-08, "loss": 126.875, "step": 96 }, { "epoch": 0.0009182041063602199, "grad_norm": 2510.9443359375, "learning_rate": 6.1198738170347e-08, "loss": 178.625, "step": 97 }, { "epoch": 0.0009276701280752738, "grad_norm": 1369.9324951171875, "learning_rate": 6.182965299684543e-08, "loss": 171.0, "step": 98 }, { "epoch": 0.0009371361497903276, "grad_norm": 2049.669189453125, "learning_rate": 6.246056782334384e-08, "loss": 190.75, "step": 99 }, { "epoch": 0.0009466021715053815, "grad_norm": 930.5064697265625, "learning_rate": 6.309148264984227e-08, "loss": 129.8125, "step": 100 }, { "epoch": 0.0009560681932204352, "grad_norm": 3215.50439453125, "learning_rate": 6.372239747634068e-08, "loss": 129.25, "step": 101 }, { "epoch": 0.0009655342149354891, "grad_norm": 3203.063232421875, "learning_rate": 6.435331230283911e-08, "loss": 131.75, "step": 102 }, { "epoch": 0.0009750002366505429, "grad_norm": 1127.2569580078125, "learning_rate": 6.498422712933755e-08, "loss": 128.4375, "step": 103 }, { "epoch": 0.0009844662583655967, "grad_norm": 1534.4344482421875, "learning_rate": 6.561514195583596e-08, "loss": 124.9375, "step": 104 }, { "epoch": 0.0009939322800806504, "grad_norm": 1447.319091796875, "learning_rate": 6.624605678233439e-08, "loss": 134.875, "step": 105 }, { "epoch": 0.0010033983017957043, "grad_norm": 2187.002197265625, "learning_rate": 6.68769716088328e-08, "loss": 169.0, "step": 106 }, { "epoch": 0.0010128643235107582, "grad_norm": 1135.5521240234375, "learning_rate": 6.750788643533123e-08, "loss": 151.125, "step": 107 }, { "epoch": 0.001022330345225812, "grad_norm": 1968.72998046875, "learning_rate": 6.813880126182965e-08, "loss": 207.4375, "step": 108 }, { "epoch": 0.0010317963669408657, "grad_norm": 1163.6510009765625, "learning_rate": 6.876971608832807e-08, "loss": 172.875, "step": 109 }, { "epoch": 0.0010412623886559196, "grad_norm": 985.3278198242188, "learning_rate": 6.94006309148265e-08, "loss": 126.25, "step": 110 }, { "epoch": 0.0010507284103709734, "grad_norm": 1956.7113037109375, "learning_rate": 7.003154574132492e-08, "loss": 120.3125, "step": 111 }, { "epoch": 0.0010601944320860273, "grad_norm": 2200.677734375, "learning_rate": 7.066246056782333e-08, "loss": 239.625, "step": 112 }, { "epoch": 0.001069660453801081, "grad_norm": 1477.730712890625, "learning_rate": 7.129337539432176e-08, "loss": 136.9375, "step": 113 }, { "epoch": 0.0010791264755161348, "grad_norm": 2043.1361083984375, "learning_rate": 7.192429022082017e-08, "loss": 172.5625, "step": 114 }, { "epoch": 0.0010885924972311887, "grad_norm": 1399.99365234375, "learning_rate": 7.255520504731861e-08, "loss": 196.375, "step": 115 }, { "epoch": 0.0010980585189462424, "grad_norm": 2666.517333984375, "learning_rate": 7.318611987381704e-08, "loss": 121.0, "step": 116 }, { "epoch": 0.0011075245406612963, "grad_norm": 2776.783447265625, "learning_rate": 7.381703470031545e-08, "loss": 166.1875, "step": 117 }, { "epoch": 0.0011169905623763501, "grad_norm": 3128.35595703125, "learning_rate": 7.444794952681388e-08, "loss": 193.5, "step": 118 }, { "epoch": 0.001126456584091404, "grad_norm": 3239.6884765625, "learning_rate": 7.507886435331231e-08, "loss": 126.5, "step": 119 }, { "epoch": 0.0011359226058064577, "grad_norm": 1262.8270263671875, "learning_rate": 7.570977917981072e-08, "loss": 122.9375, "step": 120 }, { "epoch": 0.0011453886275215115, "grad_norm": 1134.215087890625, "learning_rate": 7.634069400630915e-08, "loss": 135.375, "step": 121 }, { "epoch": 0.0011548546492365654, "grad_norm": 1192.8763427734375, "learning_rate": 7.697160883280756e-08, "loss": 132.5, "step": 122 }, { "epoch": 0.001164320670951619, "grad_norm": 1062.813232421875, "learning_rate": 7.760252365930599e-08, "loss": 128.0625, "step": 123 }, { "epoch": 0.001173786692666673, "grad_norm": 1235.7606201171875, "learning_rate": 7.823343848580441e-08, "loss": 135.875, "step": 124 }, { "epoch": 0.0011832527143817268, "grad_norm": 1610.4808349609375, "learning_rate": 7.886435331230283e-08, "loss": 185.75, "step": 125 }, { "epoch": 0.0011927187360967807, "grad_norm": 1205.70751953125, "learning_rate": 7.949526813880126e-08, "loss": 121.5, "step": 126 }, { "epoch": 0.0012021847578118344, "grad_norm": 1320.756103515625, "learning_rate": 8.012618296529967e-08, "loss": 128.375, "step": 127 }, { "epoch": 0.0012116507795268882, "grad_norm": 1072.24365234375, "learning_rate": 8.075709779179811e-08, "loss": 122.4375, "step": 128 }, { "epoch": 0.001221116801241942, "grad_norm": 1348.376220703125, "learning_rate": 8.138801261829654e-08, "loss": 154.125, "step": 129 }, { "epoch": 0.0012305828229569958, "grad_norm": 1366.898193359375, "learning_rate": 8.201892744479495e-08, "loss": 127.0, "step": 130 }, { "epoch": 0.0012400488446720496, "grad_norm": 1560.385009765625, "learning_rate": 8.264984227129338e-08, "loss": 151.3125, "step": 131 }, { "epoch": 0.0012495148663871035, "grad_norm": 1252.733154296875, "learning_rate": 8.32807570977918e-08, "loss": 130.5625, "step": 132 }, { "epoch": 0.0012589808881021574, "grad_norm": 1223.7322998046875, "learning_rate": 8.391167192429022e-08, "loss": 125.1875, "step": 133 }, { "epoch": 0.001268446909817211, "grad_norm": 1506.0888671875, "learning_rate": 8.454258675078864e-08, "loss": 198.375, "step": 134 }, { "epoch": 0.001277912931532265, "grad_norm": 1833.3076171875, "learning_rate": 8.517350157728706e-08, "loss": 125.75, "step": 135 }, { "epoch": 0.0012873789532473188, "grad_norm": 998.892333984375, "learning_rate": 8.580441640378548e-08, "loss": 129.0625, "step": 136 }, { "epoch": 0.0012968449749623725, "grad_norm": 1755.04443359375, "learning_rate": 8.643533123028391e-08, "loss": 129.1875, "step": 137 }, { "epoch": 0.0013063109966774263, "grad_norm": 8819.3583984375, "learning_rate": 8.706624605678232e-08, "loss": 133.25, "step": 138 }, { "epoch": 0.0013157770183924802, "grad_norm": 3.8019556999206543, "learning_rate": 8.769716088328075e-08, "loss": 0.8831, "step": 139 }, { "epoch": 0.001325243040107534, "grad_norm": 1276.834228515625, "learning_rate": 8.832807570977918e-08, "loss": 198.5, "step": 140 }, { "epoch": 0.0013347090618225877, "grad_norm": 1510.272705078125, "learning_rate": 8.89589905362776e-08, "loss": 182.75, "step": 141 }, { "epoch": 0.0013441750835376416, "grad_norm": 1264.444580078125, "learning_rate": 8.958990536277603e-08, "loss": 123.9375, "step": 142 }, { "epoch": 0.0013536411052526955, "grad_norm": 1861.7518310546875, "learning_rate": 9.022082018927444e-08, "loss": 128.1875, "step": 143 }, { "epoch": 0.0013631071269677494, "grad_norm": 2205.286376953125, "learning_rate": 9.085173501577287e-08, "loss": 178.5, "step": 144 }, { "epoch": 0.001372573148682803, "grad_norm": 971.6563720703125, "learning_rate": 9.14826498422713e-08, "loss": 156.4375, "step": 145 }, { "epoch": 0.001382039170397857, "grad_norm": 1339.9732666015625, "learning_rate": 9.211356466876971e-08, "loss": 126.125, "step": 146 }, { "epoch": 0.0013915051921129108, "grad_norm": 1140.3096923828125, "learning_rate": 9.274447949526814e-08, "loss": 139.0625, "step": 147 }, { "epoch": 0.0014009712138279644, "grad_norm": 3349.97412109375, "learning_rate": 9.337539432176655e-08, "loss": 146.875, "step": 148 }, { "epoch": 0.0014104372355430183, "grad_norm": 1194.341064453125, "learning_rate": 9.400630914826498e-08, "loss": 140.75, "step": 149 }, { "epoch": 0.0014199032572580722, "grad_norm": 1417.7950439453125, "learning_rate": 9.46372239747634e-08, "loss": 168.625, "step": 150 }, { "epoch": 0.001429369278973126, "grad_norm": 1442.929443359375, "learning_rate": 9.526813880126182e-08, "loss": 216.625, "step": 151 }, { "epoch": 0.0014388353006881797, "grad_norm": 1922.5452880859375, "learning_rate": 9.589905362776026e-08, "loss": 125.0, "step": 152 }, { "epoch": 0.0014483013224032336, "grad_norm": 1191.654052734375, "learning_rate": 9.652996845425868e-08, "loss": 162.5625, "step": 153 }, { "epoch": 0.0014577673441182875, "grad_norm": 1889.33544921875, "learning_rate": 9.71608832807571e-08, "loss": 187.75, "step": 154 }, { "epoch": 0.0014672333658333411, "grad_norm": 2.8472564220428467, "learning_rate": 9.779179810725552e-08, "loss": 0.9492, "step": 155 }, { "epoch": 0.001476699387548395, "grad_norm": 4.297066688537598, "learning_rate": 9.842271293375394e-08, "loss": 0.8535, "step": 156 }, { "epoch": 0.0014861654092634489, "grad_norm": 1217.4825439453125, "learning_rate": 9.905362776025236e-08, "loss": 180.875, "step": 157 }, { "epoch": 0.0014956314309785028, "grad_norm": 6719.46435546875, "learning_rate": 9.968454258675079e-08, "loss": 154.125, "step": 158 }, { "epoch": 0.0015050974526935564, "grad_norm": 6595.26904296875, "learning_rate": 1.003154574132492e-07, "loss": 136.75, "step": 159 }, { "epoch": 0.0015145634744086103, "grad_norm": 1852.66650390625, "learning_rate": 1.0094637223974763e-07, "loss": 211.75, "step": 160 }, { "epoch": 0.0015240294961236642, "grad_norm": 1454.290771484375, "learning_rate": 1.0157728706624604e-07, "loss": 195.3125, "step": 161 }, { "epoch": 0.0015334955178387178, "grad_norm": 1459.72119140625, "learning_rate": 1.0220820189274447e-07, "loss": 117.875, "step": 162 }, { "epoch": 0.0015429615395537717, "grad_norm": 1639.9857177734375, "learning_rate": 1.028391167192429e-07, "loss": 196.25, "step": 163 }, { "epoch": 0.0015524275612688256, "grad_norm": 1589.5120849609375, "learning_rate": 1.0347003154574132e-07, "loss": 182.75, "step": 164 }, { "epoch": 0.0015618935829838794, "grad_norm": 1731.669189453125, "learning_rate": 1.0410094637223975e-07, "loss": 212.125, "step": 165 }, { "epoch": 0.001571359604698933, "grad_norm": 1750.7200927734375, "learning_rate": 1.0473186119873818e-07, "loss": 177.3125, "step": 166 }, { "epoch": 0.001580825626413987, "grad_norm": 1236.463623046875, "learning_rate": 1.0536277602523659e-07, "loss": 126.3125, "step": 167 }, { "epoch": 0.0015902916481290409, "grad_norm": 2817.97802734375, "learning_rate": 1.0599369085173502e-07, "loss": 119.0625, "step": 168 }, { "epoch": 0.0015997576698440945, "grad_norm": 1407.7593994140625, "learning_rate": 1.0662460567823343e-07, "loss": 131.625, "step": 169 }, { "epoch": 0.0016092236915591484, "grad_norm": 1887.056884765625, "learning_rate": 1.0725552050473186e-07, "loss": 130.0625, "step": 170 }, { "epoch": 0.0016186897132742023, "grad_norm": 967.4691162109375, "learning_rate": 1.0788643533123028e-07, "loss": 122.5625, "step": 171 }, { "epoch": 0.0016281557349892561, "grad_norm": 1067.241943359375, "learning_rate": 1.085173501577287e-07, "loss": 133.875, "step": 172 }, { "epoch": 0.0016376217567043098, "grad_norm": 1179.1019287109375, "learning_rate": 1.0914826498422712e-07, "loss": 126.3125, "step": 173 }, { "epoch": 0.0016470877784193637, "grad_norm": 1104.7216796875, "learning_rate": 1.0977917981072554e-07, "loss": 169.5625, "step": 174 }, { "epoch": 0.0016565538001344176, "grad_norm": 2763.822998046875, "learning_rate": 1.1041009463722396e-07, "loss": 119.125, "step": 175 }, { "epoch": 0.0016660198218494714, "grad_norm": 1609.243408203125, "learning_rate": 1.110410094637224e-07, "loss": 180.875, "step": 176 }, { "epoch": 0.001675485843564525, "grad_norm": 1183.4676513671875, "learning_rate": 1.1167192429022082e-07, "loss": 160.9375, "step": 177 }, { "epoch": 0.001684951865279579, "grad_norm": 3891.11181640625, "learning_rate": 1.1230283911671924e-07, "loss": 145.0625, "step": 178 }, { "epoch": 0.0016944178869946328, "grad_norm": 1784.876220703125, "learning_rate": 1.1293375394321767e-07, "loss": 120.1875, "step": 179 }, { "epoch": 0.0017038839087096865, "grad_norm": 1114.09375, "learning_rate": 1.1356466876971608e-07, "loss": 121.0625, "step": 180 }, { "epoch": 0.0017133499304247404, "grad_norm": 1295.1387939453125, "learning_rate": 1.1419558359621451e-07, "loss": 192.6875, "step": 181 }, { "epoch": 0.0017228159521397942, "grad_norm": 1200.2640380859375, "learning_rate": 1.1482649842271292e-07, "loss": 135.6875, "step": 182 }, { "epoch": 0.0017322819738548481, "grad_norm": 1294.312255859375, "learning_rate": 1.1545741324921135e-07, "loss": 177.375, "step": 183 }, { "epoch": 0.0017417479955699018, "grad_norm": 3060.49853515625, "learning_rate": 1.1608832807570978e-07, "loss": 123.125, "step": 184 }, { "epoch": 0.0017512140172849557, "grad_norm": 1267.408935546875, "learning_rate": 1.1671924290220819e-07, "loss": 117.125, "step": 185 }, { "epoch": 0.0017606800390000095, "grad_norm": 1071.64892578125, "learning_rate": 1.1735015772870662e-07, "loss": 116.75, "step": 186 }, { "epoch": 0.0017701460607150632, "grad_norm": 1181.3748779296875, "learning_rate": 1.1798107255520503e-07, "loss": 170.0, "step": 187 }, { "epoch": 0.001779612082430117, "grad_norm": 1739.52294921875, "learning_rate": 1.1861198738170347e-07, "loss": 164.625, "step": 188 }, { "epoch": 0.001789078104145171, "grad_norm": 1264.677978515625, "learning_rate": 1.1924290220820188e-07, "loss": 113.75, "step": 189 }, { "epoch": 0.0017985441258602248, "grad_norm": 1270.1224365234375, "learning_rate": 1.1987381703470032e-07, "loss": 117.5625, "step": 190 }, { "epoch": 0.0018080101475752785, "grad_norm": 1423.93701171875, "learning_rate": 1.2050473186119874e-07, "loss": 153.125, "step": 191 }, { "epoch": 0.0018174761692903323, "grad_norm": 1510.7283935546875, "learning_rate": 1.2113564668769715e-07, "loss": 175.8125, "step": 192 }, { "epoch": 0.0018269421910053862, "grad_norm": 1264.1031494140625, "learning_rate": 1.217665615141956e-07, "loss": 135.75, "step": 193 }, { "epoch": 0.0018364082127204399, "grad_norm": 3.618422508239746, "learning_rate": 1.22397476340694e-07, "loss": 0.9531, "step": 194 }, { "epoch": 0.0018458742344354938, "grad_norm": 1803.1566162109375, "learning_rate": 1.2302839116719242e-07, "loss": 156.75, "step": 195 }, { "epoch": 0.0018553402561505476, "grad_norm": 3.9544999599456787, "learning_rate": 1.2365930599369086e-07, "loss": 1.0532, "step": 196 }, { "epoch": 0.0018648062778656015, "grad_norm": 1403.880126953125, "learning_rate": 1.2429022082018927e-07, "loss": 132.125, "step": 197 }, { "epoch": 0.0018742722995806552, "grad_norm": 1507.85595703125, "learning_rate": 1.2492113564668768e-07, "loss": 136.0, "step": 198 }, { "epoch": 0.001883738321295709, "grad_norm": 1431.7271728515625, "learning_rate": 1.2555205047318612e-07, "loss": 114.1875, "step": 199 }, { "epoch": 0.001893204343010763, "grad_norm": 1307.0106201171875, "learning_rate": 1.2618296529968454e-07, "loss": 110.75, "step": 200 }, { "epoch": 0.0019026703647258166, "grad_norm": 2603.2822265625, "learning_rate": 1.2681388012618298e-07, "loss": 182.75, "step": 201 }, { "epoch": 0.0019121363864408704, "grad_norm": 1452.68359375, "learning_rate": 1.2744479495268136e-07, "loss": 120.5625, "step": 202 }, { "epoch": 0.0019216024081559243, "grad_norm": 1412.5966796875, "learning_rate": 1.280757097791798e-07, "loss": 124.0, "step": 203 }, { "epoch": 0.0019310684298709782, "grad_norm": 1154.302734375, "learning_rate": 1.2870662460567822e-07, "loss": 124.25, "step": 204 }, { "epoch": 0.0019405344515860319, "grad_norm": 1089.109619140625, "learning_rate": 1.2933753943217666e-07, "loss": 109.6875, "step": 205 }, { "epoch": 0.0019500004733010857, "grad_norm": 2484.768798828125, "learning_rate": 1.299684542586751e-07, "loss": 129.3125, "step": 206 }, { "epoch": 0.0019594664950161396, "grad_norm": 1709.1727294921875, "learning_rate": 1.3059936908517348e-07, "loss": 123.625, "step": 207 }, { "epoch": 0.0019689325167311935, "grad_norm": 1218.867431640625, "learning_rate": 1.3123028391167192e-07, "loss": 135.0, "step": 208 }, { "epoch": 0.0019783985384462474, "grad_norm": 2.9123761653900146, "learning_rate": 1.3186119873817034e-07, "loss": 0.9297, "step": 209 }, { "epoch": 0.001987864560161301, "grad_norm": 3058.292724609375, "learning_rate": 1.3249211356466878e-07, "loss": 166.5, "step": 210 }, { "epoch": 0.0019973305818763547, "grad_norm": 1505.275634765625, "learning_rate": 1.331230283911672e-07, "loss": 116.5625, "step": 211 }, { "epoch": 0.0020067966035914086, "grad_norm": 1970.2532958984375, "learning_rate": 1.337539432176656e-07, "loss": 173.25, "step": 212 }, { "epoch": 0.0020162626253064624, "grad_norm": 1390.5252685546875, "learning_rate": 1.3438485804416402e-07, "loss": 108.625, "step": 213 }, { "epoch": 0.0020257286470215163, "grad_norm": 1182.1207275390625, "learning_rate": 1.3501577287066246e-07, "loss": 174.625, "step": 214 }, { "epoch": 0.00203519466873657, "grad_norm": 1183.169677734375, "learning_rate": 1.3564668769716087e-07, "loss": 156.25, "step": 215 }, { "epoch": 0.002044660690451624, "grad_norm": 2129.21875, "learning_rate": 1.362776025236593e-07, "loss": 108.375, "step": 216 }, { "epoch": 0.0020541267121666775, "grad_norm": 1442.5628662109375, "learning_rate": 1.369085173501577e-07, "loss": 186.6875, "step": 217 }, { "epoch": 0.0020635927338817314, "grad_norm": 1893.0543212890625, "learning_rate": 1.3753943217665614e-07, "loss": 111.4375, "step": 218 }, { "epoch": 0.0020730587555967852, "grad_norm": 1824.6494140625, "learning_rate": 1.3817034700315458e-07, "loss": 163.8125, "step": 219 }, { "epoch": 0.002082524777311839, "grad_norm": 1242.5086669921875, "learning_rate": 1.38801261829653e-07, "loss": 117.625, "step": 220 }, { "epoch": 0.002091990799026893, "grad_norm": 1262.830810546875, "learning_rate": 1.3943217665615143e-07, "loss": 116.8125, "step": 221 }, { "epoch": 0.002101456820741947, "grad_norm": 2042.3402099609375, "learning_rate": 1.4006309148264984e-07, "loss": 119.0625, "step": 222 }, { "epoch": 0.0021109228424570007, "grad_norm": 2098.223876953125, "learning_rate": 1.4069400630914826e-07, "loss": 121.25, "step": 223 }, { "epoch": 0.0021203888641720546, "grad_norm": 1642.031494140625, "learning_rate": 1.4132492113564667e-07, "loss": 149.8125, "step": 224 }, { "epoch": 0.002129854885887108, "grad_norm": 1537.3067626953125, "learning_rate": 1.419558359621451e-07, "loss": 114.75, "step": 225 }, { "epoch": 0.002139320907602162, "grad_norm": 1660.147705078125, "learning_rate": 1.4258675078864352e-07, "loss": 166.5625, "step": 226 }, { "epoch": 0.002148786929317216, "grad_norm": 1099.3323974609375, "learning_rate": 1.4321766561514196e-07, "loss": 158.5625, "step": 227 }, { "epoch": 0.0021582529510322697, "grad_norm": 1468.4925537109375, "learning_rate": 1.4384858044164035e-07, "loss": 108.9375, "step": 228 }, { "epoch": 0.0021677189727473236, "grad_norm": 2.995248556137085, "learning_rate": 1.444794952681388e-07, "loss": 0.7837, "step": 229 }, { "epoch": 0.0021771849944623774, "grad_norm": 1293.233154296875, "learning_rate": 1.4511041009463723e-07, "loss": 121.0, "step": 230 }, { "epoch": 0.0021866510161774313, "grad_norm": 1048.2811279296875, "learning_rate": 1.4574132492113564e-07, "loss": 146.25, "step": 231 }, { "epoch": 0.0021961170378924848, "grad_norm": 1430.478515625, "learning_rate": 1.4637223974763408e-07, "loss": 188.125, "step": 232 }, { "epoch": 0.0022055830596075386, "grad_norm": 2140.932861328125, "learning_rate": 1.4700315457413247e-07, "loss": 159.25, "step": 233 }, { "epoch": 0.0022150490813225925, "grad_norm": 1756.155517578125, "learning_rate": 1.476340694006309e-07, "loss": 113.6875, "step": 234 }, { "epoch": 0.0022245151030376464, "grad_norm": 1116.3638916015625, "learning_rate": 1.4826498422712932e-07, "loss": 129.0625, "step": 235 }, { "epoch": 0.0022339811247527003, "grad_norm": 2416.242919921875, "learning_rate": 1.4889589905362776e-07, "loss": 204.8125, "step": 236 }, { "epoch": 0.002243447146467754, "grad_norm": 3.380565643310547, "learning_rate": 1.4952681388012618e-07, "loss": 0.8159, "step": 237 }, { "epoch": 0.002252913168182808, "grad_norm": 2446.087158203125, "learning_rate": 1.5015772870662462e-07, "loss": 131.9375, "step": 238 }, { "epoch": 0.0022623791898978615, "grad_norm": 1765.4295654296875, "learning_rate": 1.50788643533123e-07, "loss": 111.1875, "step": 239 }, { "epoch": 0.0022718452116129153, "grad_norm": 2165.679443359375, "learning_rate": 1.5141955835962144e-07, "loss": 152.5, "step": 240 }, { "epoch": 0.002281311233327969, "grad_norm": 2481.47509765625, "learning_rate": 1.5205047318611986e-07, "loss": 197.5, "step": 241 }, { "epoch": 0.002290777255043023, "grad_norm": 1630.9371337890625, "learning_rate": 1.526813880126183e-07, "loss": 99.6875, "step": 242 }, { "epoch": 0.002300243276758077, "grad_norm": 1309.2384033203125, "learning_rate": 1.5331230283911674e-07, "loss": 108.625, "step": 243 }, { "epoch": 0.002309709298473131, "grad_norm": 2.55615496635437, "learning_rate": 1.5394321766561512e-07, "loss": 0.7717, "step": 244 }, { "epoch": 0.0023191753201881847, "grad_norm": 1510.5743408203125, "learning_rate": 1.5457413249211356e-07, "loss": 181.5, "step": 245 }, { "epoch": 0.002328641341903238, "grad_norm": 3.5962634086608887, "learning_rate": 1.5520504731861198e-07, "loss": 0.8721, "step": 246 }, { "epoch": 0.002338107363618292, "grad_norm": 1075.9527587890625, "learning_rate": 1.5583596214511042e-07, "loss": 117.5625, "step": 247 }, { "epoch": 0.002347573385333346, "grad_norm": 1590.4312744140625, "learning_rate": 1.5646687697160883e-07, "loss": 107.0625, "step": 248 }, { "epoch": 0.0023570394070483998, "grad_norm": 1869.1075439453125, "learning_rate": 1.5709779179810724e-07, "loss": 117.125, "step": 249 }, { "epoch": 0.0023665054287634536, "grad_norm": 1214.6466064453125, "learning_rate": 1.5772870662460566e-07, "loss": 111.5625, "step": 250 }, { "epoch": 0.0023759714504785075, "grad_norm": 1314.9755859375, "learning_rate": 1.583596214511041e-07, "loss": 167.1875, "step": 251 }, { "epoch": 0.0023854374721935614, "grad_norm": 1649.4871826171875, "learning_rate": 1.589905362776025e-07, "loss": 167.375, "step": 252 }, { "epoch": 0.002394903493908615, "grad_norm": 990.2603149414062, "learning_rate": 1.5962145110410095e-07, "loss": 114.875, "step": 253 }, { "epoch": 0.0024043695156236687, "grad_norm": 2154.75244140625, "learning_rate": 1.6025236593059934e-07, "loss": 179.375, "step": 254 }, { "epoch": 0.0024138355373387226, "grad_norm": 1608.8109130859375, "learning_rate": 1.6088328075709778e-07, "loss": 158.625, "step": 255 }, { "epoch": 0.0024233015590537765, "grad_norm": 1513.909423828125, "learning_rate": 1.6151419558359622e-07, "loss": 111.5625, "step": 256 }, { "epoch": 0.0024327675807688303, "grad_norm": 1748.3988037109375, "learning_rate": 1.6214511041009463e-07, "loss": 197.5, "step": 257 }, { "epoch": 0.002442233602483884, "grad_norm": 1518.42724609375, "learning_rate": 1.6277602523659307e-07, "loss": 149.875, "step": 258 }, { "epoch": 0.002451699624198938, "grad_norm": 1262.2388916015625, "learning_rate": 1.6340694006309146e-07, "loss": 165.5625, "step": 259 }, { "epoch": 0.0024611656459139915, "grad_norm": 1460.620849609375, "learning_rate": 1.640378548895899e-07, "loss": 102.875, "step": 260 }, { "epoch": 0.0024706316676290454, "grad_norm": 1272.4871826171875, "learning_rate": 1.646687697160883e-07, "loss": 100.8125, "step": 261 }, { "epoch": 0.0024800976893440993, "grad_norm": 1024.879638671875, "learning_rate": 1.6529968454258675e-07, "loss": 94.625, "step": 262 }, { "epoch": 0.002489563711059153, "grad_norm": 1157.3880615234375, "learning_rate": 1.6593059936908516e-07, "loss": 103.75, "step": 263 }, { "epoch": 0.002499029732774207, "grad_norm": 1094.3538818359375, "learning_rate": 1.665615141955836e-07, "loss": 105.4375, "step": 264 }, { "epoch": 0.002508495754489261, "grad_norm": 1282.859375, "learning_rate": 1.67192429022082e-07, "loss": 103.0, "step": 265 }, { "epoch": 0.0025179617762043148, "grad_norm": 999.6652221679688, "learning_rate": 1.6782334384858043e-07, "loss": 121.375, "step": 266 }, { "epoch": 0.0025274277979193682, "grad_norm": 1098.42041015625, "learning_rate": 1.6845425867507887e-07, "loss": 105.6875, "step": 267 }, { "epoch": 0.002536893819634422, "grad_norm": 1749.0947265625, "learning_rate": 1.6908517350157728e-07, "loss": 106.6875, "step": 268 }, { "epoch": 0.002546359841349476, "grad_norm": 2.4961538314819336, "learning_rate": 1.6971608832807572e-07, "loss": 0.9307, "step": 269 }, { "epoch": 0.00255582586306453, "grad_norm": 1996.1824951171875, "learning_rate": 1.703470031545741e-07, "loss": 97.0, "step": 270 }, { "epoch": 0.0025652918847795837, "grad_norm": 1302.5595703125, "learning_rate": 1.7097791798107255e-07, "loss": 179.8125, "step": 271 }, { "epoch": 0.0025747579064946376, "grad_norm": 1315.5894775390625, "learning_rate": 1.7160883280757096e-07, "loss": 95.0625, "step": 272 }, { "epoch": 0.0025842239282096915, "grad_norm": 1143.633056640625, "learning_rate": 1.722397476340694e-07, "loss": 115.25, "step": 273 }, { "epoch": 0.002593689949924745, "grad_norm": 2092.668701171875, "learning_rate": 1.7287066246056782e-07, "loss": 200.0, "step": 274 }, { "epoch": 0.002603155971639799, "grad_norm": 1296.0836181640625, "learning_rate": 1.7350157728706623e-07, "loss": 110.6875, "step": 275 }, { "epoch": 0.0026126219933548527, "grad_norm": 2300.943603515625, "learning_rate": 1.7413249211356464e-07, "loss": 140.375, "step": 276 }, { "epoch": 0.0026220880150699065, "grad_norm": 1423.821044921875, "learning_rate": 1.7476340694006308e-07, "loss": 97.75, "step": 277 }, { "epoch": 0.0026315540367849604, "grad_norm": 1114.5367431640625, "learning_rate": 1.753943217665615e-07, "loss": 157.8125, "step": 278 }, { "epoch": 0.0026410200585000143, "grad_norm": 1634.1494140625, "learning_rate": 1.7602523659305994e-07, "loss": 103.6875, "step": 279 }, { "epoch": 0.002650486080215068, "grad_norm": 1019.2512817382812, "learning_rate": 1.7665615141955835e-07, "loss": 117.125, "step": 280 }, { "epoch": 0.0026599521019301216, "grad_norm": 1276.6824951171875, "learning_rate": 1.7728706624605676e-07, "loss": 97.4375, "step": 281 }, { "epoch": 0.0026694181236451755, "grad_norm": 945.8740844726562, "learning_rate": 1.779179810725552e-07, "loss": 99.25, "step": 282 }, { "epoch": 0.0026788841453602294, "grad_norm": 1480.15380859375, "learning_rate": 1.7854889589905362e-07, "loss": 95.0625, "step": 283 }, { "epoch": 0.0026883501670752832, "grad_norm": 1357.3424072265625, "learning_rate": 1.7917981072555206e-07, "loss": 156.6875, "step": 284 }, { "epoch": 0.002697816188790337, "grad_norm": 1161.6617431640625, "learning_rate": 1.7981072555205047e-07, "loss": 108.8125, "step": 285 }, { "epoch": 0.002707282210505391, "grad_norm": 5818.056640625, "learning_rate": 1.8044164037854888e-07, "loss": 134.0, "step": 286 }, { "epoch": 0.002716748232220445, "grad_norm": 1151.5064697265625, "learning_rate": 1.810725552050473e-07, "loss": 93.125, "step": 287 }, { "epoch": 0.0027262142539354987, "grad_norm": 3.9509263038635254, "learning_rate": 1.8170347003154574e-07, "loss": 0.9551, "step": 288 }, { "epoch": 0.002735680275650552, "grad_norm": 4691.18017578125, "learning_rate": 1.8233438485804415e-07, "loss": 109.5, "step": 289 }, { "epoch": 0.002745146297365606, "grad_norm": 1262.510498046875, "learning_rate": 1.829652996845426e-07, "loss": 128.1875, "step": 290 }, { "epoch": 0.00275461231908066, "grad_norm": 2.6075966358184814, "learning_rate": 1.83596214511041e-07, "loss": 0.7854, "step": 291 }, { "epoch": 0.002764078340795714, "grad_norm": 1320.4666748046875, "learning_rate": 1.8422712933753942e-07, "loss": 103.875, "step": 292 }, { "epoch": 0.0027735443625107677, "grad_norm": 1305.63427734375, "learning_rate": 1.8485804416403786e-07, "loss": 168.2812, "step": 293 }, { "epoch": 0.0027830103842258216, "grad_norm": 999.3013916015625, "learning_rate": 1.8548895899053627e-07, "loss": 117.8125, "step": 294 }, { "epoch": 0.0027924764059408754, "grad_norm": 1395.64453125, "learning_rate": 1.861198738170347e-07, "loss": 155.3125, "step": 295 }, { "epoch": 0.002801942427655929, "grad_norm": 1065.287841796875, "learning_rate": 1.867507886435331e-07, "loss": 107.25, "step": 296 }, { "epoch": 0.0028114084493709827, "grad_norm": 1983.1998291015625, "learning_rate": 1.8738170347003154e-07, "loss": 195.75, "step": 297 }, { "epoch": 0.0028208744710860366, "grad_norm": 1196.0794677734375, "learning_rate": 1.8801261829652995e-07, "loss": 100.5, "step": 298 }, { "epoch": 0.0028303404928010905, "grad_norm": 2351.440185546875, "learning_rate": 1.886435331230284e-07, "loss": 158.75, "step": 299 }, { "epoch": 0.0028398065145161444, "grad_norm": 2.9501140117645264, "learning_rate": 1.892744479495268e-07, "loss": 0.7939, "step": 300 }, { "epoch": 0.0028492725362311982, "grad_norm": 1298.6029052734375, "learning_rate": 1.8990536277602522e-07, "loss": 184.5, "step": 301 }, { "epoch": 0.002858738557946252, "grad_norm": 1429.816162109375, "learning_rate": 1.9053627760252363e-07, "loss": 167.875, "step": 302 }, { "epoch": 0.0028682045796613056, "grad_norm": 1341.813232421875, "learning_rate": 1.9116719242902207e-07, "loss": 170.9375, "step": 303 }, { "epoch": 0.0028776706013763594, "grad_norm": 1841.7606201171875, "learning_rate": 1.917981072555205e-07, "loss": 104.75, "step": 304 }, { "epoch": 0.0028871366230914133, "grad_norm": 5674.31103515625, "learning_rate": 1.9242902208201892e-07, "loss": 146.875, "step": 305 }, { "epoch": 0.002896602644806467, "grad_norm": 1118.888916015625, "learning_rate": 1.9305993690851736e-07, "loss": 168.9375, "step": 306 }, { "epoch": 0.002906068666521521, "grad_norm": 2.272533416748047, "learning_rate": 1.9369085173501575e-07, "loss": 0.7769, "step": 307 }, { "epoch": 0.002915534688236575, "grad_norm": 1808.4525146484375, "learning_rate": 1.943217665615142e-07, "loss": 134.875, "step": 308 }, { "epoch": 0.002925000709951629, "grad_norm": 1195.366943359375, "learning_rate": 1.949526813880126e-07, "loss": 105.5, "step": 309 }, { "epoch": 0.0029344667316666823, "grad_norm": 960.0848388671875, "learning_rate": 1.9558359621451104e-07, "loss": 99.5, "step": 310 }, { "epoch": 0.002943932753381736, "grad_norm": 1156.8896484375, "learning_rate": 1.9621451104100946e-07, "loss": 94.75, "step": 311 }, { "epoch": 0.00295339877509679, "grad_norm": 1298.240234375, "learning_rate": 1.9684542586750787e-07, "loss": 105.0, "step": 312 }, { "epoch": 0.002962864796811844, "grad_norm": 1073.2266845703125, "learning_rate": 1.9747634069400628e-07, "loss": 185.5625, "step": 313 }, { "epoch": 0.0029723308185268978, "grad_norm": 1210.400146484375, "learning_rate": 1.9810725552050472e-07, "loss": 107.0625, "step": 314 }, { "epoch": 0.0029817968402419516, "grad_norm": 1416.227783203125, "learning_rate": 1.9873817034700316e-07, "loss": 156.25, "step": 315 }, { "epoch": 0.0029912628619570055, "grad_norm": 3768.859130859375, "learning_rate": 1.9936908517350158e-07, "loss": 166.875, "step": 316 }, { "epoch": 0.003000728883672059, "grad_norm": 1410.2275390625, "learning_rate": 2e-07, "loss": 92.0625, "step": 317 }, { "epoch": 0.003010194905387113, "grad_norm": 1235.7955322265625, "learning_rate": 2.006309148264984e-07, "loss": 85.25, "step": 318 }, { "epoch": 0.0030196609271021667, "grad_norm": 1803.3367919921875, "learning_rate": 2.0126182965299684e-07, "loss": 89.5, "step": 319 }, { "epoch": 0.0030291269488172206, "grad_norm": 1493.832275390625, "learning_rate": 2.0189274447949526e-07, "loss": 181.3125, "step": 320 }, { "epoch": 0.0030385929705322745, "grad_norm": 2.9281601905822754, "learning_rate": 2.025236593059937e-07, "loss": 1.0322, "step": 321 }, { "epoch": 0.0030480589922473283, "grad_norm": 1227.860595703125, "learning_rate": 2.0315457413249208e-07, "loss": 165.625, "step": 322 }, { "epoch": 0.003057525013962382, "grad_norm": 1107.1368408203125, "learning_rate": 2.0378548895899052e-07, "loss": 83.3125, "step": 323 }, { "epoch": 0.0030669910356774356, "grad_norm": 3.111886978149414, "learning_rate": 2.0441640378548894e-07, "loss": 0.7703, "step": 324 }, { "epoch": 0.0030764570573924895, "grad_norm": 1125.5733642578125, "learning_rate": 2.0504731861198738e-07, "loss": 94.625, "step": 325 }, { "epoch": 0.0030859230791075434, "grad_norm": 1059.1480712890625, "learning_rate": 2.056782334384858e-07, "loss": 106.375, "step": 326 }, { "epoch": 0.0030953891008225973, "grad_norm": 898.36376953125, "learning_rate": 2.063091482649842e-07, "loss": 93.25, "step": 327 }, { "epoch": 0.003104855122537651, "grad_norm": 1085.2352294921875, "learning_rate": 2.0694006309148264e-07, "loss": 86.25, "step": 328 }, { "epoch": 0.003114321144252705, "grad_norm": 1363.849853515625, "learning_rate": 2.0757097791798106e-07, "loss": 141.375, "step": 329 }, { "epoch": 0.003123787165967759, "grad_norm": 1281.955078125, "learning_rate": 2.082018927444795e-07, "loss": 87.125, "step": 330 }, { "epoch": 0.0031332531876828123, "grad_norm": 3.2870306968688965, "learning_rate": 2.088328075709779e-07, "loss": 1.0161, "step": 331 }, { "epoch": 0.003142719209397866, "grad_norm": 1306.583984375, "learning_rate": 2.0946372239747635e-07, "loss": 195.875, "step": 332 }, { "epoch": 0.00315218523111292, "grad_norm": 1065.841552734375, "learning_rate": 2.1009463722397474e-07, "loss": 160.375, "step": 333 }, { "epoch": 0.003161651252827974, "grad_norm": 959.3275756835938, "learning_rate": 2.1072555205047318e-07, "loss": 95.125, "step": 334 }, { "epoch": 0.003171117274543028, "grad_norm": 1120.622802734375, "learning_rate": 2.113564668769716e-07, "loss": 106.1875, "step": 335 }, { "epoch": 0.0031805832962580817, "grad_norm": 1932.004150390625, "learning_rate": 2.1198738170347003e-07, "loss": 132.5, "step": 336 }, { "epoch": 0.0031900493179731356, "grad_norm": 1096.031494140625, "learning_rate": 2.1261829652996844e-07, "loss": 104.625, "step": 337 }, { "epoch": 0.003199515339688189, "grad_norm": 1013.9035034179688, "learning_rate": 2.1324921135646686e-07, "loss": 134.625, "step": 338 }, { "epoch": 0.003208981361403243, "grad_norm": 1022.3290405273438, "learning_rate": 2.138801261829653e-07, "loss": 178.5, "step": 339 }, { "epoch": 0.003218447383118297, "grad_norm": 1113.2945556640625, "learning_rate": 2.145110410094637e-07, "loss": 108.25, "step": 340 }, { "epoch": 0.0032279134048333507, "grad_norm": 959.1475219726562, "learning_rate": 2.1514195583596215e-07, "loss": 85.125, "step": 341 }, { "epoch": 0.0032373794265484045, "grad_norm": 1559.513916015625, "learning_rate": 2.1577287066246056e-07, "loss": 140.9375, "step": 342 }, { "epoch": 0.0032468454482634584, "grad_norm": 1166.2724609375, "learning_rate": 2.1640378548895898e-07, "loss": 108.25, "step": 343 }, { "epoch": 0.0032563114699785123, "grad_norm": 912.3612670898438, "learning_rate": 2.170347003154574e-07, "loss": 87.125, "step": 344 }, { "epoch": 0.003265777491693566, "grad_norm": 3.2482919692993164, "learning_rate": 2.1766561514195583e-07, "loss": 0.8613, "step": 345 }, { "epoch": 0.0032752435134086196, "grad_norm": 765.2449951171875, "learning_rate": 2.1829652996845424e-07, "loss": 87.1875, "step": 346 }, { "epoch": 0.0032847095351236735, "grad_norm": 1313.075439453125, "learning_rate": 2.1892744479495268e-07, "loss": 145.625, "step": 347 }, { "epoch": 0.0032941755568387274, "grad_norm": 1417.8218994140625, "learning_rate": 2.1955835962145107e-07, "loss": 181.3125, "step": 348 }, { "epoch": 0.0033036415785537812, "grad_norm": 1712.029541015625, "learning_rate": 2.201892744479495e-07, "loss": 197.5, "step": 349 }, { "epoch": 0.003313107600268835, "grad_norm": 1993.977783203125, "learning_rate": 2.2082018927444792e-07, "loss": 166.75, "step": 350 }, { "epoch": 0.003322573621983889, "grad_norm": 1887.4539794921875, "learning_rate": 2.2145110410094636e-07, "loss": 156.8125, "step": 351 }, { "epoch": 0.003332039643698943, "grad_norm": 988.2648315429688, "learning_rate": 2.220820189274448e-07, "loss": 145.625, "step": 352 }, { "epoch": 0.0033415056654139963, "grad_norm": 1204.1593017578125, "learning_rate": 2.2271293375394322e-07, "loss": 88.4062, "step": 353 }, { "epoch": 0.00335097168712905, "grad_norm": 1393.5394287109375, "learning_rate": 2.2334384858044163e-07, "loss": 126.5, "step": 354 }, { "epoch": 0.003360437708844104, "grad_norm": 1125.91259765625, "learning_rate": 2.2397476340694004e-07, "loss": 145.75, "step": 355 }, { "epoch": 0.003369903730559158, "grad_norm": 1464.4053955078125, "learning_rate": 2.2460567823343848e-07, "loss": 85.0625, "step": 356 }, { "epoch": 0.003379369752274212, "grad_norm": 1030.506103515625, "learning_rate": 2.252365930599369e-07, "loss": 95.4375, "step": 357 }, { "epoch": 0.0033888357739892657, "grad_norm": 1144.7415771484375, "learning_rate": 2.2586750788643534e-07, "loss": 100.9375, "step": 358 }, { "epoch": 0.0033983017957043195, "grad_norm": 961.9559326171875, "learning_rate": 2.2649842271293372e-07, "loss": 78.5625, "step": 359 }, { "epoch": 0.003407767817419373, "grad_norm": 1093.2755126953125, "learning_rate": 2.2712933753943216e-07, "loss": 143.625, "step": 360 }, { "epoch": 0.003417233839134427, "grad_norm": 1560.0389404296875, "learning_rate": 2.2776025236593058e-07, "loss": 172.375, "step": 361 }, { "epoch": 0.0034266998608494807, "grad_norm": 985.8718872070312, "learning_rate": 2.2839116719242902e-07, "loss": 71.4688, "step": 362 }, { "epoch": 0.0034361658825645346, "grad_norm": 2063.0419921875, "learning_rate": 2.2902208201892746e-07, "loss": 191.375, "step": 363 }, { "epoch": 0.0034456319042795885, "grad_norm": 1101.048583984375, "learning_rate": 2.2965299684542585e-07, "loss": 98.4375, "step": 364 }, { "epoch": 0.0034550979259946424, "grad_norm": 1168.1688232421875, "learning_rate": 2.3028391167192428e-07, "loss": 94.3438, "step": 365 }, { "epoch": 0.0034645639477096962, "grad_norm": 1210.393798828125, "learning_rate": 2.309148264984227e-07, "loss": 163.75, "step": 366 }, { "epoch": 0.0034740299694247497, "grad_norm": 1479.927734375, "learning_rate": 2.3154574132492114e-07, "loss": 187.9375, "step": 367 }, { "epoch": 0.0034834959911398036, "grad_norm": 1318.9559326171875, "learning_rate": 2.3217665615141955e-07, "loss": 157.3438, "step": 368 }, { "epoch": 0.0034929620128548574, "grad_norm": 1536.8331298828125, "learning_rate": 2.3280757097791797e-07, "loss": 113.3438, "step": 369 }, { "epoch": 0.0035024280345699113, "grad_norm": 1066.8603515625, "learning_rate": 2.3343848580441638e-07, "loss": 142.8438, "step": 370 }, { "epoch": 0.003511894056284965, "grad_norm": 752.5729370117188, "learning_rate": 2.3406940063091482e-07, "loss": 94.1875, "step": 371 }, { "epoch": 0.003521360078000019, "grad_norm": 1427.775634765625, "learning_rate": 2.3470031545741323e-07, "loss": 92.4375, "step": 372 }, { "epoch": 0.003530826099715073, "grad_norm": 896.3651123046875, "learning_rate": 2.3533123028391167e-07, "loss": 120.6875, "step": 373 }, { "epoch": 0.0035402921214301264, "grad_norm": 886.4716796875, "learning_rate": 2.3596214511041006e-07, "loss": 84.0625, "step": 374 }, { "epoch": 0.0035497581431451803, "grad_norm": 1201.2655029296875, "learning_rate": 2.365930599369085e-07, "loss": 169.0625, "step": 375 }, { "epoch": 0.003559224164860234, "grad_norm": 1064.5067138671875, "learning_rate": 2.3722397476340694e-07, "loss": 88.375, "step": 376 }, { "epoch": 0.003568690186575288, "grad_norm": 829.5634155273438, "learning_rate": 2.3785488958990535e-07, "loss": 133.875, "step": 377 }, { "epoch": 0.003578156208290342, "grad_norm": 1119.84765625, "learning_rate": 2.3848580441640377e-07, "loss": 65.9375, "step": 378 }, { "epoch": 0.0035876222300053958, "grad_norm": 1257.77099609375, "learning_rate": 2.391167192429022e-07, "loss": 73.375, "step": 379 }, { "epoch": 0.0035970882517204496, "grad_norm": 960.9338989257812, "learning_rate": 2.3974763406940064e-07, "loss": 77.0625, "step": 380 }, { "epoch": 0.003606554273435503, "grad_norm": 1820.6767578125, "learning_rate": 2.4037854889589903e-07, "loss": 173.375, "step": 381 }, { "epoch": 0.003616020295150557, "grad_norm": 1180.593017578125, "learning_rate": 2.4100946372239747e-07, "loss": 85.5938, "step": 382 }, { "epoch": 0.003625486316865611, "grad_norm": 784.6696166992188, "learning_rate": 2.4164037854889586e-07, "loss": 68.0312, "step": 383 }, { "epoch": 0.0036349523385806647, "grad_norm": 1201.55810546875, "learning_rate": 2.422712933753943e-07, "loss": 77.6562, "step": 384 }, { "epoch": 0.0036444183602957186, "grad_norm": 1909.580810546875, "learning_rate": 2.4290220820189274e-07, "loss": 142.375, "step": 385 }, { "epoch": 0.0036538843820107724, "grad_norm": 1049.998779296875, "learning_rate": 2.435331230283912e-07, "loss": 154.3125, "step": 386 }, { "epoch": 0.0036633504037258263, "grad_norm": 1079.589111328125, "learning_rate": 2.4416403785488957e-07, "loss": 87.0625, "step": 387 }, { "epoch": 0.0036728164254408798, "grad_norm": 1307.552490234375, "learning_rate": 2.44794952681388e-07, "loss": 150.0625, "step": 388 }, { "epoch": 0.0036822824471559336, "grad_norm": 41300.85546875, "learning_rate": 2.4542586750788645e-07, "loss": 349.0, "step": 389 }, { "epoch": 0.0036917484688709875, "grad_norm": 1458.7481689453125, "learning_rate": 2.4605678233438483e-07, "loss": 115.125, "step": 390 }, { "epoch": 0.0037012144905860414, "grad_norm": 1394.8209228515625, "learning_rate": 2.4668769716088327e-07, "loss": 121.0938, "step": 391 }, { "epoch": 0.0037106805123010953, "grad_norm": 3100.234619140625, "learning_rate": 2.473186119873817e-07, "loss": 96.8438, "step": 392 }, { "epoch": 0.003720146534016149, "grad_norm": 1114.331298828125, "learning_rate": 2.479495268138801e-07, "loss": 81.0625, "step": 393 }, { "epoch": 0.003729612555731203, "grad_norm": 1301.73486328125, "learning_rate": 2.4858044164037854e-07, "loss": 80.4062, "step": 394 }, { "epoch": 0.0037390785774462565, "grad_norm": 1028.352783203125, "learning_rate": 2.49211356466877e-07, "loss": 136.875, "step": 395 }, { "epoch": 0.0037485445991613103, "grad_norm": 1098.214111328125, "learning_rate": 2.4984227129337537e-07, "loss": 65.75, "step": 396 }, { "epoch": 0.003758010620876364, "grad_norm": 885.849853515625, "learning_rate": 2.504731861198738e-07, "loss": 76.0, "step": 397 }, { "epoch": 0.003767476642591418, "grad_norm": 1819.6568603515625, "learning_rate": 2.5110410094637225e-07, "loss": 79.625, "step": 398 }, { "epoch": 0.003776942664306472, "grad_norm": 1181.97705078125, "learning_rate": 2.5173501577287063e-07, "loss": 84.0625, "step": 399 }, { "epoch": 0.003786408686021526, "grad_norm": 1351.29443359375, "learning_rate": 2.5236593059936907e-07, "loss": 136.3438, "step": 400 }, { "epoch": 0.0037958747077365797, "grad_norm": 896.8280639648438, "learning_rate": 2.529968454258675e-07, "loss": 74.5625, "step": 401 }, { "epoch": 0.003805340729451633, "grad_norm": 904.6307373046875, "learning_rate": 2.5362776025236595e-07, "loss": 157.5625, "step": 402 }, { "epoch": 0.003814806751166687, "grad_norm": 887.6670532226562, "learning_rate": 2.5425867507886434e-07, "loss": 89.9062, "step": 403 }, { "epoch": 0.003824272772881741, "grad_norm": 1105.4937744140625, "learning_rate": 2.548895899053627e-07, "loss": 128.8125, "step": 404 }, { "epoch": 0.0038337387945967948, "grad_norm": 1030.996337890625, "learning_rate": 2.555205047318612e-07, "loss": 80.875, "step": 405 }, { "epoch": 0.0038432048163118487, "grad_norm": 1993.22705078125, "learning_rate": 2.561514195583596e-07, "loss": 154.125, "step": 406 }, { "epoch": 0.0038526708380269025, "grad_norm": 930.708984375, "learning_rate": 2.5678233438485805e-07, "loss": 99.1562, "step": 407 }, { "epoch": 0.0038621368597419564, "grad_norm": 1368.19482421875, "learning_rate": 2.5741324921135643e-07, "loss": 153.3125, "step": 408 }, { "epoch": 0.0038716028814570103, "grad_norm": 2.4488751888275146, "learning_rate": 2.5804416403785487e-07, "loss": 0.7373, "step": 409 }, { "epoch": 0.0038810689031720637, "grad_norm": 792.1890869140625, "learning_rate": 2.586750788643533e-07, "loss": 64.4688, "step": 410 }, { "epoch": 0.0038905349248871176, "grad_norm": 784.4141235351562, "learning_rate": 2.593059936908517e-07, "loss": 68.6875, "step": 411 }, { "epoch": 0.0039000009466021715, "grad_norm": 661.890869140625, "learning_rate": 2.599369085173502e-07, "loss": 68.0625, "step": 412 }, { "epoch": 0.003909466968317225, "grad_norm": 865.8956909179688, "learning_rate": 2.605678233438486e-07, "loss": 131.3125, "step": 413 }, { "epoch": 0.003918932990032279, "grad_norm": 2314.0205078125, "learning_rate": 2.6119873817034697e-07, "loss": 127.5312, "step": 414 }, { "epoch": 0.003928399011747333, "grad_norm": 920.534912109375, "learning_rate": 2.618296529968454e-07, "loss": 151.0625, "step": 415 }, { "epoch": 0.003937865033462387, "grad_norm": 1379.917724609375, "learning_rate": 2.6246056782334385e-07, "loss": 139.0938, "step": 416 }, { "epoch": 0.003947331055177441, "grad_norm": 2.8442399501800537, "learning_rate": 2.630914826498423e-07, "loss": 0.8975, "step": 417 }, { "epoch": 0.003956797076892495, "grad_norm": 695.2325439453125, "learning_rate": 2.6372239747634067e-07, "loss": 65.1562, "step": 418 }, { "epoch": 0.003966263098607549, "grad_norm": 967.9423828125, "learning_rate": 2.6435331230283906e-07, "loss": 65.375, "step": 419 }, { "epoch": 0.003975729120322602, "grad_norm": 859.5576782226562, "learning_rate": 2.6498422712933755e-07, "loss": 78.8125, "step": 420 }, { "epoch": 0.0039851951420376555, "grad_norm": 3745.72705078125, "learning_rate": 2.6561514195583594e-07, "loss": 137.5312, "step": 421 }, { "epoch": 0.003994661163752709, "grad_norm": 883.601318359375, "learning_rate": 2.662460567823344e-07, "loss": 63.5, "step": 422 }, { "epoch": 0.004004127185467763, "grad_norm": 844.1168823242188, "learning_rate": 2.6687697160883277e-07, "loss": 80.875, "step": 423 }, { "epoch": 0.004013593207182817, "grad_norm": 1464.6785888671875, "learning_rate": 2.675078864353312e-07, "loss": 164.1875, "step": 424 }, { "epoch": 0.004023059228897871, "grad_norm": 1181.98779296875, "learning_rate": 2.6813880126182965e-07, "loss": 121.0312, "step": 425 }, { "epoch": 0.004032525250612925, "grad_norm": 1021.4419555664062, "learning_rate": 2.6876971608832803e-07, "loss": 97.6875, "step": 426 }, { "epoch": 0.004041991272327979, "grad_norm": 1946.1812744140625, "learning_rate": 2.694006309148265e-07, "loss": 76.1562, "step": 427 }, { "epoch": 0.004051457294043033, "grad_norm": 3.32100510597229, "learning_rate": 2.700315457413249e-07, "loss": 1.0259, "step": 428 }, { "epoch": 0.0040609233157580865, "grad_norm": 1090.2227783203125, "learning_rate": 2.706624605678233e-07, "loss": 98.6875, "step": 429 }, { "epoch": 0.00407038933747314, "grad_norm": 989.4611206054688, "learning_rate": 2.7129337539432174e-07, "loss": 72.7188, "step": 430 }, { "epoch": 0.004079855359188194, "grad_norm": 812.07373046875, "learning_rate": 2.719242902208202e-07, "loss": 66.7812, "step": 431 }, { "epoch": 0.004089321380903248, "grad_norm": 767.497802734375, "learning_rate": 2.725552050473186e-07, "loss": 75.8125, "step": 432 }, { "epoch": 0.004098787402618302, "grad_norm": 824.1475219726562, "learning_rate": 2.73186119873817e-07, "loss": 66.7812, "step": 433 }, { "epoch": 0.004108253424333355, "grad_norm": 913.0955810546875, "learning_rate": 2.738170347003154e-07, "loss": 104.0625, "step": 434 }, { "epoch": 0.004117719446048409, "grad_norm": 891.0907592773438, "learning_rate": 2.744479495268139e-07, "loss": 80.5625, "step": 435 }, { "epoch": 0.004127185467763463, "grad_norm": 908.581298828125, "learning_rate": 2.7507886435331227e-07, "loss": 71.375, "step": 436 }, { "epoch": 0.004136651489478517, "grad_norm": 1147.619384765625, "learning_rate": 2.757097791798107e-07, "loss": 93.6875, "step": 437 }, { "epoch": 0.0041461175111935705, "grad_norm": 3.2529542446136475, "learning_rate": 2.7634069400630915e-07, "loss": 0.8408, "step": 438 }, { "epoch": 0.004155583532908624, "grad_norm": 695.8268432617188, "learning_rate": 2.769716088328076e-07, "loss": 61.1562, "step": 439 }, { "epoch": 0.004165049554623678, "grad_norm": 918.6434936523438, "learning_rate": 2.77602523659306e-07, "loss": 85.7188, "step": 440 }, { "epoch": 0.004174515576338732, "grad_norm": 969.8341674804688, "learning_rate": 2.7823343848580437e-07, "loss": 79.3125, "step": 441 }, { "epoch": 0.004183981598053786, "grad_norm": 951.085693359375, "learning_rate": 2.7886435331230286e-07, "loss": 71.7188, "step": 442 }, { "epoch": 0.00419344761976884, "grad_norm": 1156.0338134765625, "learning_rate": 2.7949526813880125e-07, "loss": 191.125, "step": 443 }, { "epoch": 0.004202913641483894, "grad_norm": 790.0103759765625, "learning_rate": 2.801261829652997e-07, "loss": 131.0625, "step": 444 }, { "epoch": 0.004212379663198948, "grad_norm": 931.4601440429688, "learning_rate": 2.8075709779179807e-07, "loss": 134.7812, "step": 445 }, { "epoch": 0.0042218456849140015, "grad_norm": 738.5828857421875, "learning_rate": 2.813880126182965e-07, "loss": 108.1562, "step": 446 }, { "epoch": 0.004231311706629055, "grad_norm": 783.3395385742188, "learning_rate": 2.8201892744479495e-07, "loss": 124.9375, "step": 447 }, { "epoch": 0.004240777728344109, "grad_norm": 784.3565063476562, "learning_rate": 2.8264984227129334e-07, "loss": 81.7812, "step": 448 }, { "epoch": 0.004250243750059162, "grad_norm": 3.651919364929199, "learning_rate": 2.8328075709779183e-07, "loss": 0.8901, "step": 449 }, { "epoch": 0.004259709771774216, "grad_norm": 2071.64306640625, "learning_rate": 2.839116719242902e-07, "loss": 204.0, "step": 450 }, { "epoch": 0.00426917579348927, "grad_norm": 1376.1370849609375, "learning_rate": 2.845425867507886e-07, "loss": 77.5938, "step": 451 }, { "epoch": 0.004278641815204324, "grad_norm": 872.4217529296875, "learning_rate": 2.8517350157728705e-07, "loss": 64.5312, "step": 452 }, { "epoch": 0.004288107836919378, "grad_norm": 928.6163330078125, "learning_rate": 2.858044164037855e-07, "loss": 77.7812, "step": 453 }, { "epoch": 0.004297573858634432, "grad_norm": 872.2057495117188, "learning_rate": 2.864353312302839e-07, "loss": 67.3125, "step": 454 }, { "epoch": 0.0043070398803494855, "grad_norm": 1324.0980224609375, "learning_rate": 2.870662460567823e-07, "loss": 70.0938, "step": 455 }, { "epoch": 0.004316505902064539, "grad_norm": 767.0242309570312, "learning_rate": 2.876971608832807e-07, "loss": 68.1875, "step": 456 }, { "epoch": 0.004325971923779593, "grad_norm": 1290.716552734375, "learning_rate": 2.883280757097792e-07, "loss": 91.3125, "step": 457 }, { "epoch": 0.004335437945494647, "grad_norm": 3.6982409954071045, "learning_rate": 2.889589905362776e-07, "loss": 0.8906, "step": 458 }, { "epoch": 0.004344903967209701, "grad_norm": 687.7546997070312, "learning_rate": 2.89589905362776e-07, "loss": 66.5312, "step": 459 }, { "epoch": 0.004354369988924755, "grad_norm": 726.9271240234375, "learning_rate": 2.9022082018927446e-07, "loss": 68.8438, "step": 460 }, { "epoch": 0.004363836010639809, "grad_norm": 901.81884765625, "learning_rate": 2.9085173501577285e-07, "loss": 82.4062, "step": 461 }, { "epoch": 0.004373302032354863, "grad_norm": 1623.676513671875, "learning_rate": 2.914826498422713e-07, "loss": 154.5625, "step": 462 }, { "epoch": 0.004382768054069916, "grad_norm": 803.6258544921875, "learning_rate": 2.9211356466876967e-07, "loss": 117.0625, "step": 463 }, { "epoch": 0.0043922340757849695, "grad_norm": 891.427734375, "learning_rate": 2.9274447949526817e-07, "loss": 65.1562, "step": 464 }, { "epoch": 0.004401700097500023, "grad_norm": 1312.71337890625, "learning_rate": 2.9337539432176655e-07, "loss": 153.2812, "step": 465 }, { "epoch": 0.004411166119215077, "grad_norm": 2.616243362426758, "learning_rate": 2.9400630914826494e-07, "loss": 0.8545, "step": 466 }, { "epoch": 0.004420632140930131, "grad_norm": 847.6250610351562, "learning_rate": 2.946372239747634e-07, "loss": 69.1875, "step": 467 }, { "epoch": 0.004430098162645185, "grad_norm": 725.62060546875, "learning_rate": 2.952681388012618e-07, "loss": 64.375, "step": 468 }, { "epoch": 0.004439564184360239, "grad_norm": 635.97900390625, "learning_rate": 2.9589905362776026e-07, "loss": 114.9375, "step": 469 }, { "epoch": 0.004449030206075293, "grad_norm": 948.571044921875, "learning_rate": 2.9652996845425865e-07, "loss": 122.625, "step": 470 }, { "epoch": 0.004458496227790347, "grad_norm": 738.7330322265625, "learning_rate": 2.9716088328075703e-07, "loss": 68.7188, "step": 471 }, { "epoch": 0.0044679622495054005, "grad_norm": 2.6619417667388916, "learning_rate": 2.977917981072555e-07, "loss": 0.8384, "step": 472 }, { "epoch": 0.004477428271220454, "grad_norm": 1052.533935546875, "learning_rate": 2.984227129337539e-07, "loss": 62.8438, "step": 473 }, { "epoch": 0.004486894292935508, "grad_norm": 809.7598876953125, "learning_rate": 2.9905362776025235e-07, "loss": 71.25, "step": 474 }, { "epoch": 0.004496360314650562, "grad_norm": 786.0537719726562, "learning_rate": 2.996845425867508e-07, "loss": 70.0, "step": 475 }, { "epoch": 0.004505826336365616, "grad_norm": 996.31201171875, "learning_rate": 3.0031545741324923e-07, "loss": 124.6562, "step": 476 }, { "epoch": 0.004515292358080669, "grad_norm": 888.217529296875, "learning_rate": 3.009463722397476e-07, "loss": 142.0625, "step": 477 }, { "epoch": 0.004524758379795723, "grad_norm": 3515.321533203125, "learning_rate": 3.01577287066246e-07, "loss": 64.1562, "step": 478 }, { "epoch": 0.004534224401510777, "grad_norm": 970.2354125976562, "learning_rate": 3.022082018927445e-07, "loss": 151.875, "step": 479 }, { "epoch": 0.004543690423225831, "grad_norm": 841.5230712890625, "learning_rate": 3.028391167192429e-07, "loss": 133.375, "step": 480 }, { "epoch": 0.0045531564449408845, "grad_norm": 822.075439453125, "learning_rate": 3.034700315457413e-07, "loss": 60.8125, "step": 481 }, { "epoch": 0.004562622466655938, "grad_norm": 861.5018920898438, "learning_rate": 3.041009463722397e-07, "loss": 71.9688, "step": 482 }, { "epoch": 0.004572088488370992, "grad_norm": 666.4907836914062, "learning_rate": 3.0473186119873815e-07, "loss": 66.9375, "step": 483 }, { "epoch": 0.004581554510086046, "grad_norm": 585.662353515625, "learning_rate": 3.053627760252366e-07, "loss": 53.9375, "step": 484 }, { "epoch": 0.0045910205318011, "grad_norm": 1327.086181640625, "learning_rate": 3.05993690851735e-07, "loss": 142.125, "step": 485 }, { "epoch": 0.004600486553516154, "grad_norm": 3.1511454582214355, "learning_rate": 3.0662460567823347e-07, "loss": 1.0029, "step": 486 }, { "epoch": 0.004609952575231208, "grad_norm": 553.4071044921875, "learning_rate": 3.0725552050473186e-07, "loss": 65.0938, "step": 487 }, { "epoch": 0.004619418596946262, "grad_norm": 767.5245971679688, "learning_rate": 3.0788643533123025e-07, "loss": 63.9062, "step": 488 }, { "epoch": 0.0046288846186613155, "grad_norm": 902.817626953125, "learning_rate": 3.085173501577287e-07, "loss": 67.1562, "step": 489 }, { "epoch": 0.004638350640376369, "grad_norm": 1416.4351806640625, "learning_rate": 3.0914826498422713e-07, "loss": 158.5625, "step": 490 }, { "epoch": 0.004647816662091422, "grad_norm": 780.04833984375, "learning_rate": 3.0977917981072557e-07, "loss": 62.0312, "step": 491 }, { "epoch": 0.004657282683806476, "grad_norm": 843.4635620117188, "learning_rate": 3.1041009463722395e-07, "loss": 71.8438, "step": 492 }, { "epoch": 0.00466674870552153, "grad_norm": 651.6881713867188, "learning_rate": 3.1104100946372234e-07, "loss": 60.7812, "step": 493 }, { "epoch": 0.004676214727236584, "grad_norm": 900.7103881835938, "learning_rate": 3.1167192429022083e-07, "loss": 91.2188, "step": 494 }, { "epoch": 0.004685680748951638, "grad_norm": 1069.5946044921875, "learning_rate": 3.123028391167192e-07, "loss": 100.5, "step": 495 }, { "epoch": 0.004695146770666692, "grad_norm": 536.8743286132812, "learning_rate": 3.1293375394321766e-07, "loss": 64.8438, "step": 496 }, { "epoch": 0.004704612792381746, "grad_norm": 697.8759765625, "learning_rate": 3.135646687697161e-07, "loss": 100.9375, "step": 497 }, { "epoch": 0.0047140788140967995, "grad_norm": 905.1168212890625, "learning_rate": 3.141955835962145e-07, "loss": 69.2188, "step": 498 }, { "epoch": 0.004723544835811853, "grad_norm": 1300.9388427734375, "learning_rate": 3.1482649842271293e-07, "loss": 114.0312, "step": 499 }, { "epoch": 0.004733010857526907, "grad_norm": 594.5653686523438, "learning_rate": 3.154574132492113e-07, "loss": 61.9375, "step": 500 }, { "epoch": 0.004742476879241961, "grad_norm": 855.7710571289062, "learning_rate": 3.160883280757098e-07, "loss": 109.5, "step": 501 }, { "epoch": 0.004751942900957015, "grad_norm": 551.5289916992188, "learning_rate": 3.167192429022082e-07, "loss": 66.875, "step": 502 }, { "epoch": 0.004761408922672069, "grad_norm": 780.4547729492188, "learning_rate": 3.173501577287066e-07, "loss": 63.5625, "step": 503 }, { "epoch": 0.004770874944387123, "grad_norm": 1032.2275390625, "learning_rate": 3.17981072555205e-07, "loss": 64.2188, "step": 504 }, { "epoch": 0.004780340966102177, "grad_norm": 3.240508556365967, "learning_rate": 3.1861198738170346e-07, "loss": 0.9658, "step": 505 }, { "epoch": 0.00478980698781723, "grad_norm": 768.9003295898438, "learning_rate": 3.192429022082019e-07, "loss": 161.75, "step": 506 }, { "epoch": 0.0047992730095322836, "grad_norm": 944.4700317382812, "learning_rate": 3.198738170347003e-07, "loss": 81.7812, "step": 507 }, { "epoch": 0.004808739031247337, "grad_norm": 2290.404052734375, "learning_rate": 3.205047318611987e-07, "loss": 141.0, "step": 508 }, { "epoch": 0.004818205052962391, "grad_norm": 1137.8609619140625, "learning_rate": 3.2113564668769717e-07, "loss": 74.375, "step": 509 }, { "epoch": 0.004827671074677445, "grad_norm": 887.4998168945312, "learning_rate": 3.2176656151419555e-07, "loss": 68.7812, "step": 510 }, { "epoch": 0.004837137096392499, "grad_norm": 815.4695434570312, "learning_rate": 3.22397476340694e-07, "loss": 63.1562, "step": 511 }, { "epoch": 0.004846603118107553, "grad_norm": 1618.4085693359375, "learning_rate": 3.2302839116719243e-07, "loss": 61.5312, "step": 512 }, { "epoch": 0.004856069139822607, "grad_norm": 1177.1611328125, "learning_rate": 3.236593059936908e-07, "loss": 90.5, "step": 513 }, { "epoch": 0.004865535161537661, "grad_norm": 922.9889526367188, "learning_rate": 3.2429022082018926e-07, "loss": 87.5, "step": 514 }, { "epoch": 0.0048750011832527146, "grad_norm": 891.3350219726562, "learning_rate": 3.2492113564668765e-07, "loss": 130.2188, "step": 515 }, { "epoch": 0.004884467204967768, "grad_norm": 1355.3280029296875, "learning_rate": 3.2555205047318614e-07, "loss": 154.4062, "step": 516 }, { "epoch": 0.004893933226682822, "grad_norm": 2.927227258682251, "learning_rate": 3.2618296529968453e-07, "loss": 0.9053, "step": 517 }, { "epoch": 0.004903399248397876, "grad_norm": 684.908935546875, "learning_rate": 3.268138801261829e-07, "loss": 61.7812, "step": 518 }, { "epoch": 0.00491286527011293, "grad_norm": 1358.9091796875, "learning_rate": 3.2744479495268135e-07, "loss": 65.8125, "step": 519 }, { "epoch": 0.004922331291827983, "grad_norm": 611.5530395507812, "learning_rate": 3.280757097791798e-07, "loss": 62.25, "step": 520 }, { "epoch": 0.004931797313543037, "grad_norm": 1594.5277099609375, "learning_rate": 3.2870662460567823e-07, "loss": 107.0625, "step": 521 }, { "epoch": 0.004941263335258091, "grad_norm": 751.2952880859375, "learning_rate": 3.293375394321766e-07, "loss": 71.4688, "step": 522 }, { "epoch": 0.004950729356973145, "grad_norm": 901.8346557617188, "learning_rate": 3.299684542586751e-07, "loss": 132.0, "step": 523 }, { "epoch": 0.0049601953786881986, "grad_norm": 977.2008666992188, "learning_rate": 3.305993690851735e-07, "loss": 115.625, "step": 524 }, { "epoch": 0.0049696614004032524, "grad_norm": 1012.7542724609375, "learning_rate": 3.312302839116719e-07, "loss": 77.25, "step": 525 }, { "epoch": 0.004979127422118306, "grad_norm": 996.6942138671875, "learning_rate": 3.3186119873817033e-07, "loss": 68.8438, "step": 526 }, { "epoch": 0.00498859344383336, "grad_norm": 821.8289184570312, "learning_rate": 3.3249211356466877e-07, "loss": 66.3438, "step": 527 }, { "epoch": 0.004998059465548414, "grad_norm": 787.1178588867188, "learning_rate": 3.331230283911672e-07, "loss": 68.4688, "step": 528 }, { "epoch": 0.005007525487263468, "grad_norm": 955.9060668945312, "learning_rate": 3.337539432176656e-07, "loss": 70.9375, "step": 529 }, { "epoch": 0.005016991508978522, "grad_norm": 2.673048257827759, "learning_rate": 3.34384858044164e-07, "loss": 0.8884, "step": 530 }, { "epoch": 0.005026457530693576, "grad_norm": 782.0868530273438, "learning_rate": 3.350157728706625e-07, "loss": 113.4062, "step": 531 }, { "epoch": 0.0050359235524086296, "grad_norm": 1127.822998046875, "learning_rate": 3.3564668769716086e-07, "loss": 108.3125, "step": 532 }, { "epoch": 0.0050453895741236834, "grad_norm": 752.520263671875, "learning_rate": 3.362776025236593e-07, "loss": 60.4062, "step": 533 }, { "epoch": 0.0050548555958387364, "grad_norm": 1536.3759765625, "learning_rate": 3.3690851735015774e-07, "loss": 155.8125, "step": 534 }, { "epoch": 0.00506432161755379, "grad_norm": 1995.37548828125, "learning_rate": 3.3753943217665613e-07, "loss": 65.0938, "step": 535 }, { "epoch": 0.005073787639268844, "grad_norm": 693.6814575195312, "learning_rate": 3.3817034700315457e-07, "loss": 60.9688, "step": 536 }, { "epoch": 0.005083253660983898, "grad_norm": 816.6685791015625, "learning_rate": 3.3880126182965295e-07, "loss": 59.3438, "step": 537 }, { "epoch": 0.005092719682698952, "grad_norm": 4.170598030090332, "learning_rate": 3.3943217665615145e-07, "loss": 0.874, "step": 538 }, { "epoch": 0.005102185704414006, "grad_norm": 1411.790771484375, "learning_rate": 3.4006309148264983e-07, "loss": 125.5312, "step": 539 }, { "epoch": 0.00511165172612906, "grad_norm": 6981.935546875, "learning_rate": 3.406940063091482e-07, "loss": 148.8125, "step": 540 }, { "epoch": 0.005121117747844114, "grad_norm": 809.6035766601562, "learning_rate": 3.4132492113564666e-07, "loss": 65.4688, "step": 541 }, { "epoch": 0.0051305837695591675, "grad_norm": 810.4950561523438, "learning_rate": 3.419558359621451e-07, "loss": 75.5938, "step": 542 }, { "epoch": 0.005140049791274221, "grad_norm": 589.7190551757812, "learning_rate": 3.4258675078864354e-07, "loss": 61.5625, "step": 543 }, { "epoch": 0.005149515812989275, "grad_norm": 3448.148681640625, "learning_rate": 3.4321766561514193e-07, "loss": 242.9375, "step": 544 }, { "epoch": 0.005158981834704329, "grad_norm": 803.6175537109375, "learning_rate": 3.4384858044164037e-07, "loss": 89.0312, "step": 545 }, { "epoch": 0.005168447856419383, "grad_norm": 658.2818603515625, "learning_rate": 3.444794952681388e-07, "loss": 55.5625, "step": 546 }, { "epoch": 0.005177913878134437, "grad_norm": 1006.4055786132812, "learning_rate": 3.451104100946372e-07, "loss": 60.8438, "step": 547 }, { "epoch": 0.00518737989984949, "grad_norm": 1055.0003662109375, "learning_rate": 3.4574132492113563e-07, "loss": 131.125, "step": 548 }, { "epoch": 0.005196845921564544, "grad_norm": 904.6844482421875, "learning_rate": 3.463722397476341e-07, "loss": 142.3125, "step": 549 }, { "epoch": 0.005206311943279598, "grad_norm": 619.63623046875, "learning_rate": 3.4700315457413246e-07, "loss": 62.4688, "step": 550 }, { "epoch": 0.0052157779649946515, "grad_norm": 709.7120361328125, "learning_rate": 3.476340694006309e-07, "loss": 82.3438, "step": 551 }, { "epoch": 0.005225243986709705, "grad_norm": 1482.140869140625, "learning_rate": 3.482649842271293e-07, "loss": 128.625, "step": 552 }, { "epoch": 0.005234710008424759, "grad_norm": 578.4993286132812, "learning_rate": 3.488958990536278e-07, "loss": 59.5625, "step": 553 }, { "epoch": 0.005244176030139813, "grad_norm": 1448.31640625, "learning_rate": 3.4952681388012617e-07, "loss": 61.2812, "step": 554 }, { "epoch": 0.005253642051854867, "grad_norm": 1396.4014892578125, "learning_rate": 3.5015772870662455e-07, "loss": 94.8438, "step": 555 }, { "epoch": 0.005263108073569921, "grad_norm": 1177.962158203125, "learning_rate": 3.50788643533123e-07, "loss": 126.0625, "step": 556 }, { "epoch": 0.005272574095284975, "grad_norm": 975.5050048828125, "learning_rate": 3.5141955835962143e-07, "loss": 74.2188, "step": 557 }, { "epoch": 0.005282040117000029, "grad_norm": 782.9436645507812, "learning_rate": 3.520504731861199e-07, "loss": 67.0625, "step": 558 }, { "epoch": 0.0052915061387150825, "grad_norm": 908.966796875, "learning_rate": 3.5268138801261826e-07, "loss": 56.5625, "step": 559 }, { "epoch": 0.005300972160430136, "grad_norm": 2119.72705078125, "learning_rate": 3.533123028391167e-07, "loss": 84.8438, "step": 560 }, { "epoch": 0.00531043818214519, "grad_norm": 659.0459594726562, "learning_rate": 3.5394321766561514e-07, "loss": 57.6562, "step": 561 }, { "epoch": 0.005319904203860243, "grad_norm": 3.452929973602295, "learning_rate": 3.5457413249211353e-07, "loss": 0.8809, "step": 562 }, { "epoch": 0.005329370225575297, "grad_norm": 593.4617919921875, "learning_rate": 3.5520504731861197e-07, "loss": 57.6875, "step": 563 }, { "epoch": 0.005338836247290351, "grad_norm": 695.5679931640625, "learning_rate": 3.558359621451104e-07, "loss": 71.6562, "step": 564 }, { "epoch": 0.005348302269005405, "grad_norm": 1503.13818359375, "learning_rate": 3.564668769716088e-07, "loss": 127.125, "step": 565 }, { "epoch": 0.005357768290720459, "grad_norm": 698.4495239257812, "learning_rate": 3.5709779179810723e-07, "loss": 81.9062, "step": 566 }, { "epoch": 0.005367234312435513, "grad_norm": 2.516832113265991, "learning_rate": 3.577287066246056e-07, "loss": 0.9082, "step": 567 }, { "epoch": 0.0053767003341505665, "grad_norm": 1684.319091796875, "learning_rate": 3.583596214511041e-07, "loss": 98.375, "step": 568 }, { "epoch": 0.00538616635586562, "grad_norm": 603.4769287109375, "learning_rate": 3.589905362776025e-07, "loss": 57.375, "step": 569 }, { "epoch": 0.005395632377580674, "grad_norm": 766.1370239257812, "learning_rate": 3.5962145110410094e-07, "loss": 55.6562, "step": 570 }, { "epoch": 0.005405098399295728, "grad_norm": 2.693240165710449, "learning_rate": 3.602523659305994e-07, "loss": 0.908, "step": 571 }, { "epoch": 0.005414564421010782, "grad_norm": 1667.3143310546875, "learning_rate": 3.6088328075709777e-07, "loss": 145.375, "step": 572 }, { "epoch": 0.005424030442725836, "grad_norm": 1260.6129150390625, "learning_rate": 3.615141955835962e-07, "loss": 99.5, "step": 573 }, { "epoch": 0.00543349646444089, "grad_norm": 900.5045166015625, "learning_rate": 3.621451104100946e-07, "loss": 86.7188, "step": 574 }, { "epoch": 0.005442962486155944, "grad_norm": 1474.1209716796875, "learning_rate": 3.627760252365931e-07, "loss": 93.8438, "step": 575 }, { "epoch": 0.0054524285078709975, "grad_norm": 610.5883178710938, "learning_rate": 3.634069400630915e-07, "loss": 62.4688, "step": 576 }, { "epoch": 0.0054618945295860505, "grad_norm": 3860.3916015625, "learning_rate": 3.6403785488958986e-07, "loss": 125.5938, "step": 577 }, { "epoch": 0.005471360551301104, "grad_norm": 1044.5986328125, "learning_rate": 3.646687697160883e-07, "loss": 135.9375, "step": 578 }, { "epoch": 0.005480826573016158, "grad_norm": 752.4140014648438, "learning_rate": 3.6529968454258674e-07, "loss": 57.4062, "step": 579 }, { "epoch": 0.005490292594731212, "grad_norm": 624.4622802734375, "learning_rate": 3.659305993690852e-07, "loss": 58.8125, "step": 580 }, { "epoch": 0.005499758616446266, "grad_norm": 882.2796630859375, "learning_rate": 3.6656151419558357e-07, "loss": 64.8438, "step": 581 }, { "epoch": 0.00550922463816132, "grad_norm": 1344.762451171875, "learning_rate": 3.67192429022082e-07, "loss": 101.9375, "step": 582 }, { "epoch": 0.005518690659876374, "grad_norm": 878.3090209960938, "learning_rate": 3.6782334384858045e-07, "loss": 61.6094, "step": 583 }, { "epoch": 0.005528156681591428, "grad_norm": 731.1142578125, "learning_rate": 3.6845425867507883e-07, "loss": 73.5312, "step": 584 }, { "epoch": 0.0055376227033064815, "grad_norm": 1131.4705810546875, "learning_rate": 3.690851735015773e-07, "loss": 130.625, "step": 585 }, { "epoch": 0.005547088725021535, "grad_norm": 1000.5369262695312, "learning_rate": 3.697160883280757e-07, "loss": 106.0625, "step": 586 }, { "epoch": 0.005556554746736589, "grad_norm": 3.076819658279419, "learning_rate": 3.703470031545741e-07, "loss": 0.8169, "step": 587 }, { "epoch": 0.005566020768451643, "grad_norm": 2383.037109375, "learning_rate": 3.7097791798107254e-07, "loss": 69.6562, "step": 588 }, { "epoch": 0.005575486790166697, "grad_norm": 792.3946533203125, "learning_rate": 3.7160883280757093e-07, "loss": 66.8438, "step": 589 }, { "epoch": 0.005584952811881751, "grad_norm": 2.9350340366363525, "learning_rate": 3.722397476340694e-07, "loss": 0.8853, "step": 590 }, { "epoch": 0.005594418833596804, "grad_norm": 1533.6539306640625, "learning_rate": 3.728706624605678e-07, "loss": 58.875, "step": 591 }, { "epoch": 0.005603884855311858, "grad_norm": 1303.335205078125, "learning_rate": 3.735015772870662e-07, "loss": 113.625, "step": 592 }, { "epoch": 0.005613350877026912, "grad_norm": 602.9747924804688, "learning_rate": 3.741324921135647e-07, "loss": 58.375, "step": 593 }, { "epoch": 0.0056228168987419655, "grad_norm": 604.57568359375, "learning_rate": 3.747634069400631e-07, "loss": 57.625, "step": 594 }, { "epoch": 0.005632282920457019, "grad_norm": 865.6827392578125, "learning_rate": 3.753943217665615e-07, "loss": 145.875, "step": 595 }, { "epoch": 0.005641748942172073, "grad_norm": 3.388984203338623, "learning_rate": 3.760252365930599e-07, "loss": 0.8203, "step": 596 }, { "epoch": 0.005651214963887127, "grad_norm": 891.7890014648438, "learning_rate": 3.7665615141955834e-07, "loss": 95.8125, "step": 597 }, { "epoch": 0.005660680985602181, "grad_norm": 2357.702392578125, "learning_rate": 3.772870662460568e-07, "loss": 88.0469, "step": 598 }, { "epoch": 0.005670147007317235, "grad_norm": 1378.74658203125, "learning_rate": 3.7791798107255517e-07, "loss": 138.6875, "step": 599 }, { "epoch": 0.005679613029032289, "grad_norm": 866.7297973632812, "learning_rate": 3.785488958990536e-07, "loss": 115.9375, "step": 600 }, { "epoch": 0.005689079050747343, "grad_norm": 1085.9368896484375, "learning_rate": 3.7917981072555205e-07, "loss": 145.8125, "step": 601 }, { "epoch": 0.0056985450724623965, "grad_norm": 705.8587646484375, "learning_rate": 3.7981072555205043e-07, "loss": 124.5625, "step": 602 }, { "epoch": 0.00570801109417745, "grad_norm": 866.24169921875, "learning_rate": 3.804416403785489e-07, "loss": 63.75, "step": 603 }, { "epoch": 0.005717477115892504, "grad_norm": 682.0865478515625, "learning_rate": 3.8107255520504726e-07, "loss": 57.9688, "step": 604 }, { "epoch": 0.005726943137607557, "grad_norm": 652.6722412109375, "learning_rate": 3.8170347003154575e-07, "loss": 57.0938, "step": 605 }, { "epoch": 0.005736409159322611, "grad_norm": 648.8684692382812, "learning_rate": 3.8233438485804414e-07, "loss": 64.4062, "step": 606 }, { "epoch": 0.005745875181037665, "grad_norm": 3.5317459106445312, "learning_rate": 3.8296529968454253e-07, "loss": 0.9404, "step": 607 }, { "epoch": 0.005755341202752719, "grad_norm": 903.4336547851562, "learning_rate": 3.83596214511041e-07, "loss": 64.8438, "step": 608 }, { "epoch": 0.005764807224467773, "grad_norm": 675.7362670898438, "learning_rate": 3.842271293375394e-07, "loss": 57.7812, "step": 609 }, { "epoch": 0.005774273246182827, "grad_norm": 754.898193359375, "learning_rate": 3.8485804416403785e-07, "loss": 80.5625, "step": 610 }, { "epoch": 0.0057837392678978805, "grad_norm": 1030.74755859375, "learning_rate": 3.8548895899053624e-07, "loss": 113.25, "step": 611 }, { "epoch": 0.005793205289612934, "grad_norm": 2.676038980484009, "learning_rate": 3.8611987381703473e-07, "loss": 0.7505, "step": 612 }, { "epoch": 0.005802671311327988, "grad_norm": 672.6953125, "learning_rate": 3.867507886435331e-07, "loss": 56.8125, "step": 613 }, { "epoch": 0.005812137333043042, "grad_norm": 1044.2171630859375, "learning_rate": 3.873817034700315e-07, "loss": 120.6875, "step": 614 }, { "epoch": 0.005821603354758096, "grad_norm": 2.68900465965271, "learning_rate": 3.8801261829652994e-07, "loss": 0.8315, "step": 615 }, { "epoch": 0.00583106937647315, "grad_norm": 890.1533813476562, "learning_rate": 3.886435331230284e-07, "loss": 141.6875, "step": 616 }, { "epoch": 0.005840535398188204, "grad_norm": 831.1906127929688, "learning_rate": 3.892744479495268e-07, "loss": 61.0625, "step": 617 }, { "epoch": 0.005850001419903258, "grad_norm": 766.3878784179688, "learning_rate": 3.899053627760252e-07, "loss": 64.125, "step": 618 }, { "epoch": 0.005859467441618311, "grad_norm": 928.918212890625, "learning_rate": 3.9053627760252365e-07, "loss": 74.7344, "step": 619 }, { "epoch": 0.0058689334633333645, "grad_norm": 1462.5673828125, "learning_rate": 3.911671924290221e-07, "loss": 113.4375, "step": 620 }, { "epoch": 0.005878399485048418, "grad_norm": 654.6067504882812, "learning_rate": 3.917981072555205e-07, "loss": 66.0938, "step": 621 }, { "epoch": 0.005887865506763472, "grad_norm": 2349.647216796875, "learning_rate": 3.924290220820189e-07, "loss": 127.4375, "step": 622 }, { "epoch": 0.005897331528478526, "grad_norm": 911.2752075195312, "learning_rate": 3.9305993690851735e-07, "loss": 63.125, "step": 623 }, { "epoch": 0.00590679755019358, "grad_norm": 1020.8985595703125, "learning_rate": 3.9369085173501574e-07, "loss": 147.625, "step": 624 }, { "epoch": 0.005916263571908634, "grad_norm": 742.1503295898438, "learning_rate": 3.943217665615142e-07, "loss": 54.0312, "step": 625 }, { "epoch": 0.005925729593623688, "grad_norm": 527.100341796875, "learning_rate": 3.9495268138801257e-07, "loss": 47.9062, "step": 626 }, { "epoch": 0.005935195615338742, "grad_norm": 1978.780029296875, "learning_rate": 3.9558359621451106e-07, "loss": 90.5312, "step": 627 }, { "epoch": 0.0059446616370537955, "grad_norm": 712.3825073242188, "learning_rate": 3.9621451104100945e-07, "loss": 104.75, "step": 628 }, { "epoch": 0.005954127658768849, "grad_norm": 734.8538208007812, "learning_rate": 3.9684542586750784e-07, "loss": 110.125, "step": 629 }, { "epoch": 0.005963593680483903, "grad_norm": 665.1245727539062, "learning_rate": 3.9747634069400633e-07, "loss": 55.6562, "step": 630 }, { "epoch": 0.005973059702198957, "grad_norm": 648.5557861328125, "learning_rate": 3.981072555205047e-07, "loss": 89.9062, "step": 631 }, { "epoch": 0.005982525723914011, "grad_norm": 992.6857299804688, "learning_rate": 3.9873817034700316e-07, "loss": 95.5938, "step": 632 }, { "epoch": 0.005991991745629065, "grad_norm": 1588.2218017578125, "learning_rate": 3.9936908517350154e-07, "loss": 145.125, "step": 633 }, { "epoch": 0.006001457767344118, "grad_norm": 1168.3626708984375, "learning_rate": 4e-07, "loss": 137.8125, "step": 634 }, { "epoch": 0.006010923789059172, "grad_norm": 714.9613037109375, "learning_rate": 4.006309148264984e-07, "loss": 95.7188, "step": 635 }, { "epoch": 0.006020389810774226, "grad_norm": 746.7614135742188, "learning_rate": 4.012618296529968e-07, "loss": 88.1562, "step": 636 }, { "epoch": 0.0060298558324892795, "grad_norm": 706.3826904296875, "learning_rate": 4.0189274447949525e-07, "loss": 99.25, "step": 637 }, { "epoch": 0.006039321854204333, "grad_norm": 1050.4666748046875, "learning_rate": 4.025236593059937e-07, "loss": 64.2812, "step": 638 }, { "epoch": 0.006048787875919387, "grad_norm": 672.5553588867188, "learning_rate": 4.031545741324921e-07, "loss": 66.4688, "step": 639 }, { "epoch": 0.006058253897634441, "grad_norm": 3.886991024017334, "learning_rate": 4.037854889589905e-07, "loss": 0.8491, "step": 640 }, { "epoch": 0.006067719919349495, "grad_norm": 569.8007202148438, "learning_rate": 4.044164037854889e-07, "loss": 50.5625, "step": 641 }, { "epoch": 0.006077185941064549, "grad_norm": 1638.690185546875, "learning_rate": 4.050473186119874e-07, "loss": 121.1875, "step": 642 }, { "epoch": 0.006086651962779603, "grad_norm": 636.4658203125, "learning_rate": 4.056782334384858e-07, "loss": 69.4062, "step": 643 }, { "epoch": 0.006096117984494657, "grad_norm": 1232.5380859375, "learning_rate": 4.0630914826498417e-07, "loss": 127.625, "step": 644 }, { "epoch": 0.0061055840062097105, "grad_norm": 1831.5367431640625, "learning_rate": 4.0694006309148266e-07, "loss": 123.5938, "step": 645 }, { "epoch": 0.006115050027924764, "grad_norm": 706.0447998046875, "learning_rate": 4.0757097791798105e-07, "loss": 74.9375, "step": 646 }, { "epoch": 0.006124516049639818, "grad_norm": 747.140869140625, "learning_rate": 4.082018927444795e-07, "loss": 53.0938, "step": 647 }, { "epoch": 0.006133982071354871, "grad_norm": 1252.6072998046875, "learning_rate": 4.088328075709779e-07, "loss": 82.4375, "step": 648 }, { "epoch": 0.006143448093069925, "grad_norm": 673.9247436523438, "learning_rate": 4.094637223974763e-07, "loss": 53.5938, "step": 649 }, { "epoch": 0.006152914114784979, "grad_norm": 1363.1844482421875, "learning_rate": 4.1009463722397476e-07, "loss": 69.9844, "step": 650 }, { "epoch": 0.006162380136500033, "grad_norm": 632.7025756835938, "learning_rate": 4.1072555205047314e-07, "loss": 66.375, "step": 651 }, { "epoch": 0.006171846158215087, "grad_norm": 697.6436767578125, "learning_rate": 4.113564668769716e-07, "loss": 56.9062, "step": 652 }, { "epoch": 0.006181312179930141, "grad_norm": 1117.9647216796875, "learning_rate": 4.1198738170347e-07, "loss": 115.9844, "step": 653 }, { "epoch": 0.0061907782016451945, "grad_norm": 791.1919555664062, "learning_rate": 4.126182965299684e-07, "loss": 52.2812, "step": 654 }, { "epoch": 0.006200244223360248, "grad_norm": 920.097900390625, "learning_rate": 4.1324921135646685e-07, "loss": 88.8125, "step": 655 }, { "epoch": 0.006209710245075302, "grad_norm": 859.8260498046875, "learning_rate": 4.138801261829653e-07, "loss": 82.9062, "step": 656 }, { "epoch": 0.006219176266790356, "grad_norm": 767.6900634765625, "learning_rate": 4.1451104100946373e-07, "loss": 103.9375, "step": 657 }, { "epoch": 0.00622864228850541, "grad_norm": 1028.5289306640625, "learning_rate": 4.151419558359621e-07, "loss": 142.5625, "step": 658 }, { "epoch": 0.006238108310220464, "grad_norm": 628.8794555664062, "learning_rate": 4.157728706624605e-07, "loss": 52.3125, "step": 659 }, { "epoch": 0.006247574331935518, "grad_norm": 703.647705078125, "learning_rate": 4.16403785488959e-07, "loss": 53.3438, "step": 660 }, { "epoch": 0.006257040353650572, "grad_norm": 852.6135864257812, "learning_rate": 4.170347003154574e-07, "loss": 51.7188, "step": 661 }, { "epoch": 0.006266506375365625, "grad_norm": 621.4190063476562, "learning_rate": 4.176656151419558e-07, "loss": 51.5625, "step": 662 }, { "epoch": 0.0062759723970806786, "grad_norm": 682.772705078125, "learning_rate": 4.182965299684542e-07, "loss": 55.75, "step": 663 }, { "epoch": 0.006285438418795732, "grad_norm": 804.914306640625, "learning_rate": 4.189274447949527e-07, "loss": 75.9062, "step": 664 }, { "epoch": 0.006294904440510786, "grad_norm": 831.501953125, "learning_rate": 4.195583596214511e-07, "loss": 154.25, "step": 665 }, { "epoch": 0.00630437046222584, "grad_norm": 1172.2889404296875, "learning_rate": 4.201892744479495e-07, "loss": 109.9375, "step": 666 }, { "epoch": 0.006313836483940894, "grad_norm": 686.0167236328125, "learning_rate": 4.2082018927444797e-07, "loss": 104.0, "step": 667 }, { "epoch": 0.006323302505655948, "grad_norm": 733.5798950195312, "learning_rate": 4.2145110410094636e-07, "loss": 59.25, "step": 668 }, { "epoch": 0.006332768527371002, "grad_norm": 657.3442993164062, "learning_rate": 4.220820189274448e-07, "loss": 52.4062, "step": 669 }, { "epoch": 0.006342234549086056, "grad_norm": 675.2840576171875, "learning_rate": 4.227129337539432e-07, "loss": 86.7812, "step": 670 }, { "epoch": 0.0063517005708011096, "grad_norm": 608.5492553710938, "learning_rate": 4.233438485804416e-07, "loss": 46.9531, "step": 671 }, { "epoch": 0.006361166592516163, "grad_norm": 1130.2987060546875, "learning_rate": 4.2397476340694006e-07, "loss": 84.125, "step": 672 }, { "epoch": 0.006370632614231217, "grad_norm": 1026.4122314453125, "learning_rate": 4.2460567823343845e-07, "loss": 79.125, "step": 673 }, { "epoch": 0.006380098635946271, "grad_norm": 1241.5045166015625, "learning_rate": 4.252365930599369e-07, "loss": 114.125, "step": 674 }, { "epoch": 0.006389564657661325, "grad_norm": 713.6329345703125, "learning_rate": 4.2586750788643533e-07, "loss": 57.9062, "step": 675 }, { "epoch": 0.006399030679376378, "grad_norm": 712.4388427734375, "learning_rate": 4.264984227129337e-07, "loss": 63.3438, "step": 676 }, { "epoch": 0.006408496701091432, "grad_norm": 656.9547729492188, "learning_rate": 4.2712933753943216e-07, "loss": 60.0625, "step": 677 }, { "epoch": 0.006417962722806486, "grad_norm": 525.9691772460938, "learning_rate": 4.277602523659306e-07, "loss": 57.25, "step": 678 }, { "epoch": 0.00642742874452154, "grad_norm": 914.7711181640625, "learning_rate": 4.2839116719242904e-07, "loss": 56.625, "step": 679 }, { "epoch": 0.006436894766236594, "grad_norm": 703.0784912109375, "learning_rate": 4.290220820189274e-07, "loss": 59.0625, "step": 680 }, { "epoch": 0.0064463607879516474, "grad_norm": 620.483154296875, "learning_rate": 4.296529968454258e-07, "loss": 60.75, "step": 681 }, { "epoch": 0.006455826809666701, "grad_norm": 2.6872291564941406, "learning_rate": 4.302839116719243e-07, "loss": 0.9487, "step": 682 }, { "epoch": 0.006465292831381755, "grad_norm": 560.2081298828125, "learning_rate": 4.309148264984227e-07, "loss": 54.1875, "step": 683 }, { "epoch": 0.006474758853096809, "grad_norm": 474.3762512207031, "learning_rate": 4.3154574132492113e-07, "loss": 49.5938, "step": 684 }, { "epoch": 0.006484224874811863, "grad_norm": 868.765380859375, "learning_rate": 4.321766561514195e-07, "loss": 119.0312, "step": 685 }, { "epoch": 0.006493690896526917, "grad_norm": 905.2570190429688, "learning_rate": 4.3280757097791796e-07, "loss": 58.125, "step": 686 }, { "epoch": 0.006503156918241971, "grad_norm": 740.9848022460938, "learning_rate": 4.334384858044164e-07, "loss": 108.1875, "step": 687 }, { "epoch": 0.006512622939957025, "grad_norm": 932.3906860351562, "learning_rate": 4.340694006309148e-07, "loss": 106.2812, "step": 688 }, { "epoch": 0.0065220889616720784, "grad_norm": 2.7568440437316895, "learning_rate": 4.347003154574132e-07, "loss": 0.7859, "step": 689 }, { "epoch": 0.006531554983387132, "grad_norm": 1660.8675537109375, "learning_rate": 4.3533123028391166e-07, "loss": 111.875, "step": 690 }, { "epoch": 0.006541021005102185, "grad_norm": 1078.038818359375, "learning_rate": 4.3596214511041005e-07, "loss": 115.5625, "step": 691 }, { "epoch": 0.006550487026817239, "grad_norm": 759.6041259765625, "learning_rate": 4.365930599369085e-07, "loss": 86.3125, "step": 692 }, { "epoch": 0.006559953048532293, "grad_norm": 667.2096557617188, "learning_rate": 4.3722397476340693e-07, "loss": 53.25, "step": 693 }, { "epoch": 0.006569419070247347, "grad_norm": 552.6778564453125, "learning_rate": 4.3785488958990537e-07, "loss": 51.6875, "step": 694 }, { "epoch": 0.006578885091962401, "grad_norm": 650.8267822265625, "learning_rate": 4.3848580441640376e-07, "loss": 50.0625, "step": 695 }, { "epoch": 0.006588351113677455, "grad_norm": 948.421875, "learning_rate": 4.3911671924290214e-07, "loss": 104.7188, "step": 696 }, { "epoch": 0.006597817135392509, "grad_norm": 929.8365478515625, "learning_rate": 4.3974763406940064e-07, "loss": 89.3438, "step": 697 }, { "epoch": 0.0066072831571075625, "grad_norm": 993.6218872070312, "learning_rate": 4.40378548895899e-07, "loss": 56.1562, "step": 698 }, { "epoch": 0.006616749178822616, "grad_norm": 872.5192260742188, "learning_rate": 4.4100946372239746e-07, "loss": 114.125, "step": 699 }, { "epoch": 0.00662621520053767, "grad_norm": 711.3560180664062, "learning_rate": 4.4164037854889585e-07, "loss": 60.4062, "step": 700 }, { "epoch": 0.006635681222252724, "grad_norm": 3.1337811946868896, "learning_rate": 4.4227129337539434e-07, "loss": 0.9575, "step": 701 }, { "epoch": 0.006645147243967778, "grad_norm": 1135.262939453125, "learning_rate": 4.4290220820189273e-07, "loss": 72.25, "step": 702 }, { "epoch": 0.006654613265682832, "grad_norm": 1047.9757080078125, "learning_rate": 4.435331230283911e-07, "loss": 98.8438, "step": 703 }, { "epoch": 0.006664079287397886, "grad_norm": 704.785400390625, "learning_rate": 4.441640378548896e-07, "loss": 57.0, "step": 704 }, { "epoch": 0.006673545309112939, "grad_norm": 802.1261596679688, "learning_rate": 4.44794952681388e-07, "loss": 60.4688, "step": 705 }, { "epoch": 0.006683011330827993, "grad_norm": 742.6767578125, "learning_rate": 4.4542586750788644e-07, "loss": 59.1875, "step": 706 }, { "epoch": 0.0066924773525430465, "grad_norm": 3554.342041015625, "learning_rate": 4.460567823343848e-07, "loss": 201.4375, "step": 707 }, { "epoch": 0.0067019433742581, "grad_norm": 1034.7484130859375, "learning_rate": 4.4668769716088326e-07, "loss": 65.9375, "step": 708 }, { "epoch": 0.006711409395973154, "grad_norm": 622.508056640625, "learning_rate": 4.473186119873817e-07, "loss": 132.875, "step": 709 }, { "epoch": 0.006720875417688208, "grad_norm": 897.9864501953125, "learning_rate": 4.479495268138801e-07, "loss": 121.0, "step": 710 }, { "epoch": 0.006730341439403262, "grad_norm": 633.8663940429688, "learning_rate": 4.4858044164037853e-07, "loss": 56.875, "step": 711 }, { "epoch": 0.006739807461118316, "grad_norm": 2.3826282024383545, "learning_rate": 4.4921135646687697e-07, "loss": 0.8003, "step": 712 }, { "epoch": 0.00674927348283337, "grad_norm": 755.7293701171875, "learning_rate": 4.4984227129337536e-07, "loss": 100.2656, "step": 713 }, { "epoch": 0.006758739504548424, "grad_norm": 874.4550170898438, "learning_rate": 4.504731861198738e-07, "loss": 96.2812, "step": 714 }, { "epoch": 0.0067682055262634775, "grad_norm": 857.3119506835938, "learning_rate": 4.5110410094637224e-07, "loss": 128.625, "step": 715 }, { "epoch": 0.006777671547978531, "grad_norm": 632.5770263671875, "learning_rate": 4.517350157728707e-07, "loss": 90.8438, "step": 716 }, { "epoch": 0.006787137569693585, "grad_norm": 1686.2781982421875, "learning_rate": 4.5236593059936906e-07, "loss": 108.4375, "step": 717 }, { "epoch": 0.006796603591408639, "grad_norm": 618.0745849609375, "learning_rate": 4.5299684542586745e-07, "loss": 47.5781, "step": 718 }, { "epoch": 0.006806069613123692, "grad_norm": 899.2180786132812, "learning_rate": 4.5362776025236594e-07, "loss": 90.5312, "step": 719 }, { "epoch": 0.006815535634838746, "grad_norm": 921.3800659179688, "learning_rate": 4.5425867507886433e-07, "loss": 78.625, "step": 720 }, { "epoch": 0.0068250016565538, "grad_norm": 606.9647827148438, "learning_rate": 4.5488958990536277e-07, "loss": 47.4062, "step": 721 }, { "epoch": 0.006834467678268854, "grad_norm": 1095.116943359375, "learning_rate": 4.5552050473186116e-07, "loss": 110.4375, "step": 722 }, { "epoch": 0.006843933699983908, "grad_norm": 700.3568115234375, "learning_rate": 4.561514195583596e-07, "loss": 58.7344, "step": 723 }, { "epoch": 0.0068533997216989615, "grad_norm": 900.1468505859375, "learning_rate": 4.5678233438485804e-07, "loss": 128.5, "step": 724 }, { "epoch": 0.006862865743414015, "grad_norm": 557.4404907226562, "learning_rate": 4.574132492113564e-07, "loss": 61.9688, "step": 725 }, { "epoch": 0.006872331765129069, "grad_norm": 775.9334106445312, "learning_rate": 4.580441640378549e-07, "loss": 61.9375, "step": 726 }, { "epoch": 0.006881797786844123, "grad_norm": 956.7257080078125, "learning_rate": 4.586750788643533e-07, "loss": 88.5312, "step": 727 }, { "epoch": 0.006891263808559177, "grad_norm": 682.5673828125, "learning_rate": 4.593059936908517e-07, "loss": 86.0312, "step": 728 }, { "epoch": 0.006900729830274231, "grad_norm": 1588.4832763671875, "learning_rate": 4.5993690851735013e-07, "loss": 103.4688, "step": 729 }, { "epoch": 0.006910195851989285, "grad_norm": 2223.914306640625, "learning_rate": 4.6056782334384857e-07, "loss": 130.2188, "step": 730 }, { "epoch": 0.006919661873704339, "grad_norm": 1426.7650146484375, "learning_rate": 4.61198738170347e-07, "loss": 82.7812, "step": 731 }, { "epoch": 0.0069291278954193925, "grad_norm": 683.0283203125, "learning_rate": 4.618296529968454e-07, "loss": 48.25, "step": 732 }, { "epoch": 0.0069385939171344455, "grad_norm": 699.4717407226562, "learning_rate": 4.624605678233438e-07, "loss": 90.0312, "step": 733 }, { "epoch": 0.006948059938849499, "grad_norm": 537.8048095703125, "learning_rate": 4.630914826498423e-07, "loss": 46.8438, "step": 734 }, { "epoch": 0.006957525960564553, "grad_norm": 519.2514038085938, "learning_rate": 4.6372239747634066e-07, "loss": 54.5, "step": 735 }, { "epoch": 0.006966991982279607, "grad_norm": 711.2446899414062, "learning_rate": 4.643533123028391e-07, "loss": 91.125, "step": 736 }, { "epoch": 0.006976458003994661, "grad_norm": 671.8798828125, "learning_rate": 4.649842271293375e-07, "loss": 47.5, "step": 737 }, { "epoch": 0.006985924025709715, "grad_norm": 1387.1195068359375, "learning_rate": 4.6561514195583593e-07, "loss": 103.3125, "step": 738 }, { "epoch": 0.006995390047424769, "grad_norm": 1116.261474609375, "learning_rate": 4.6624605678233437e-07, "loss": 113.9688, "step": 739 }, { "epoch": 0.007004856069139823, "grad_norm": 905.7135009765625, "learning_rate": 4.6687697160883276e-07, "loss": 66.1406, "step": 740 }, { "epoch": 0.0070143220908548765, "grad_norm": 690.0347290039062, "learning_rate": 4.6750788643533125e-07, "loss": 53.1562, "step": 741 }, { "epoch": 0.00702378811256993, "grad_norm": 568.18505859375, "learning_rate": 4.6813880126182964e-07, "loss": 96.9062, "step": 742 }, { "epoch": 0.007033254134284984, "grad_norm": 677.7637329101562, "learning_rate": 4.68769716088328e-07, "loss": 70.0938, "step": 743 }, { "epoch": 0.007042720156000038, "grad_norm": 654.349853515625, "learning_rate": 4.6940063091482646e-07, "loss": 59.4688, "step": 744 }, { "epoch": 0.007052186177715092, "grad_norm": 868.0798950195312, "learning_rate": 4.700315457413249e-07, "loss": 64.4375, "step": 745 }, { "epoch": 0.007061652199430146, "grad_norm": 785.4501953125, "learning_rate": 4.7066246056782334e-07, "loss": 78.375, "step": 746 }, { "epoch": 0.007071118221145199, "grad_norm": 780.6588745117188, "learning_rate": 4.7129337539432173e-07, "loss": 85.125, "step": 747 }, { "epoch": 0.007080584242860253, "grad_norm": 666.7556762695312, "learning_rate": 4.719242902208201e-07, "loss": 57.3438, "step": 748 }, { "epoch": 0.007090050264575307, "grad_norm": 666.6965942382812, "learning_rate": 4.725552050473186e-07, "loss": 102.4375, "step": 749 }, { "epoch": 0.0070995162862903605, "grad_norm": 1134.0828857421875, "learning_rate": 4.73186119873817e-07, "loss": 61.0625, "step": 750 }, { "epoch": 0.007108982308005414, "grad_norm": 877.4514770507812, "learning_rate": 4.7381703470031544e-07, "loss": 57.75, "step": 751 }, { "epoch": 0.007118448329720468, "grad_norm": 567.602294921875, "learning_rate": 4.744479495268139e-07, "loss": 50.0625, "step": 752 }, { "epoch": 0.007127914351435522, "grad_norm": 901.8771362304688, "learning_rate": 4.750788643533123e-07, "loss": 61.1562, "step": 753 }, { "epoch": 0.007137380373150576, "grad_norm": 638.4566650390625, "learning_rate": 4.757097791798107e-07, "loss": 60.25, "step": 754 }, { "epoch": 0.00714684639486563, "grad_norm": 1189.4766845703125, "learning_rate": 4.763406940063091e-07, "loss": 94.375, "step": 755 }, { "epoch": 0.007156312416580684, "grad_norm": 801.3098754882812, "learning_rate": 4.769716088328075e-07, "loss": 54.0, "step": 756 }, { "epoch": 0.007165778438295738, "grad_norm": 783.3068237304688, "learning_rate": 4.77602523659306e-07, "loss": 116.5625, "step": 757 }, { "epoch": 0.0071752444600107915, "grad_norm": 511.90802001953125, "learning_rate": 4.782334384858044e-07, "loss": 45.5, "step": 758 }, { "epoch": 0.007184710481725845, "grad_norm": 478.81121826171875, "learning_rate": 4.788643533123028e-07, "loss": 49.3125, "step": 759 }, { "epoch": 0.007194176503440899, "grad_norm": 669.1679077148438, "learning_rate": 4.794952681388013e-07, "loss": 47.0938, "step": 760 }, { "epoch": 0.007203642525155953, "grad_norm": 774.081787109375, "learning_rate": 4.801261829652997e-07, "loss": 55.8438, "step": 761 }, { "epoch": 0.007213108546871006, "grad_norm": 507.76708984375, "learning_rate": 4.807570977917981e-07, "loss": 50.0938, "step": 762 }, { "epoch": 0.00722257456858606, "grad_norm": 1447.4273681640625, "learning_rate": 4.813880126182966e-07, "loss": 84.6094, "step": 763 }, { "epoch": 0.007232040590301114, "grad_norm": 506.8823547363281, "learning_rate": 4.820189274447949e-07, "loss": 51.125, "step": 764 }, { "epoch": 0.007241506612016168, "grad_norm": 939.7547607421875, "learning_rate": 4.826498422712933e-07, "loss": 123.3125, "step": 765 }, { "epoch": 0.007250972633731222, "grad_norm": 765.6340942382812, "learning_rate": 4.832807570977917e-07, "loss": 94.3125, "step": 766 }, { "epoch": 0.0072604386554462755, "grad_norm": 926.777587890625, "learning_rate": 4.839116719242902e-07, "loss": 123.6719, "step": 767 }, { "epoch": 0.007269904677161329, "grad_norm": 537.46875, "learning_rate": 4.845425867507886e-07, "loss": 64.9062, "step": 768 }, { "epoch": 0.007279370698876383, "grad_norm": 818.5309448242188, "learning_rate": 4.85173501577287e-07, "loss": 116.125, "step": 769 }, { "epoch": 0.007288836720591437, "grad_norm": 906.6332397460938, "learning_rate": 4.858044164037855e-07, "loss": 100.9375, "step": 770 }, { "epoch": 0.007298302742306491, "grad_norm": 703.8358764648438, "learning_rate": 4.864353312302839e-07, "loss": 57.8438, "step": 771 }, { "epoch": 0.007307768764021545, "grad_norm": 562.2327270507812, "learning_rate": 4.870662460567824e-07, "loss": 76.8438, "step": 772 }, { "epoch": 0.007317234785736599, "grad_norm": 784.0732421875, "learning_rate": 4.876971608832807e-07, "loss": 48.2188, "step": 773 }, { "epoch": 0.007326700807451653, "grad_norm": 542.60546875, "learning_rate": 4.883280757097791e-07, "loss": 51.2188, "step": 774 }, { "epoch": 0.0073361668291667065, "grad_norm": 1071.7242431640625, "learning_rate": 4.889589905362776e-07, "loss": 54.8125, "step": 775 }, { "epoch": 0.0073456328508817595, "grad_norm": 860.04638671875, "learning_rate": 4.89589905362776e-07, "loss": 45.5938, "step": 776 }, { "epoch": 0.007355098872596813, "grad_norm": 739.45947265625, "learning_rate": 4.902208201892744e-07, "loss": 57.3125, "step": 777 }, { "epoch": 0.007364564894311867, "grad_norm": 1081.9669189453125, "learning_rate": 4.908517350157729e-07, "loss": 105.5625, "step": 778 }, { "epoch": 0.007374030916026921, "grad_norm": 666.6940307617188, "learning_rate": 4.914826498422713e-07, "loss": 57.0938, "step": 779 }, { "epoch": 0.007383496937741975, "grad_norm": 1013.7083129882812, "learning_rate": 4.921135646687697e-07, "loss": 113.8438, "step": 780 }, { "epoch": 0.007392962959457029, "grad_norm": 736.87353515625, "learning_rate": 4.92744479495268e-07, "loss": 97.4062, "step": 781 }, { "epoch": 0.007402428981172083, "grad_norm": 3.03008770942688, "learning_rate": 4.933753943217665e-07, "loss": 0.978, "step": 782 }, { "epoch": 0.007411895002887137, "grad_norm": 764.3482666015625, "learning_rate": 4.940063091482649e-07, "loss": 90.0312, "step": 783 }, { "epoch": 0.0074213610246021905, "grad_norm": 1034.0223388671875, "learning_rate": 4.946372239747634e-07, "loss": 93.25, "step": 784 }, { "epoch": 0.007430827046317244, "grad_norm": 4.111385345458984, "learning_rate": 4.952681388012618e-07, "loss": 0.9053, "step": 785 }, { "epoch": 0.007440293068032298, "grad_norm": 1394.77099609375, "learning_rate": 4.958990536277602e-07, "loss": 133.2812, "step": 786 }, { "epoch": 0.007449759089747352, "grad_norm": 1801.6568603515625, "learning_rate": 4.965299684542587e-07, "loss": 125.7812, "step": 787 }, { "epoch": 0.007459225111462406, "grad_norm": 3.2125141620635986, "learning_rate": 4.971608832807571e-07, "loss": 0.8711, "step": 788 }, { "epoch": 0.00746869113317746, "grad_norm": 534.101318359375, "learning_rate": 4.977917981072556e-07, "loss": 47.5625, "step": 789 }, { "epoch": 0.007478157154892513, "grad_norm": 1366.0341796875, "learning_rate": 4.98422712933754e-07, "loss": 87.2031, "step": 790 }, { "epoch": 0.007487623176607567, "grad_norm": 4.03013801574707, "learning_rate": 4.990536277602523e-07, "loss": 0.8652, "step": 791 }, { "epoch": 0.007497089198322621, "grad_norm": 587.3823852539062, "learning_rate": 4.996845425867507e-07, "loss": 54.9688, "step": 792 }, { "epoch": 0.0075065552200376745, "grad_norm": 736.0506591796875, "learning_rate": 5.003154574132492e-07, "loss": 68.0938, "step": 793 }, { "epoch": 0.007516021241752728, "grad_norm": 639.2147827148438, "learning_rate": 5.009463722397476e-07, "loss": 70.0312, "step": 794 }, { "epoch": 0.007525487263467782, "grad_norm": 3.1425139904022217, "learning_rate": 5.01577287066246e-07, "loss": 0.7754, "step": 795 }, { "epoch": 0.007534953285182836, "grad_norm": 2.9051320552825928, "learning_rate": 5.022082018927445e-07, "loss": 0.96, "step": 796 }, { "epoch": 0.00754441930689789, "grad_norm": 1276.66796875, "learning_rate": 5.028391167192429e-07, "loss": 122.875, "step": 797 }, { "epoch": 0.007553885328612944, "grad_norm": 673.965087890625, "learning_rate": 5.034700315457413e-07, "loss": 100.5938, "step": 798 }, { "epoch": 0.007563351350327998, "grad_norm": 642.2201538085938, "learning_rate": 5.041009463722398e-07, "loss": 37.4219, "step": 799 }, { "epoch": 0.007572817372043052, "grad_norm": 934.3065185546875, "learning_rate": 5.047318611987381e-07, "loss": 87.5, "step": 800 }, { "epoch": 0.0075822833937581055, "grad_norm": 1148.65771484375, "learning_rate": 5.053627760252365e-07, "loss": 89.2812, "step": 801 }, { "epoch": 0.007591749415473159, "grad_norm": 1826.3553466796875, "learning_rate": 5.05993690851735e-07, "loss": 106.8438, "step": 802 }, { "epoch": 0.007601215437188213, "grad_norm": 964.21044921875, "learning_rate": 5.066246056782334e-07, "loss": 54.8438, "step": 803 }, { "epoch": 0.007610681458903266, "grad_norm": 511.60369873046875, "learning_rate": 5.072555205047319e-07, "loss": 89.6094, "step": 804 }, { "epoch": 0.00762014748061832, "grad_norm": 510.3746337890625, "learning_rate": 5.078864353312302e-07, "loss": 57.375, "step": 805 }, { "epoch": 0.007629613502333374, "grad_norm": 589.3353881835938, "learning_rate": 5.085173501577287e-07, "loss": 42.5938, "step": 806 }, { "epoch": 0.007639079524048428, "grad_norm": 839.3663330078125, "learning_rate": 5.091482649842272e-07, "loss": 57.1875, "step": 807 }, { "epoch": 0.007648545545763482, "grad_norm": 1263.7366943359375, "learning_rate": 5.097791798107255e-07, "loss": 99.4688, "step": 808 }, { "epoch": 0.007658011567478536, "grad_norm": 1232.43017578125, "learning_rate": 5.104100946372239e-07, "loss": 60.2812, "step": 809 }, { "epoch": 0.0076674775891935896, "grad_norm": 673.701904296875, "learning_rate": 5.110410094637224e-07, "loss": 48.2812, "step": 810 }, { "epoch": 0.007676943610908643, "grad_norm": 990.0816650390625, "learning_rate": 5.116719242902207e-07, "loss": 71.1562, "step": 811 }, { "epoch": 0.007686409632623697, "grad_norm": 829.873046875, "learning_rate": 5.123028391167192e-07, "loss": 58.5938, "step": 812 }, { "epoch": 0.007695875654338751, "grad_norm": 2400.70849609375, "learning_rate": 5.129337539432177e-07, "loss": 125.0, "step": 813 }, { "epoch": 0.007705341676053805, "grad_norm": 511.437255859375, "learning_rate": 5.135646687697161e-07, "loss": 63.4531, "step": 814 }, { "epoch": 0.007714807697768859, "grad_norm": 697.353515625, "learning_rate": 5.141955835962145e-07, "loss": 71.9844, "step": 815 }, { "epoch": 0.007724273719483913, "grad_norm": 629.1488037109375, "learning_rate": 5.148264984227129e-07, "loss": 48.5312, "step": 816 }, { "epoch": 0.007733739741198967, "grad_norm": 424.27081298828125, "learning_rate": 5.154574132492114e-07, "loss": 50.3125, "step": 817 }, { "epoch": 0.0077432057629140206, "grad_norm": 1043.326171875, "learning_rate": 5.160883280757097e-07, "loss": 123.7188, "step": 818 }, { "epoch": 0.0077526717846290736, "grad_norm": 524.2852172851562, "learning_rate": 5.167192429022081e-07, "loss": 49.25, "step": 819 }, { "epoch": 0.0077621378063441274, "grad_norm": 565.6692504882812, "learning_rate": 5.173501577287066e-07, "loss": 83.125, "step": 820 }, { "epoch": 0.007771603828059181, "grad_norm": 849.1063232421875, "learning_rate": 5.179810725552051e-07, "loss": 97.7969, "step": 821 }, { "epoch": 0.007781069849774235, "grad_norm": 525.5205688476562, "learning_rate": 5.186119873817034e-07, "loss": 46.8438, "step": 822 }, { "epoch": 0.007790535871489289, "grad_norm": 813.9027099609375, "learning_rate": 5.192429022082019e-07, "loss": 62.0, "step": 823 }, { "epoch": 0.007800001893204343, "grad_norm": 2.8228847980499268, "learning_rate": 5.198738170347004e-07, "loss": 0.874, "step": 824 }, { "epoch": 0.007809467914919397, "grad_norm": 428.4258117675781, "learning_rate": 5.205047318611987e-07, "loss": 53.2188, "step": 825 }, { "epoch": 0.00781893393663445, "grad_norm": 515.7318115234375, "learning_rate": 5.211356466876972e-07, "loss": 43.8125, "step": 826 }, { "epoch": 0.007828399958349504, "grad_norm": 597.9266967773438, "learning_rate": 5.217665615141955e-07, "loss": 50.4062, "step": 827 }, { "epoch": 0.007837865980064558, "grad_norm": 567.211181640625, "learning_rate": 5.223974763406939e-07, "loss": 52.0625, "step": 828 }, { "epoch": 0.007847332001779611, "grad_norm": 1001.0836181640625, "learning_rate": 5.230283911671924e-07, "loss": 70.75, "step": 829 }, { "epoch": 0.007856798023494666, "grad_norm": 746.6680297851562, "learning_rate": 5.236593059936908e-07, "loss": 48.6875, "step": 830 }, { "epoch": 0.00786626404520972, "grad_norm": 800.999755859375, "learning_rate": 5.242902208201893e-07, "loss": 102.5625, "step": 831 }, { "epoch": 0.007875730066924774, "grad_norm": 475.74212646484375, "learning_rate": 5.249211356466877e-07, "loss": 45.0938, "step": 832 }, { "epoch": 0.007885196088639827, "grad_norm": 2.6574461460113525, "learning_rate": 5.255520504731861e-07, "loss": 0.7451, "step": 833 }, { "epoch": 0.007894662110354882, "grad_norm": 2.644270420074463, "learning_rate": 5.261829652996846e-07, "loss": 0.957, "step": 834 }, { "epoch": 0.007904128132069935, "grad_norm": 483.36212158203125, "learning_rate": 5.26813880126183e-07, "loss": 48.9688, "step": 835 }, { "epoch": 0.00791359415378499, "grad_norm": 985.6268920898438, "learning_rate": 5.274447949526813e-07, "loss": 88.4062, "step": 836 }, { "epoch": 0.007923060175500042, "grad_norm": 3.3204052448272705, "learning_rate": 5.280757097791798e-07, "loss": 0.8538, "step": 837 }, { "epoch": 0.007932526197215097, "grad_norm": 977.9467163085938, "learning_rate": 5.287066246056781e-07, "loss": 74.0312, "step": 838 }, { "epoch": 0.00794199221893015, "grad_norm": 594.3565063476562, "learning_rate": 5.293375394321766e-07, "loss": 59.8438, "step": 839 }, { "epoch": 0.007951458240645203, "grad_norm": 760.4666137695312, "learning_rate": 5.299684542586751e-07, "loss": 55.4062, "step": 840 }, { "epoch": 0.007960924262360258, "grad_norm": 579.6248779296875, "learning_rate": 5.305993690851735e-07, "loss": 50.1875, "step": 841 }, { "epoch": 0.007970390284075311, "grad_norm": 582.2992553710938, "learning_rate": 5.312302839116719e-07, "loss": 52.0312, "step": 842 }, { "epoch": 0.007979856305790366, "grad_norm": 1976.796875, "learning_rate": 5.318611987381704e-07, "loss": 92.0938, "step": 843 }, { "epoch": 0.007989322327505419, "grad_norm": 1077.587890625, "learning_rate": 5.324921135646688e-07, "loss": 120.4688, "step": 844 }, { "epoch": 0.007998788349220473, "grad_norm": 384.5727844238281, "learning_rate": 5.331230283911671e-07, "loss": 47.0156, "step": 845 }, { "epoch": 0.008008254370935526, "grad_norm": 544.6519775390625, "learning_rate": 5.337539432176655e-07, "loss": 51.4375, "step": 846 }, { "epoch": 0.008017720392650581, "grad_norm": 1123.7867431640625, "learning_rate": 5.34384858044164e-07, "loss": 104.8125, "step": 847 }, { "epoch": 0.008027186414365634, "grad_norm": 550.1346435546875, "learning_rate": 5.350157728706624e-07, "loss": 86.5312, "step": 848 }, { "epoch": 0.008036652436080689, "grad_norm": 557.1901245117188, "learning_rate": 5.356466876971608e-07, "loss": 46.5312, "step": 849 }, { "epoch": 0.008046118457795742, "grad_norm": 942.27734375, "learning_rate": 5.362776025236593e-07, "loss": 97.3125, "step": 850 }, { "epoch": 0.008055584479510797, "grad_norm": 530.2299194335938, "learning_rate": 5.369085173501578e-07, "loss": 54.5312, "step": 851 }, { "epoch": 0.00806505050122585, "grad_norm": 583.9393920898438, "learning_rate": 5.375394321766561e-07, "loss": 63.7812, "step": 852 }, { "epoch": 0.008074516522940904, "grad_norm": 350.1147155761719, "learning_rate": 5.381703470031546e-07, "loss": 39.7031, "step": 853 }, { "epoch": 0.008083982544655957, "grad_norm": 589.2879028320312, "learning_rate": 5.38801261829653e-07, "loss": 63.9375, "step": 854 }, { "epoch": 0.00809344856637101, "grad_norm": 754.3623046875, "learning_rate": 5.394321766561513e-07, "loss": 51.0938, "step": 855 }, { "epoch": 0.008102914588086065, "grad_norm": 3.411783218383789, "learning_rate": 5.400630914826498e-07, "loss": 0.8623, "step": 856 }, { "epoch": 0.008112380609801118, "grad_norm": 503.009033203125, "learning_rate": 5.406940063091482e-07, "loss": 46.375, "step": 857 }, { "epoch": 0.008121846631516173, "grad_norm": 473.1482238769531, "learning_rate": 5.413249211356466e-07, "loss": 48.7656, "step": 858 }, { "epoch": 0.008131312653231226, "grad_norm": 497.8544006347656, "learning_rate": 5.419558359621451e-07, "loss": 53.0, "step": 859 }, { "epoch": 0.00814077867494628, "grad_norm": 655.8378295898438, "learning_rate": 5.425867507886435e-07, "loss": 48.75, "step": 860 }, { "epoch": 0.008150244696661334, "grad_norm": 482.3836669921875, "learning_rate": 5.43217665615142e-07, "loss": 34.6094, "step": 861 }, { "epoch": 0.008159710718376388, "grad_norm": 655.23095703125, "learning_rate": 5.438485804416404e-07, "loss": 47.3125, "step": 862 }, { "epoch": 0.008169176740091441, "grad_norm": 561.69970703125, "learning_rate": 5.444794952681387e-07, "loss": 49.4688, "step": 863 }, { "epoch": 0.008178642761806496, "grad_norm": 468.29144287109375, "learning_rate": 5.451104100946372e-07, "loss": 54.0625, "step": 864 }, { "epoch": 0.00818810878352155, "grad_norm": 922.1507568359375, "learning_rate": 5.457413249211356e-07, "loss": 92.25, "step": 865 }, { "epoch": 0.008197574805236604, "grad_norm": 578.6650390625, "learning_rate": 5.46372239747634e-07, "loss": 51.2188, "step": 866 }, { "epoch": 0.008207040826951657, "grad_norm": 555.2592163085938, "learning_rate": 5.470031545741325e-07, "loss": 78.125, "step": 867 }, { "epoch": 0.00821650684866671, "grad_norm": 2.4686732292175293, "learning_rate": 5.476340694006308e-07, "loss": 0.7295, "step": 868 }, { "epoch": 0.008225972870381765, "grad_norm": 713.4580688476562, "learning_rate": 5.482649842271293e-07, "loss": 86.9062, "step": 869 }, { "epoch": 0.008235438892096818, "grad_norm": 503.7627868652344, "learning_rate": 5.488958990536278e-07, "loss": 46.5938, "step": 870 }, { "epoch": 0.008244904913811872, "grad_norm": 474.4834899902344, "learning_rate": 5.495268138801262e-07, "loss": 101.7188, "step": 871 }, { "epoch": 0.008254370935526925, "grad_norm": 918.9407348632812, "learning_rate": 5.501577287066245e-07, "loss": 87.5156, "step": 872 }, { "epoch": 0.00826383695724198, "grad_norm": 550.7669677734375, "learning_rate": 5.50788643533123e-07, "loss": 48.25, "step": 873 }, { "epoch": 0.008273302978957033, "grad_norm": 495.71112060546875, "learning_rate": 5.514195583596214e-07, "loss": 50.375, "step": 874 }, { "epoch": 0.008282769000672088, "grad_norm": 3042.91552734375, "learning_rate": 5.520504731861198e-07, "loss": 74.6875, "step": 875 }, { "epoch": 0.008292235022387141, "grad_norm": 491.931396484375, "learning_rate": 5.526813880126183e-07, "loss": 46.6875, "step": 876 }, { "epoch": 0.008301701044102196, "grad_norm": 606.964111328125, "learning_rate": 5.533123028391167e-07, "loss": 47.0312, "step": 877 }, { "epoch": 0.008311167065817249, "grad_norm": 636.4404907226562, "learning_rate": 5.539432176656152e-07, "loss": 48.8438, "step": 878 }, { "epoch": 0.008320633087532303, "grad_norm": 735.9544067382812, "learning_rate": 5.545741324921135e-07, "loss": 70.9062, "step": 879 }, { "epoch": 0.008330099109247356, "grad_norm": 1061.9390869140625, "learning_rate": 5.55205047318612e-07, "loss": 63.6562, "step": 880 }, { "epoch": 0.008339565130962411, "grad_norm": 535.3318481445312, "learning_rate": 5.558359621451105e-07, "loss": 44.4375, "step": 881 }, { "epoch": 0.008349031152677464, "grad_norm": 814.4985961914062, "learning_rate": 5.564668769716087e-07, "loss": 71.875, "step": 882 }, { "epoch": 0.008358497174392517, "grad_norm": 508.14398193359375, "learning_rate": 5.570977917981072e-07, "loss": 54.8438, "step": 883 }, { "epoch": 0.008367963196107572, "grad_norm": 666.0277099609375, "learning_rate": 5.577287066246057e-07, "loss": 64.4688, "step": 884 }, { "epoch": 0.008377429217822625, "grad_norm": 540.49951171875, "learning_rate": 5.58359621451104e-07, "loss": 51.6562, "step": 885 }, { "epoch": 0.00838689523953768, "grad_norm": 467.25811767578125, "learning_rate": 5.589905362776025e-07, "loss": 46.3125, "step": 886 }, { "epoch": 0.008396361261252733, "grad_norm": 563.1368408203125, "learning_rate": 5.59621451104101e-07, "loss": 51.875, "step": 887 }, { "epoch": 0.008405827282967787, "grad_norm": 684.3067626953125, "learning_rate": 5.602523659305994e-07, "loss": 94.5312, "step": 888 }, { "epoch": 0.00841529330468284, "grad_norm": 655.529052734375, "learning_rate": 5.608832807570978e-07, "loss": 48.4375, "step": 889 }, { "epoch": 0.008424759326397895, "grad_norm": 458.71649169921875, "learning_rate": 5.615141955835961e-07, "loss": 49.0312, "step": 890 }, { "epoch": 0.008434225348112948, "grad_norm": 1300.093017578125, "learning_rate": 5.621451104100946e-07, "loss": 107.0625, "step": 891 }, { "epoch": 0.008443691369828003, "grad_norm": 1156.27587890625, "learning_rate": 5.62776025236593e-07, "loss": 72.0, "step": 892 }, { "epoch": 0.008453157391543056, "grad_norm": 570.901611328125, "learning_rate": 5.634069400630914e-07, "loss": 54.2812, "step": 893 }, { "epoch": 0.00846262341325811, "grad_norm": 879.7730712890625, "learning_rate": 5.640378548895899e-07, "loss": 53.0938, "step": 894 }, { "epoch": 0.008472089434973164, "grad_norm": 1132.3984375, "learning_rate": 5.646687697160883e-07, "loss": 52.8281, "step": 895 }, { "epoch": 0.008481555456688218, "grad_norm": 563.0892333984375, "learning_rate": 5.652996845425867e-07, "loss": 49.75, "step": 896 }, { "epoch": 0.008491021478403271, "grad_norm": 927.1497802734375, "learning_rate": 5.659305993690852e-07, "loss": 69.1406, "step": 897 }, { "epoch": 0.008500487500118325, "grad_norm": 746.087158203125, "learning_rate": 5.665615141955837e-07, "loss": 103.4062, "step": 898 }, { "epoch": 0.00850995352183338, "grad_norm": 516.09912109375, "learning_rate": 5.671924290220819e-07, "loss": 53.125, "step": 899 }, { "epoch": 0.008519419543548432, "grad_norm": 2.5828592777252197, "learning_rate": 5.678233438485804e-07, "loss": 0.8521, "step": 900 }, { "epoch": 0.008528885565263487, "grad_norm": 3.1559479236602783, "learning_rate": 5.684542586750788e-07, "loss": 0.9404, "step": 901 }, { "epoch": 0.00853835158697854, "grad_norm": 661.6016845703125, "learning_rate": 5.690851735015772e-07, "loss": 50.3438, "step": 902 }, { "epoch": 0.008547817608693595, "grad_norm": 697.524658203125, "learning_rate": 5.697160883280757e-07, "loss": 89.8125, "step": 903 }, { "epoch": 0.008557283630408648, "grad_norm": 518.76611328125, "learning_rate": 5.703470031545741e-07, "loss": 45.2031, "step": 904 }, { "epoch": 0.008566749652123702, "grad_norm": 847.5130615234375, "learning_rate": 5.709779179810725e-07, "loss": 74.1562, "step": 905 }, { "epoch": 0.008576215673838756, "grad_norm": 643.4619140625, "learning_rate": 5.71608832807571e-07, "loss": 47.1562, "step": 906 }, { "epoch": 0.00858568169555381, "grad_norm": 597.977294921875, "learning_rate": 5.722397476340694e-07, "loss": 72.7188, "step": 907 }, { "epoch": 0.008595147717268863, "grad_norm": 589.1958618164062, "learning_rate": 5.728706624605679e-07, "loss": 41.2969, "step": 908 }, { "epoch": 0.008604613738983918, "grad_norm": 540.9647216796875, "learning_rate": 5.735015772870662e-07, "loss": 47.0938, "step": 909 }, { "epoch": 0.008614079760698971, "grad_norm": 722.0404052734375, "learning_rate": 5.741324921135646e-07, "loss": 99.5625, "step": 910 }, { "epoch": 0.008623545782414024, "grad_norm": 695.4044189453125, "learning_rate": 5.747634069400631e-07, "loss": 49.875, "step": 911 }, { "epoch": 0.008633011804129079, "grad_norm": 1577.857421875, "learning_rate": 5.753943217665614e-07, "loss": 126.5938, "step": 912 }, { "epoch": 0.008642477825844132, "grad_norm": 648.773193359375, "learning_rate": 5.760252365930599e-07, "loss": 66.3125, "step": 913 }, { "epoch": 0.008651943847559187, "grad_norm": 551.3426513671875, "learning_rate": 5.766561514195584e-07, "loss": 51.0312, "step": 914 }, { "epoch": 0.00866140986927424, "grad_norm": 997.7872924804688, "learning_rate": 5.772870662460568e-07, "loss": 115.5156, "step": 915 }, { "epoch": 0.008670875890989294, "grad_norm": 575.0909423828125, "learning_rate": 5.779179810725552e-07, "loss": 43.7812, "step": 916 }, { "epoch": 0.008680341912704347, "grad_norm": 821.425048828125, "learning_rate": 5.785488958990537e-07, "loss": 68.9688, "step": 917 }, { "epoch": 0.008689807934419402, "grad_norm": 489.1051330566406, "learning_rate": 5.79179810725552e-07, "loss": 43.1562, "step": 918 }, { "epoch": 0.008699273956134455, "grad_norm": 413.65753173828125, "learning_rate": 5.798107255520504e-07, "loss": 44.5938, "step": 919 }, { "epoch": 0.00870873997784951, "grad_norm": 499.95855712890625, "learning_rate": 5.804416403785489e-07, "loss": 44.1562, "step": 920 }, { "epoch": 0.008718205999564563, "grad_norm": 1091.332763671875, "learning_rate": 5.810725552050473e-07, "loss": 57.9688, "step": 921 }, { "epoch": 0.008727672021279618, "grad_norm": 863.7023315429688, "learning_rate": 5.817034700315457e-07, "loss": 116.75, "step": 922 }, { "epoch": 0.00873713804299467, "grad_norm": 2.4957759380340576, "learning_rate": 5.823343848580441e-07, "loss": 0.8101, "step": 923 }, { "epoch": 0.008746604064709725, "grad_norm": 863.5458374023438, "learning_rate": 5.829652996845426e-07, "loss": 106.8906, "step": 924 }, { "epoch": 0.008756070086424778, "grad_norm": 572.3380126953125, "learning_rate": 5.835962145110411e-07, "loss": 43.4375, "step": 925 }, { "epoch": 0.008765536108139831, "grad_norm": 847.18310546875, "learning_rate": 5.842271293375393e-07, "loss": 96.6875, "step": 926 }, { "epoch": 0.008775002129854886, "grad_norm": 451.4510192871094, "learning_rate": 5.848580441640378e-07, "loss": 39.7812, "step": 927 }, { "epoch": 0.008784468151569939, "grad_norm": 746.7451171875, "learning_rate": 5.854889589905363e-07, "loss": 89.7031, "step": 928 }, { "epoch": 0.008793934173284994, "grad_norm": 741.4230346679688, "learning_rate": 5.861198738170346e-07, "loss": 75.9688, "step": 929 }, { "epoch": 0.008803400195000047, "grad_norm": 886.7265014648438, "learning_rate": 5.867507886435331e-07, "loss": 98.6562, "step": 930 }, { "epoch": 0.008812866216715102, "grad_norm": 1386.0406494140625, "learning_rate": 5.873817034700315e-07, "loss": 121.375, "step": 931 }, { "epoch": 0.008822332238430155, "grad_norm": 782.8112182617188, "learning_rate": 5.880126182965299e-07, "loss": 45.0625, "step": 932 }, { "epoch": 0.00883179826014521, "grad_norm": 834.4720458984375, "learning_rate": 5.886435331230284e-07, "loss": 102.625, "step": 933 }, { "epoch": 0.008841264281860262, "grad_norm": 562.317626953125, "learning_rate": 5.892744479495268e-07, "loss": 47.2188, "step": 934 }, { "epoch": 0.008850730303575317, "grad_norm": 570.50390625, "learning_rate": 5.899053627760253e-07, "loss": 55.6562, "step": 935 }, { "epoch": 0.00886019632529037, "grad_norm": 3540.698974609375, "learning_rate": 5.905362776025236e-07, "loss": 100.75, "step": 936 }, { "epoch": 0.008869662347005425, "grad_norm": 847.4028930664062, "learning_rate": 5.91167192429022e-07, "loss": 120.4375, "step": 937 }, { "epoch": 0.008879128368720478, "grad_norm": 480.6512451171875, "learning_rate": 5.917981072555205e-07, "loss": 48.5781, "step": 938 }, { "epoch": 0.008888594390435533, "grad_norm": 584.4874877929688, "learning_rate": 5.924290220820189e-07, "loss": 47.7188, "step": 939 }, { "epoch": 0.008898060412150586, "grad_norm": 3.527505874633789, "learning_rate": 5.930599369085173e-07, "loss": 0.8545, "step": 940 }, { "epoch": 0.008907526433865639, "grad_norm": 511.4902648925781, "learning_rate": 5.936908517350158e-07, "loss": 51.1875, "step": 941 }, { "epoch": 0.008916992455580693, "grad_norm": 798.3194580078125, "learning_rate": 5.943217665615141e-07, "loss": 94.2812, "step": 942 }, { "epoch": 0.008926458477295746, "grad_norm": 662.8392333984375, "learning_rate": 5.949526813880126e-07, "loss": 83.75, "step": 943 }, { "epoch": 0.008935924499010801, "grad_norm": 534.926513671875, "learning_rate": 5.95583596214511e-07, "loss": 43.7188, "step": 944 }, { "epoch": 0.008945390520725854, "grad_norm": 987.9302368164062, "learning_rate": 5.962145110410094e-07, "loss": 68.25, "step": 945 }, { "epoch": 0.008954856542440909, "grad_norm": 415.9031982421875, "learning_rate": 5.968454258675078e-07, "loss": 49.0938, "step": 946 }, { "epoch": 0.008964322564155962, "grad_norm": 812.2520141601562, "learning_rate": 5.974763406940063e-07, "loss": 47.4688, "step": 947 }, { "epoch": 0.008973788585871017, "grad_norm": 442.0588073730469, "learning_rate": 5.981072555205047e-07, "loss": 47.0, "step": 948 }, { "epoch": 0.00898325460758607, "grad_norm": 1221.280517578125, "learning_rate": 5.987381703470031e-07, "loss": 82.0547, "step": 949 }, { "epoch": 0.008992720629301124, "grad_norm": 1087.75439453125, "learning_rate": 5.993690851735016e-07, "loss": 47.5938, "step": 950 }, { "epoch": 0.009002186651016177, "grad_norm": 514.0580444335938, "learning_rate": 6e-07, "loss": 75.5625, "step": 951 }, { "epoch": 0.009011652672731232, "grad_norm": 681.3024291992188, "learning_rate": 6.006309148264985e-07, "loss": 53.5938, "step": 952 }, { "epoch": 0.009021118694446285, "grad_norm": 469.5665283203125, "learning_rate": 6.012618296529967e-07, "loss": 44.4375, "step": 953 }, { "epoch": 0.009030584716161338, "grad_norm": 648.4255981445312, "learning_rate": 6.018927444794952e-07, "loss": 42.375, "step": 954 }, { "epoch": 0.009040050737876393, "grad_norm": 1290.115478515625, "learning_rate": 6.025236593059937e-07, "loss": 79.6406, "step": 955 }, { "epoch": 0.009049516759591446, "grad_norm": 584.070068359375, "learning_rate": 6.03154574132492e-07, "loss": 84.875, "step": 956 }, { "epoch": 0.0090589827813065, "grad_norm": 1431.1536865234375, "learning_rate": 6.037854889589905e-07, "loss": 94.0469, "step": 957 }, { "epoch": 0.009068448803021554, "grad_norm": 689.6102905273438, "learning_rate": 6.04416403785489e-07, "loss": 114.4062, "step": 958 }, { "epoch": 0.009077914824736608, "grad_norm": 832.2657470703125, "learning_rate": 6.050473186119873e-07, "loss": 101.1875, "step": 959 }, { "epoch": 0.009087380846451661, "grad_norm": 624.6807250976562, "learning_rate": 6.056782334384858e-07, "loss": 47.9062, "step": 960 }, { "epoch": 0.009096846868166716, "grad_norm": 510.3832092285156, "learning_rate": 6.063091482649843e-07, "loss": 50.5938, "step": 961 }, { "epoch": 0.009106312889881769, "grad_norm": 1298.28271484375, "learning_rate": 6.069400630914827e-07, "loss": 108.5625, "step": 962 }, { "epoch": 0.009115778911596824, "grad_norm": 587.4338989257812, "learning_rate": 6.07570977917981e-07, "loss": 68.2812, "step": 963 }, { "epoch": 0.009125244933311877, "grad_norm": 769.8639526367188, "learning_rate": 6.082018927444794e-07, "loss": 48.0, "step": 964 }, { "epoch": 0.009134710955026932, "grad_norm": 985.637939453125, "learning_rate": 6.088328075709779e-07, "loss": 125.5156, "step": 965 }, { "epoch": 0.009144176976741985, "grad_norm": 495.4888916015625, "learning_rate": 6.094637223974763e-07, "loss": 52.625, "step": 966 }, { "epoch": 0.00915364299845704, "grad_norm": 472.966064453125, "learning_rate": 6.100946372239747e-07, "loss": 52.5938, "step": 967 }, { "epoch": 0.009163109020172092, "grad_norm": 534.1757202148438, "learning_rate": 6.107255520504732e-07, "loss": 64.125, "step": 968 }, { "epoch": 0.009172575041887145, "grad_norm": 3.035447597503662, "learning_rate": 6.113564668769716e-07, "loss": 0.9189, "step": 969 }, { "epoch": 0.0091820410636022, "grad_norm": 850.8175659179688, "learning_rate": 6.1198738170347e-07, "loss": 44.0625, "step": 970 }, { "epoch": 0.009191507085317253, "grad_norm": 549.1148681640625, "learning_rate": 6.126182965299685e-07, "loss": 55.1562, "step": 971 }, { "epoch": 0.009200973107032308, "grad_norm": 556.2156372070312, "learning_rate": 6.132492113564669e-07, "loss": 44.625, "step": 972 }, { "epoch": 0.00921043912874736, "grad_norm": 878.2605590820312, "learning_rate": 6.138801261829652e-07, "loss": 106.2188, "step": 973 }, { "epoch": 0.009219905150462416, "grad_norm": 731.95263671875, "learning_rate": 6.145110410094637e-07, "loss": 99.1562, "step": 974 }, { "epoch": 0.009229371172177469, "grad_norm": 2.931413412094116, "learning_rate": 6.151419558359621e-07, "loss": 0.9131, "step": 975 }, { "epoch": 0.009238837193892523, "grad_norm": 982.4998779296875, "learning_rate": 6.157728706624605e-07, "loss": 103.2188, "step": 976 }, { "epoch": 0.009248303215607576, "grad_norm": 929.77099609375, "learning_rate": 6.16403785488959e-07, "loss": 85.25, "step": 977 }, { "epoch": 0.009257769237322631, "grad_norm": 1033.7044677734375, "learning_rate": 6.170347003154574e-07, "loss": 47.625, "step": 978 }, { "epoch": 0.009267235259037684, "grad_norm": 666.0188598632812, "learning_rate": 6.176656151419558e-07, "loss": 50.5, "step": 979 }, { "epoch": 0.009276701280752739, "grad_norm": 736.0408935546875, "learning_rate": 6.182965299684543e-07, "loss": 65.0469, "step": 980 }, { "epoch": 0.009286167302467792, "grad_norm": 823.9521484375, "learning_rate": 6.189274447949526e-07, "loss": 59.25, "step": 981 }, { "epoch": 0.009295633324182845, "grad_norm": 761.8516845703125, "learning_rate": 6.195583596214511e-07, "loss": 47.0938, "step": 982 }, { "epoch": 0.0093050993458979, "grad_norm": 554.5966186523438, "learning_rate": 6.201892744479495e-07, "loss": 47.6875, "step": 983 }, { "epoch": 0.009314565367612953, "grad_norm": 593.66064453125, "learning_rate": 6.208201892744479e-07, "loss": 48.8125, "step": 984 }, { "epoch": 0.009324031389328007, "grad_norm": 570.67724609375, "learning_rate": 6.214511041009464e-07, "loss": 51.25, "step": 985 }, { "epoch": 0.00933349741104306, "grad_norm": 453.7633972167969, "learning_rate": 6.220820189274447e-07, "loss": 41.4688, "step": 986 }, { "epoch": 0.009342963432758115, "grad_norm": 819.5733032226562, "learning_rate": 6.227129337539432e-07, "loss": 97.6562, "step": 987 }, { "epoch": 0.009352429454473168, "grad_norm": 649.6461791992188, "learning_rate": 6.233438485804417e-07, "loss": 46.6562, "step": 988 }, { "epoch": 0.009361895476188223, "grad_norm": 448.1849365234375, "learning_rate": 6.2397476340694e-07, "loss": 74.6875, "step": 989 }, { "epoch": 0.009371361497903276, "grad_norm": 1169.21337890625, "learning_rate": 6.246056782334384e-07, "loss": 88.0312, "step": 990 }, { "epoch": 0.00938082751961833, "grad_norm": 488.2369384765625, "learning_rate": 6.252365930599369e-07, "loss": 49.0625, "step": 991 }, { "epoch": 0.009390293541333384, "grad_norm": 577.0293579101562, "learning_rate": 6.258675078864353e-07, "loss": 62.4688, "step": 992 }, { "epoch": 0.009399759563048438, "grad_norm": 549.228515625, "learning_rate": 6.264984227129337e-07, "loss": 49.125, "step": 993 }, { "epoch": 0.009409225584763491, "grad_norm": 421.6153564453125, "learning_rate": 6.271293375394322e-07, "loss": 44.6094, "step": 994 }, { "epoch": 0.009418691606478546, "grad_norm": 583.7643432617188, "learning_rate": 6.277602523659306e-07, "loss": 47.8906, "step": 995 }, { "epoch": 0.009428157628193599, "grad_norm": 895.9959716796875, "learning_rate": 6.28391167192429e-07, "loss": 76.8125, "step": 996 }, { "epoch": 0.009437623649908652, "grad_norm": 665.9041748046875, "learning_rate": 6.290220820189274e-07, "loss": 45.75, "step": 997 }, { "epoch": 0.009447089671623707, "grad_norm": 1264.355224609375, "learning_rate": 6.296529968454259e-07, "loss": 43.5625, "step": 998 }, { "epoch": 0.00945655569333876, "grad_norm": 999.9818725585938, "learning_rate": 6.302839116719243e-07, "loss": 115.0625, "step": 999 }, { "epoch": 0.009466021715053815, "grad_norm": 428.024169921875, "learning_rate": 6.309148264984226e-07, "loss": 40.1094, "step": 1000 }, { "epoch": 0.009475487736768868, "grad_norm": 572.3364868164062, "learning_rate": 6.315457413249211e-07, "loss": 67.5312, "step": 1001 }, { "epoch": 0.009484953758483922, "grad_norm": 787.1051025390625, "learning_rate": 6.321766561514196e-07, "loss": 64.4688, "step": 1002 }, { "epoch": 0.009494419780198975, "grad_norm": 640.351318359375, "learning_rate": 6.328075709779179e-07, "loss": 62.4375, "step": 1003 }, { "epoch": 0.00950388580191403, "grad_norm": 687.2269287109375, "learning_rate": 6.334384858044164e-07, "loss": 52.1719, "step": 1004 }, { "epoch": 0.009513351823629083, "grad_norm": 819.5322875976562, "learning_rate": 6.340694006309149e-07, "loss": 58.9375, "step": 1005 }, { "epoch": 0.009522817845344138, "grad_norm": 813.7279052734375, "learning_rate": 6.347003154574132e-07, "loss": 64.6094, "step": 1006 }, { "epoch": 0.00953228386705919, "grad_norm": 2.957379102706909, "learning_rate": 6.353312302839117e-07, "loss": 0.8623, "step": 1007 }, { "epoch": 0.009541749888774246, "grad_norm": 562.7893676757812, "learning_rate": 6.3596214511041e-07, "loss": 44.9375, "step": 1008 }, { "epoch": 0.009551215910489299, "grad_norm": 380.1034851074219, "learning_rate": 6.365930599369085e-07, "loss": 38.7969, "step": 1009 }, { "epoch": 0.009560681932204353, "grad_norm": 3.1068406105041504, "learning_rate": 6.372239747634069e-07, "loss": 1.0342, "step": 1010 }, { "epoch": 0.009570147953919406, "grad_norm": 1024.3533935546875, "learning_rate": 6.378548895899053e-07, "loss": 102.1562, "step": 1011 }, { "epoch": 0.00957961397563446, "grad_norm": 684.3878784179688, "learning_rate": 6.384858044164038e-07, "loss": 71.5156, "step": 1012 }, { "epoch": 0.009589079997349514, "grad_norm": 2.9818782806396484, "learning_rate": 6.391167192429022e-07, "loss": 0.9092, "step": 1013 }, { "epoch": 0.009598546019064567, "grad_norm": 596.9166870117188, "learning_rate": 6.397476340694006e-07, "loss": 41.9688, "step": 1014 }, { "epoch": 0.009608012040779622, "grad_norm": 3.332792043685913, "learning_rate": 6.403785488958991e-07, "loss": 0.9287, "step": 1015 }, { "epoch": 0.009617478062494675, "grad_norm": 2.767293930053711, "learning_rate": 6.410094637223973e-07, "loss": 0.877, "step": 1016 }, { "epoch": 0.00962694408420973, "grad_norm": 592.91455078125, "learning_rate": 6.416403785488958e-07, "loss": 42.3594, "step": 1017 }, { "epoch": 0.009636410105924783, "grad_norm": 734.4127197265625, "learning_rate": 6.422712933753943e-07, "loss": 44.2188, "step": 1018 }, { "epoch": 0.009645876127639837, "grad_norm": 1181.5130615234375, "learning_rate": 6.429022082018927e-07, "loss": 127.9062, "step": 1019 }, { "epoch": 0.00965534214935489, "grad_norm": 396.13238525390625, "learning_rate": 6.435331230283911e-07, "loss": 42.4062, "step": 1020 }, { "epoch": 0.009664808171069945, "grad_norm": 1417.650390625, "learning_rate": 6.441640378548896e-07, "loss": 118.2812, "step": 1021 }, { "epoch": 0.009674274192784998, "grad_norm": 438.482177734375, "learning_rate": 6.44794952681388e-07, "loss": 48.875, "step": 1022 }, { "epoch": 0.009683740214500053, "grad_norm": 525.5158081054688, "learning_rate": 6.454258675078864e-07, "loss": 44.8594, "step": 1023 }, { "epoch": 0.009693206236215106, "grad_norm": 583.6134643554688, "learning_rate": 6.460567823343849e-07, "loss": 35.5312, "step": 1024 }, { "epoch": 0.009702672257930159, "grad_norm": 447.4403991699219, "learning_rate": 6.466876971608833e-07, "loss": 44.3125, "step": 1025 }, { "epoch": 0.009712138279645214, "grad_norm": 903.1251831054688, "learning_rate": 6.473186119873816e-07, "loss": 83.375, "step": 1026 }, { "epoch": 0.009721604301360267, "grad_norm": 806.1966552734375, "learning_rate": 6.4794952681388e-07, "loss": 50.0, "step": 1027 }, { "epoch": 0.009731070323075321, "grad_norm": 593.6306762695312, "learning_rate": 6.485804416403785e-07, "loss": 49.625, "step": 1028 }, { "epoch": 0.009740536344790374, "grad_norm": 959.9777221679688, "learning_rate": 6.49211356466877e-07, "loss": 42.2656, "step": 1029 }, { "epoch": 0.009750002366505429, "grad_norm": 1276.7330322265625, "learning_rate": 6.498422712933753e-07, "loss": 124.3438, "step": 1030 }, { "epoch": 0.009759468388220482, "grad_norm": 804.0978393554688, "learning_rate": 6.504731861198738e-07, "loss": 109.4688, "step": 1031 }, { "epoch": 0.009768934409935537, "grad_norm": 447.599853515625, "learning_rate": 6.511041009463723e-07, "loss": 48.4062, "step": 1032 }, { "epoch": 0.00977840043165059, "grad_norm": 432.9711608886719, "learning_rate": 6.517350157728706e-07, "loss": 41.75, "step": 1033 }, { "epoch": 0.009787866453365645, "grad_norm": 518.04443359375, "learning_rate": 6.523659305993691e-07, "loss": 56.0, "step": 1034 }, { "epoch": 0.009797332475080698, "grad_norm": 723.4774169921875, "learning_rate": 6.529968454258675e-07, "loss": 46.4219, "step": 1035 }, { "epoch": 0.009806798496795752, "grad_norm": 474.6865234375, "learning_rate": 6.536277602523658e-07, "loss": 45.0938, "step": 1036 }, { "epoch": 0.009816264518510805, "grad_norm": 815.59130859375, "learning_rate": 6.542586750788643e-07, "loss": 82.7969, "step": 1037 }, { "epoch": 0.00982573054022586, "grad_norm": 906.5792846679688, "learning_rate": 6.548895899053627e-07, "loss": 45.0469, "step": 1038 }, { "epoch": 0.009835196561940913, "grad_norm": 522.0941162109375, "learning_rate": 6.555205047318612e-07, "loss": 42.75, "step": 1039 }, { "epoch": 0.009844662583655966, "grad_norm": 695.6386108398438, "learning_rate": 6.561514195583596e-07, "loss": 83.3125, "step": 1040 }, { "epoch": 0.00985412860537102, "grad_norm": 620.719482421875, "learning_rate": 6.56782334384858e-07, "loss": 56.7188, "step": 1041 }, { "epoch": 0.009863594627086074, "grad_norm": 441.2978820800781, "learning_rate": 6.574132492113565e-07, "loss": 46.1562, "step": 1042 }, { "epoch": 0.009873060648801129, "grad_norm": 986.527587890625, "learning_rate": 6.580441640378549e-07, "loss": 104.6875, "step": 1043 }, { "epoch": 0.009882526670516182, "grad_norm": 2.6964478492736816, "learning_rate": 6.586750788643532e-07, "loss": 0.8887, "step": 1044 }, { "epoch": 0.009891992692231236, "grad_norm": 652.0984497070312, "learning_rate": 6.593059936908517e-07, "loss": 99.0938, "step": 1045 }, { "epoch": 0.00990145871394629, "grad_norm": 792.8449096679688, "learning_rate": 6.599369085173502e-07, "loss": 46.0, "step": 1046 }, { "epoch": 0.009910924735661344, "grad_norm": 765.668701171875, "learning_rate": 6.605678233438485e-07, "loss": 71.8281, "step": 1047 }, { "epoch": 0.009920390757376397, "grad_norm": 852.4647827148438, "learning_rate": 6.61198738170347e-07, "loss": 97.8438, "step": 1048 }, { "epoch": 0.009929856779091452, "grad_norm": 835.20849609375, "learning_rate": 6.618296529968454e-07, "loss": 52.375, "step": 1049 }, { "epoch": 0.009939322800806505, "grad_norm": 740.9287109375, "learning_rate": 6.624605678233438e-07, "loss": 74.25, "step": 1050 }, { "epoch": 0.00994878882252156, "grad_norm": 5165.1748046875, "learning_rate": 6.630914826498423e-07, "loss": 91.4062, "step": 1051 }, { "epoch": 0.009958254844236613, "grad_norm": 453.8856506347656, "learning_rate": 6.637223974763407e-07, "loss": 45.1875, "step": 1052 }, { "epoch": 0.009967720865951666, "grad_norm": 350.9989318847656, "learning_rate": 6.64353312302839e-07, "loss": 42.4688, "step": 1053 }, { "epoch": 0.00997718688766672, "grad_norm": 557.787353515625, "learning_rate": 6.649842271293375e-07, "loss": 46.4062, "step": 1054 }, { "epoch": 0.009986652909381773, "grad_norm": 643.2884521484375, "learning_rate": 6.656151419558359e-07, "loss": 37.3438, "step": 1055 }, { "epoch": 0.009996118931096828, "grad_norm": 954.6444091796875, "learning_rate": 6.662460567823344e-07, "loss": 46.625, "step": 1056 }, { "epoch": 0.010005584952811881, "grad_norm": 669.3492431640625, "learning_rate": 6.668769716088328e-07, "loss": 60.4062, "step": 1057 }, { "epoch": 0.010015050974526936, "grad_norm": 579.873291015625, "learning_rate": 6.675078864353312e-07, "loss": 54.5625, "step": 1058 }, { "epoch": 0.010024516996241989, "grad_norm": 1750.781005859375, "learning_rate": 6.681388012618297e-07, "loss": 47.7812, "step": 1059 }, { "epoch": 0.010033983017957044, "grad_norm": 1044.7310791015625, "learning_rate": 6.68769716088328e-07, "loss": 114.0312, "step": 1060 }, { "epoch": 0.010043449039672097, "grad_norm": 433.9689025878906, "learning_rate": 6.694006309148265e-07, "loss": 38.7656, "step": 1061 }, { "epoch": 0.010052915061387151, "grad_norm": 641.3822631835938, "learning_rate": 6.70031545741325e-07, "loss": 111.5, "step": 1062 }, { "epoch": 0.010062381083102204, "grad_norm": 369.2008361816406, "learning_rate": 6.706624605678232e-07, "loss": 42.9688, "step": 1063 }, { "epoch": 0.010071847104817259, "grad_norm": 2.9918320178985596, "learning_rate": 6.712933753943217e-07, "loss": 0.9243, "step": 1064 }, { "epoch": 0.010081313126532312, "grad_norm": 659.8385009765625, "learning_rate": 6.719242902208202e-07, "loss": 121.625, "step": 1065 }, { "epoch": 0.010090779148247367, "grad_norm": 725.3938598632812, "learning_rate": 6.725552050473186e-07, "loss": 63.4688, "step": 1066 }, { "epoch": 0.01010024516996242, "grad_norm": 815.791259765625, "learning_rate": 6.73186119873817e-07, "loss": 63.0625, "step": 1067 }, { "epoch": 0.010109711191677473, "grad_norm": 1127.51953125, "learning_rate": 6.738170347003155e-07, "loss": 69.9688, "step": 1068 }, { "epoch": 0.010119177213392528, "grad_norm": 634.3098754882812, "learning_rate": 6.744479495268139e-07, "loss": 52.4375, "step": 1069 }, { "epoch": 0.01012864323510758, "grad_norm": 472.3537902832031, "learning_rate": 6.750788643533123e-07, "loss": 43.25, "step": 1070 }, { "epoch": 0.010138109256822635, "grad_norm": 632.0870971679688, "learning_rate": 6.757097791798106e-07, "loss": 108.5938, "step": 1071 }, { "epoch": 0.010147575278537688, "grad_norm": 658.1650390625, "learning_rate": 6.763406940063091e-07, "loss": 73.8125, "step": 1072 }, { "epoch": 0.010157041300252743, "grad_norm": 710.2623901367188, "learning_rate": 6.769716088328075e-07, "loss": 73.25, "step": 1073 }, { "epoch": 0.010166507321967796, "grad_norm": 454.2070007324219, "learning_rate": 6.776025236593059e-07, "loss": 38.875, "step": 1074 }, { "epoch": 0.010175973343682851, "grad_norm": 1152.6356201171875, "learning_rate": 6.782334384858044e-07, "loss": 111.2188, "step": 1075 }, { "epoch": 0.010185439365397904, "grad_norm": 490.9013977050781, "learning_rate": 6.788643533123029e-07, "loss": 58.125, "step": 1076 }, { "epoch": 0.010194905387112959, "grad_norm": 629.836669921875, "learning_rate": 6.794952681388012e-07, "loss": 65.4375, "step": 1077 }, { "epoch": 0.010204371408828012, "grad_norm": 380.32220458984375, "learning_rate": 6.801261829652997e-07, "loss": 43.25, "step": 1078 }, { "epoch": 0.010213837430543066, "grad_norm": 964.2496337890625, "learning_rate": 6.807570977917982e-07, "loss": 33.5781, "step": 1079 }, { "epoch": 0.01022330345225812, "grad_norm": 880.822998046875, "learning_rate": 6.813880126182964e-07, "loss": 43.4062, "step": 1080 }, { "epoch": 0.010232769473973174, "grad_norm": 682.4675903320312, "learning_rate": 6.820189274447949e-07, "loss": 46.0, "step": 1081 }, { "epoch": 0.010242235495688227, "grad_norm": 619.7885131835938, "learning_rate": 6.826498422712933e-07, "loss": 66.7188, "step": 1082 }, { "epoch": 0.01025170151740328, "grad_norm": 450.420654296875, "learning_rate": 6.832807570977917e-07, "loss": 39.6875, "step": 1083 }, { "epoch": 0.010261167539118335, "grad_norm": 504.1923828125, "learning_rate": 6.839116719242902e-07, "loss": 48.6875, "step": 1084 }, { "epoch": 0.010270633560833388, "grad_norm": 409.48089599609375, "learning_rate": 6.845425867507886e-07, "loss": 42.0156, "step": 1085 }, { "epoch": 0.010280099582548443, "grad_norm": 737.5615844726562, "learning_rate": 6.851735015772871e-07, "loss": 65.1094, "step": 1086 }, { "epoch": 0.010289565604263496, "grad_norm": 463.4221496582031, "learning_rate": 6.858044164037855e-07, "loss": 45.7188, "step": 1087 }, { "epoch": 0.01029903162597855, "grad_norm": 621.4237670898438, "learning_rate": 6.864353312302839e-07, "loss": 84.3438, "step": 1088 }, { "epoch": 0.010308497647693603, "grad_norm": 1241.0513916015625, "learning_rate": 6.870662460567823e-07, "loss": 76.0156, "step": 1089 }, { "epoch": 0.010317963669408658, "grad_norm": 465.3297424316406, "learning_rate": 6.876971608832807e-07, "loss": 37.4375, "step": 1090 }, { "epoch": 0.010327429691123711, "grad_norm": 506.2282409667969, "learning_rate": 6.883280757097791e-07, "loss": 45.9375, "step": 1091 }, { "epoch": 0.010336895712838766, "grad_norm": 517.960205078125, "learning_rate": 6.889589905362776e-07, "loss": 41.4688, "step": 1092 }, { "epoch": 0.010346361734553819, "grad_norm": 538.6528930664062, "learning_rate": 6.895899053627759e-07, "loss": 85.8438, "step": 1093 }, { "epoch": 0.010355827756268874, "grad_norm": 751.6373901367188, "learning_rate": 6.902208201892744e-07, "loss": 56.7656, "step": 1094 }, { "epoch": 0.010365293777983927, "grad_norm": 767.9102172851562, "learning_rate": 6.908517350157729e-07, "loss": 89.2188, "step": 1095 }, { "epoch": 0.01037475979969898, "grad_norm": 627.80517578125, "learning_rate": 6.914826498422713e-07, "loss": 41.7812, "step": 1096 }, { "epoch": 0.010384225821414034, "grad_norm": 619.3834228515625, "learning_rate": 6.921135646687697e-07, "loss": 86.2812, "step": 1097 }, { "epoch": 0.010393691843129087, "grad_norm": 1102.9801025390625, "learning_rate": 6.927444794952681e-07, "loss": 61.7188, "step": 1098 }, { "epoch": 0.010403157864844142, "grad_norm": 776.8458862304688, "learning_rate": 6.933753943217665e-07, "loss": 87.7656, "step": 1099 }, { "epoch": 0.010412623886559195, "grad_norm": 602.0908813476562, "learning_rate": 6.940063091482649e-07, "loss": 44.7812, "step": 1100 }, { "epoch": 0.01042208990827425, "grad_norm": 575.8475952148438, "learning_rate": 6.946372239747633e-07, "loss": 52.9844, "step": 1101 }, { "epoch": 0.010431555929989303, "grad_norm": 616.6809692382812, "learning_rate": 6.952681388012618e-07, "loss": 65.9062, "step": 1102 }, { "epoch": 0.010441021951704358, "grad_norm": 521.318115234375, "learning_rate": 6.958990536277603e-07, "loss": 45.8906, "step": 1103 }, { "epoch": 0.01045048797341941, "grad_norm": 612.0499267578125, "learning_rate": 6.965299684542586e-07, "loss": 49.9844, "step": 1104 }, { "epoch": 0.010459953995134465, "grad_norm": 798.2891845703125, "learning_rate": 6.971608832807571e-07, "loss": 92.0625, "step": 1105 }, { "epoch": 0.010469420016849518, "grad_norm": 961.2871704101562, "learning_rate": 6.977917981072556e-07, "loss": 103.5, "step": 1106 }, { "epoch": 0.010478886038564573, "grad_norm": 480.06219482421875, "learning_rate": 6.984227129337538e-07, "loss": 42.3438, "step": 1107 }, { "epoch": 0.010488352060279626, "grad_norm": 650.8759155273438, "learning_rate": 6.990536277602523e-07, "loss": 57.6406, "step": 1108 }, { "epoch": 0.010497818081994681, "grad_norm": 764.945068359375, "learning_rate": 6.996845425867508e-07, "loss": 33.8125, "step": 1109 }, { "epoch": 0.010507284103709734, "grad_norm": 823.4776611328125, "learning_rate": 7.003154574132491e-07, "loss": 61.5625, "step": 1110 }, { "epoch": 0.010516750125424787, "grad_norm": 829.9736938476562, "learning_rate": 7.009463722397476e-07, "loss": 56.9375, "step": 1111 }, { "epoch": 0.010526216147139842, "grad_norm": 438.975830078125, "learning_rate": 7.01577287066246e-07, "loss": 45.9688, "step": 1112 }, { "epoch": 0.010535682168854895, "grad_norm": 858.3340454101562, "learning_rate": 7.022082018927445e-07, "loss": 44.6875, "step": 1113 }, { "epoch": 0.01054514819056995, "grad_norm": 583.1493530273438, "learning_rate": 7.028391167192429e-07, "loss": 72.8125, "step": 1114 }, { "epoch": 0.010554614212285002, "grad_norm": 483.8931579589844, "learning_rate": 7.034700315457413e-07, "loss": 50.7969, "step": 1115 }, { "epoch": 0.010564080234000057, "grad_norm": 623.7691040039062, "learning_rate": 7.041009463722397e-07, "loss": 43.1875, "step": 1116 }, { "epoch": 0.01057354625571511, "grad_norm": 540.7147827148438, "learning_rate": 7.047318611987381e-07, "loss": 102.75, "step": 1117 }, { "epoch": 0.010583012277430165, "grad_norm": 378.2498779296875, "learning_rate": 7.053627760252365e-07, "loss": 38.1562, "step": 1118 }, { "epoch": 0.010592478299145218, "grad_norm": 3.1447324752807617, "learning_rate": 7.05993690851735e-07, "loss": 0.8916, "step": 1119 }, { "epoch": 0.010601944320860273, "grad_norm": 871.4137573242188, "learning_rate": 7.066246056782334e-07, "loss": 122.0625, "step": 1120 }, { "epoch": 0.010611410342575326, "grad_norm": 780.8139038085938, "learning_rate": 7.072555205047318e-07, "loss": 49.2188, "step": 1121 }, { "epoch": 0.01062087636429038, "grad_norm": 704.7964477539062, "learning_rate": 7.078864353312303e-07, "loss": 88.9375, "step": 1122 }, { "epoch": 0.010630342386005433, "grad_norm": 693.3671264648438, "learning_rate": 7.085173501577287e-07, "loss": 44.7188, "step": 1123 }, { "epoch": 0.010639808407720486, "grad_norm": 1275.65625, "learning_rate": 7.091482649842271e-07, "loss": 98.6562, "step": 1124 }, { "epoch": 0.010649274429435541, "grad_norm": 491.3037109375, "learning_rate": 7.097791798107255e-07, "loss": 63.9375, "step": 1125 }, { "epoch": 0.010658740451150594, "grad_norm": 417.89508056640625, "learning_rate": 7.104100946372239e-07, "loss": 44.9062, "step": 1126 }, { "epoch": 0.010668206472865649, "grad_norm": 407.1385192871094, "learning_rate": 7.110410094637223e-07, "loss": 38.9062, "step": 1127 }, { "epoch": 0.010677672494580702, "grad_norm": 1325.8424072265625, "learning_rate": 7.116719242902208e-07, "loss": 115.625, "step": 1128 }, { "epoch": 0.010687138516295757, "grad_norm": 2.8936243057250977, "learning_rate": 7.123028391167192e-07, "loss": 1.0005, "step": 1129 }, { "epoch": 0.01069660453801081, "grad_norm": 796.4735717773438, "learning_rate": 7.129337539432176e-07, "loss": 48.8125, "step": 1130 }, { "epoch": 0.010706070559725864, "grad_norm": 617.4832763671875, "learning_rate": 7.135646687697161e-07, "loss": 85.75, "step": 1131 }, { "epoch": 0.010715536581440917, "grad_norm": 768.2022094726562, "learning_rate": 7.141955835962145e-07, "loss": 40.5156, "step": 1132 }, { "epoch": 0.010725002603155972, "grad_norm": 429.1643981933594, "learning_rate": 7.14826498422713e-07, "loss": 46.7344, "step": 1133 }, { "epoch": 0.010734468624871025, "grad_norm": 627.5374145507812, "learning_rate": 7.154574132492112e-07, "loss": 88.75, "step": 1134 }, { "epoch": 0.01074393464658608, "grad_norm": 535.9691162109375, "learning_rate": 7.160883280757097e-07, "loss": 44.9688, "step": 1135 }, { "epoch": 0.010753400668301133, "grad_norm": 2.791865110397339, "learning_rate": 7.167192429022082e-07, "loss": 0.9404, "step": 1136 }, { "epoch": 0.010762866690016188, "grad_norm": 972.8465576171875, "learning_rate": 7.173501577287065e-07, "loss": 91.875, "step": 1137 }, { "epoch": 0.01077233271173124, "grad_norm": 998.28271484375, "learning_rate": 7.17981072555205e-07, "loss": 130.375, "step": 1138 }, { "epoch": 0.010781798733446294, "grad_norm": 687.6041870117188, "learning_rate": 7.186119873817035e-07, "loss": 68.375, "step": 1139 }, { "epoch": 0.010791264755161348, "grad_norm": 599.0660400390625, "learning_rate": 7.192429022082019e-07, "loss": 60.7812, "step": 1140 }, { "epoch": 0.010800730776876401, "grad_norm": 440.9027404785156, "learning_rate": 7.198738170347003e-07, "loss": 42.1719, "step": 1141 }, { "epoch": 0.010810196798591456, "grad_norm": 566.9161376953125, "learning_rate": 7.205047318611988e-07, "loss": 58.25, "step": 1142 }, { "epoch": 0.01081966282030651, "grad_norm": 738.4607543945312, "learning_rate": 7.211356466876971e-07, "loss": 85.1875, "step": 1143 }, { "epoch": 0.010829128842021564, "grad_norm": 523.1826782226562, "learning_rate": 7.217665615141955e-07, "loss": 55.0625, "step": 1144 }, { "epoch": 0.010838594863736617, "grad_norm": 1281.4342041015625, "learning_rate": 7.223974763406939e-07, "loss": 97.5, "step": 1145 }, { "epoch": 0.010848060885451672, "grad_norm": 3.028278112411499, "learning_rate": 7.230283911671924e-07, "loss": 0.8862, "step": 1146 }, { "epoch": 0.010857526907166725, "grad_norm": 497.7133483886719, "learning_rate": 7.236593059936908e-07, "loss": 43.875, "step": 1147 }, { "epoch": 0.01086699292888178, "grad_norm": 605.82080078125, "learning_rate": 7.242902208201892e-07, "loss": 43.4062, "step": 1148 }, { "epoch": 0.010876458950596832, "grad_norm": 1414.20166015625, "learning_rate": 7.249211356466877e-07, "loss": 90.5938, "step": 1149 }, { "epoch": 0.010885924972311887, "grad_norm": 2.4926888942718506, "learning_rate": 7.255520504731862e-07, "loss": 0.8311, "step": 1150 }, { "epoch": 0.01089539099402694, "grad_norm": 844.3794555664062, "learning_rate": 7.261829652996845e-07, "loss": 92.9688, "step": 1151 }, { "epoch": 0.010904857015741995, "grad_norm": 964.7125244140625, "learning_rate": 7.26813880126183e-07, "loss": 66.2188, "step": 1152 }, { "epoch": 0.010914323037457048, "grad_norm": 480.13623046875, "learning_rate": 7.274447949526814e-07, "loss": 46.0469, "step": 1153 }, { "epoch": 0.010923789059172101, "grad_norm": 380.8642578125, "learning_rate": 7.280757097791797e-07, "loss": 39.7969, "step": 1154 }, { "epoch": 0.010933255080887156, "grad_norm": 587.11962890625, "learning_rate": 7.287066246056782e-07, "loss": 55.3594, "step": 1155 }, { "epoch": 0.010942721102602209, "grad_norm": 395.0832214355469, "learning_rate": 7.293375394321766e-07, "loss": 68.5312, "step": 1156 }, { "epoch": 0.010952187124317263, "grad_norm": 361.3826599121094, "learning_rate": 7.29968454258675e-07, "loss": 37.6562, "step": 1157 }, { "epoch": 0.010961653146032316, "grad_norm": 542.1301879882812, "learning_rate": 7.305993690851735e-07, "loss": 67.5625, "step": 1158 }, { "epoch": 0.010971119167747371, "grad_norm": 416.7520751953125, "learning_rate": 7.312302839116719e-07, "loss": 43.9062, "step": 1159 }, { "epoch": 0.010980585189462424, "grad_norm": 668.8477172851562, "learning_rate": 7.318611987381704e-07, "loss": 86.1094, "step": 1160 }, { "epoch": 0.010990051211177479, "grad_norm": 483.09503173828125, "learning_rate": 7.324921135646687e-07, "loss": 47.8125, "step": 1161 }, { "epoch": 0.010999517232892532, "grad_norm": 793.0693359375, "learning_rate": 7.331230283911671e-07, "loss": 94.75, "step": 1162 }, { "epoch": 0.011008983254607587, "grad_norm": 389.770263671875, "learning_rate": 7.337539432176656e-07, "loss": 44.7031, "step": 1163 }, { "epoch": 0.01101844927632264, "grad_norm": 929.1907958984375, "learning_rate": 7.34384858044164e-07, "loss": 69.8438, "step": 1164 }, { "epoch": 0.011027915298037694, "grad_norm": 1012.8470458984375, "learning_rate": 7.350157728706624e-07, "loss": 43.5625, "step": 1165 }, { "epoch": 0.011037381319752747, "grad_norm": 511.25439453125, "learning_rate": 7.356466876971609e-07, "loss": 42.7031, "step": 1166 }, { "epoch": 0.0110468473414678, "grad_norm": 550.285888671875, "learning_rate": 7.362776025236592e-07, "loss": 64.5625, "step": 1167 }, { "epoch": 0.011056313363182855, "grad_norm": 1324.77392578125, "learning_rate": 7.369085173501577e-07, "loss": 104.7188, "step": 1168 }, { "epoch": 0.011065779384897908, "grad_norm": 2.3348450660705566, "learning_rate": 7.375394321766562e-07, "loss": 0.7603, "step": 1169 }, { "epoch": 0.011075245406612963, "grad_norm": 684.0159301757812, "learning_rate": 7.381703470031545e-07, "loss": 42.6719, "step": 1170 }, { "epoch": 0.011084711428328016, "grad_norm": 575.6126708984375, "learning_rate": 7.388012618296529e-07, "loss": 69.6875, "step": 1171 }, { "epoch": 0.01109417745004307, "grad_norm": 617.8461303710938, "learning_rate": 7.394321766561514e-07, "loss": 68.5938, "step": 1172 }, { "epoch": 0.011103643471758124, "grad_norm": 983.6828002929688, "learning_rate": 7.400630914826498e-07, "loss": 100.4375, "step": 1173 }, { "epoch": 0.011113109493473178, "grad_norm": 647.299560546875, "learning_rate": 7.406940063091482e-07, "loss": 51.1562, "step": 1174 }, { "epoch": 0.011122575515188231, "grad_norm": 674.8086547851562, "learning_rate": 7.413249211356467e-07, "loss": 58.2812, "step": 1175 }, { "epoch": 0.011132041536903286, "grad_norm": 575.8084106445312, "learning_rate": 7.419558359621451e-07, "loss": 72.3438, "step": 1176 }, { "epoch": 0.01114150755861834, "grad_norm": 361.0384216308594, "learning_rate": 7.425867507886436e-07, "loss": 39.9688, "step": 1177 }, { "epoch": 0.011150973580333394, "grad_norm": 546.4700927734375, "learning_rate": 7.432176656151419e-07, "loss": 49.4219, "step": 1178 }, { "epoch": 0.011160439602048447, "grad_norm": 941.0468139648438, "learning_rate": 7.438485804416403e-07, "loss": 73.9688, "step": 1179 }, { "epoch": 0.011169905623763502, "grad_norm": 547.6505737304688, "learning_rate": 7.444794952681388e-07, "loss": 59.125, "step": 1180 }, { "epoch": 0.011179371645478555, "grad_norm": 570.5455322265625, "learning_rate": 7.451104100946371e-07, "loss": 89.0938, "step": 1181 }, { "epoch": 0.011188837667193608, "grad_norm": 1102.265625, "learning_rate": 7.457413249211356e-07, "loss": 45.2344, "step": 1182 }, { "epoch": 0.011198303688908662, "grad_norm": 761.554443359375, "learning_rate": 7.463722397476341e-07, "loss": 42.4688, "step": 1183 }, { "epoch": 0.011207769710623715, "grad_norm": 823.8197631835938, "learning_rate": 7.470031545741324e-07, "loss": 51.4375, "step": 1184 }, { "epoch": 0.01121723573233877, "grad_norm": 791.961181640625, "learning_rate": 7.476340694006309e-07, "loss": 101.2969, "step": 1185 }, { "epoch": 0.011226701754053823, "grad_norm": 641.5358276367188, "learning_rate": 7.482649842271294e-07, "loss": 80.0, "step": 1186 }, { "epoch": 0.011236167775768878, "grad_norm": 3.2808167934417725, "learning_rate": 7.488958990536278e-07, "loss": 0.9468, "step": 1187 }, { "epoch": 0.011245633797483931, "grad_norm": 584.5095825195312, "learning_rate": 7.495268138801261e-07, "loss": 83.0625, "step": 1188 }, { "epoch": 0.011255099819198986, "grad_norm": 727.7489624023438, "learning_rate": 7.501577287066245e-07, "loss": 78.5625, "step": 1189 }, { "epoch": 0.011264565840914039, "grad_norm": 525.2669677734375, "learning_rate": 7.50788643533123e-07, "loss": 83.0625, "step": 1190 }, { "epoch": 0.011274031862629093, "grad_norm": 1470.11328125, "learning_rate": 7.514195583596214e-07, "loss": 62.875, "step": 1191 }, { "epoch": 0.011283497884344146, "grad_norm": 655.149169921875, "learning_rate": 7.520504731861198e-07, "loss": 88.0625, "step": 1192 }, { "epoch": 0.011292963906059201, "grad_norm": 701.13525390625, "learning_rate": 7.526813880126183e-07, "loss": 75.25, "step": 1193 }, { "epoch": 0.011302429927774254, "grad_norm": 634.4474487304688, "learning_rate": 7.533123028391167e-07, "loss": 63.9688, "step": 1194 }, { "epoch": 0.011311895949489309, "grad_norm": 1587.64111328125, "learning_rate": 7.539432176656151e-07, "loss": 74.2812, "step": 1195 }, { "epoch": 0.011321361971204362, "grad_norm": 776.825439453125, "learning_rate": 7.545741324921136e-07, "loss": 40.4688, "step": 1196 }, { "epoch": 0.011330827992919415, "grad_norm": 2.978459119796753, "learning_rate": 7.55205047318612e-07, "loss": 0.9155, "step": 1197 }, { "epoch": 0.01134029401463447, "grad_norm": 1323.5599365234375, "learning_rate": 7.558359621451103e-07, "loss": 106.2969, "step": 1198 }, { "epoch": 0.011349760036349523, "grad_norm": 997.7550048828125, "learning_rate": 7.564668769716088e-07, "loss": 63.9062, "step": 1199 }, { "epoch": 0.011359226058064577, "grad_norm": 925.4991455078125, "learning_rate": 7.570977917981072e-07, "loss": 88.2812, "step": 1200 }, { "epoch": 0.01136869207977963, "grad_norm": 714.1985473632812, "learning_rate": 7.577287066246056e-07, "loss": 46.2969, "step": 1201 }, { "epoch": 0.011378158101494685, "grad_norm": 908.1348266601562, "learning_rate": 7.583596214511041e-07, "loss": 64.4219, "step": 1202 }, { "epoch": 0.011387624123209738, "grad_norm": 503.49932861328125, "learning_rate": 7.589905362776025e-07, "loss": 48.9531, "step": 1203 }, { "epoch": 0.011397090144924793, "grad_norm": 611.954345703125, "learning_rate": 7.596214511041009e-07, "loss": 69.125, "step": 1204 }, { "epoch": 0.011406556166639846, "grad_norm": 471.5843505859375, "learning_rate": 7.602523659305994e-07, "loss": 45.8438, "step": 1205 }, { "epoch": 0.0114160221883549, "grad_norm": 890.114013671875, "learning_rate": 7.608832807570977e-07, "loss": 50.8438, "step": 1206 }, { "epoch": 0.011425488210069954, "grad_norm": 467.24603271484375, "learning_rate": 7.615141955835962e-07, "loss": 42.0, "step": 1207 }, { "epoch": 0.011434954231785008, "grad_norm": 3.9368038177490234, "learning_rate": 7.621451104100945e-07, "loss": 0.7314, "step": 1208 }, { "epoch": 0.011444420253500062, "grad_norm": 1478.703369140625, "learning_rate": 7.62776025236593e-07, "loss": 108.2812, "step": 1209 }, { "epoch": 0.011453886275215115, "grad_norm": 935.6073608398438, "learning_rate": 7.634069400630915e-07, "loss": 111.5781, "step": 1210 }, { "epoch": 0.01146335229693017, "grad_norm": 661.17041015625, "learning_rate": 7.640378548895898e-07, "loss": 54.5938, "step": 1211 }, { "epoch": 0.011472818318645222, "grad_norm": 535.427001953125, "learning_rate": 7.646687697160883e-07, "loss": 86.625, "step": 1212 }, { "epoch": 0.011482284340360277, "grad_norm": 519.889892578125, "learning_rate": 7.652996845425868e-07, "loss": 76.875, "step": 1213 }, { "epoch": 0.01149175036207533, "grad_norm": 917.4583740234375, "learning_rate": 7.659305993690851e-07, "loss": 86.3125, "step": 1214 }, { "epoch": 0.011501216383790385, "grad_norm": 595.05322265625, "learning_rate": 7.665615141955835e-07, "loss": 42.0938, "step": 1215 }, { "epoch": 0.011510682405505438, "grad_norm": 1580.8245849609375, "learning_rate": 7.67192429022082e-07, "loss": 88.6562, "step": 1216 }, { "epoch": 0.011520148427220493, "grad_norm": 655.0455322265625, "learning_rate": 7.678233438485804e-07, "loss": 64.0, "step": 1217 }, { "epoch": 0.011529614448935546, "grad_norm": 361.3646545410156, "learning_rate": 7.684542586750788e-07, "loss": 36.5625, "step": 1218 }, { "epoch": 0.0115390804706506, "grad_norm": 475.9730529785156, "learning_rate": 7.690851735015772e-07, "loss": 47.5781, "step": 1219 }, { "epoch": 0.011548546492365653, "grad_norm": 847.6423950195312, "learning_rate": 7.697160883280757e-07, "loss": 97.4062, "step": 1220 }, { "epoch": 0.011558012514080708, "grad_norm": 2.8572347164154053, "learning_rate": 7.703470031545741e-07, "loss": 0.8721, "step": 1221 }, { "epoch": 0.011567478535795761, "grad_norm": 628.3573608398438, "learning_rate": 7.709779179810725e-07, "loss": 46.9375, "step": 1222 }, { "epoch": 0.011576944557510816, "grad_norm": 706.1398315429688, "learning_rate": 7.71608832807571e-07, "loss": 61.8125, "step": 1223 }, { "epoch": 0.011586410579225869, "grad_norm": 695.4225463867188, "learning_rate": 7.722397476340695e-07, "loss": 105.375, "step": 1224 }, { "epoch": 0.011595876600940922, "grad_norm": 593.7330932617188, "learning_rate": 7.728706624605677e-07, "loss": 78.1562, "step": 1225 }, { "epoch": 0.011605342622655977, "grad_norm": 437.4502868652344, "learning_rate": 7.735015772870662e-07, "loss": 34.5156, "step": 1226 }, { "epoch": 0.01161480864437103, "grad_norm": 506.0677795410156, "learning_rate": 7.741324921135647e-07, "loss": 54.5, "step": 1227 }, { "epoch": 0.011624274666086084, "grad_norm": 2275.670166015625, "learning_rate": 7.74763406940063e-07, "loss": 95.8438, "step": 1228 }, { "epoch": 0.011633740687801137, "grad_norm": 1117.0675048828125, "learning_rate": 7.753943217665615e-07, "loss": 72.2188, "step": 1229 }, { "epoch": 0.011643206709516192, "grad_norm": 492.5444030761719, "learning_rate": 7.760252365930599e-07, "loss": 37.5938, "step": 1230 }, { "epoch": 0.011652672731231245, "grad_norm": 479.2723693847656, "learning_rate": 7.766561514195583e-07, "loss": 42.4531, "step": 1231 }, { "epoch": 0.0116621387529463, "grad_norm": 686.6377563476562, "learning_rate": 7.772870662460568e-07, "loss": 99.6875, "step": 1232 }, { "epoch": 0.011671604774661353, "grad_norm": 597.7495727539062, "learning_rate": 7.779179810725552e-07, "loss": 49.8125, "step": 1233 }, { "epoch": 0.011681070796376408, "grad_norm": 582.8693237304688, "learning_rate": 7.785488958990536e-07, "loss": 41.1875, "step": 1234 }, { "epoch": 0.01169053681809146, "grad_norm": 992.7817993164062, "learning_rate": 7.79179810725552e-07, "loss": 122.8125, "step": 1235 }, { "epoch": 0.011700002839806515, "grad_norm": 617.5075073242188, "learning_rate": 7.798107255520504e-07, "loss": 60.4688, "step": 1236 }, { "epoch": 0.011709468861521568, "grad_norm": 474.9856262207031, "learning_rate": 7.804416403785489e-07, "loss": 50.1719, "step": 1237 }, { "epoch": 0.011718934883236621, "grad_norm": 723.9612426757812, "learning_rate": 7.810725552050473e-07, "loss": 100.1875, "step": 1238 }, { "epoch": 0.011728400904951676, "grad_norm": 916.9580688476562, "learning_rate": 7.817034700315457e-07, "loss": 81.7812, "step": 1239 }, { "epoch": 0.011737866926666729, "grad_norm": 1040.59326171875, "learning_rate": 7.823343848580442e-07, "loss": 76.2344, "step": 1240 }, { "epoch": 0.011747332948381784, "grad_norm": 549.6194458007812, "learning_rate": 7.829652996845425e-07, "loss": 40.5312, "step": 1241 }, { "epoch": 0.011756798970096837, "grad_norm": 1106.5904541015625, "learning_rate": 7.83596214511041e-07, "loss": 42.125, "step": 1242 }, { "epoch": 0.011766264991811892, "grad_norm": 488.5059509277344, "learning_rate": 7.842271293375394e-07, "loss": 47.9062, "step": 1243 }, { "epoch": 0.011775731013526945, "grad_norm": 781.1760864257812, "learning_rate": 7.848580441640378e-07, "loss": 91.6562, "step": 1244 }, { "epoch": 0.011785197035242, "grad_norm": 523.8766479492188, "learning_rate": 7.854889589905362e-07, "loss": 54.9062, "step": 1245 }, { "epoch": 0.011794663056957052, "grad_norm": 1125.2108154296875, "learning_rate": 7.861198738170347e-07, "loss": 87.2188, "step": 1246 }, { "epoch": 0.011804129078672107, "grad_norm": 2.768118381500244, "learning_rate": 7.867507886435331e-07, "loss": 0.7827, "step": 1247 }, { "epoch": 0.01181359510038716, "grad_norm": 487.3508605957031, "learning_rate": 7.873817034700315e-07, "loss": 68.125, "step": 1248 }, { "epoch": 0.011823061122102215, "grad_norm": 490.568359375, "learning_rate": 7.8801261829653e-07, "loss": 38.125, "step": 1249 }, { "epoch": 0.011832527143817268, "grad_norm": 578.349365234375, "learning_rate": 7.886435331230284e-07, "loss": 49.2812, "step": 1250 }, { "epoch": 0.011841993165532323, "grad_norm": 605.2449340820312, "learning_rate": 7.892744479495268e-07, "loss": 89.5938, "step": 1251 }, { "epoch": 0.011851459187247376, "grad_norm": 977.90380859375, "learning_rate": 7.899053627760251e-07, "loss": 69.9844, "step": 1252 }, { "epoch": 0.011860925208962429, "grad_norm": 812.7798461914062, "learning_rate": 7.905362776025236e-07, "loss": 49.125, "step": 1253 }, { "epoch": 0.011870391230677483, "grad_norm": 558.04443359375, "learning_rate": 7.911671924290221e-07, "loss": 50.4375, "step": 1254 }, { "epoch": 0.011879857252392536, "grad_norm": 642.2891845703125, "learning_rate": 7.917981072555204e-07, "loss": 45.0625, "step": 1255 }, { "epoch": 0.011889323274107591, "grad_norm": 840.6593627929688, "learning_rate": 7.924290220820189e-07, "loss": 42.2188, "step": 1256 }, { "epoch": 0.011898789295822644, "grad_norm": 524.13427734375, "learning_rate": 7.930599369085174e-07, "loss": 46.8438, "step": 1257 }, { "epoch": 0.011908255317537699, "grad_norm": 942.5008544921875, "learning_rate": 7.936908517350157e-07, "loss": 49.25, "step": 1258 }, { "epoch": 0.011917721339252752, "grad_norm": 468.6698913574219, "learning_rate": 7.943217665615142e-07, "loss": 51.7812, "step": 1259 }, { "epoch": 0.011927187360967807, "grad_norm": 406.6846923828125, "learning_rate": 7.949526813880127e-07, "loss": 37.7031, "step": 1260 }, { "epoch": 0.01193665338268286, "grad_norm": 668.6524658203125, "learning_rate": 7.955835962145109e-07, "loss": 92.0, "step": 1261 }, { "epoch": 0.011946119404397914, "grad_norm": 502.8438415527344, "learning_rate": 7.962145110410094e-07, "loss": 90.5938, "step": 1262 }, { "epoch": 0.011955585426112967, "grad_norm": 669.2996215820312, "learning_rate": 7.968454258675078e-07, "loss": 48.2188, "step": 1263 }, { "epoch": 0.011965051447828022, "grad_norm": 1613.38330078125, "learning_rate": 7.974763406940063e-07, "loss": 92.8594, "step": 1264 }, { "epoch": 0.011974517469543075, "grad_norm": 358.9454040527344, "learning_rate": 7.981072555205047e-07, "loss": 40.5156, "step": 1265 }, { "epoch": 0.01198398349125813, "grad_norm": 888.8532104492188, "learning_rate": 7.987381703470031e-07, "loss": 49.625, "step": 1266 }, { "epoch": 0.011993449512973183, "grad_norm": 728.9982299804688, "learning_rate": 7.993690851735016e-07, "loss": 89.8906, "step": 1267 }, { "epoch": 0.012002915534688236, "grad_norm": 413.3031921386719, "learning_rate": 8e-07, "loss": 49.0469, "step": 1268 }, { "epoch": 0.01201238155640329, "grad_norm": 2.8767576217651367, "learning_rate": 8.006309148264984e-07, "loss": 0.7988, "step": 1269 }, { "epoch": 0.012021847578118344, "grad_norm": 708.7467651367188, "learning_rate": 8.012618296529968e-07, "loss": 85.375, "step": 1270 }, { "epoch": 0.012031313599833398, "grad_norm": 487.2876281738281, "learning_rate": 8.018927444794953e-07, "loss": 39.0781, "step": 1271 }, { "epoch": 0.012040779621548451, "grad_norm": 1376.2908935546875, "learning_rate": 8.025236593059936e-07, "loss": 81.5469, "step": 1272 }, { "epoch": 0.012050245643263506, "grad_norm": 429.0411682128906, "learning_rate": 8.031545741324921e-07, "loss": 38.6875, "step": 1273 }, { "epoch": 0.012059711664978559, "grad_norm": 897.2138061523438, "learning_rate": 8.037854889589905e-07, "loss": 45.9062, "step": 1274 }, { "epoch": 0.012069177686693614, "grad_norm": 506.9822692871094, "learning_rate": 8.044164037854889e-07, "loss": 51.5938, "step": 1275 }, { "epoch": 0.012078643708408667, "grad_norm": 1107.029296875, "learning_rate": 8.050473186119874e-07, "loss": 46.6719, "step": 1276 }, { "epoch": 0.012088109730123722, "grad_norm": 507.7869567871094, "learning_rate": 8.056782334384858e-07, "loss": 38.375, "step": 1277 }, { "epoch": 0.012097575751838775, "grad_norm": 460.1737976074219, "learning_rate": 8.063091482649842e-07, "loss": 38.375, "step": 1278 }, { "epoch": 0.01210704177355383, "grad_norm": 721.6969604492188, "learning_rate": 8.069400630914826e-07, "loss": 45.375, "step": 1279 }, { "epoch": 0.012116507795268882, "grad_norm": 656.0482788085938, "learning_rate": 8.07570977917981e-07, "loss": 80.4062, "step": 1280 }, { "epoch": 0.012125973816983935, "grad_norm": 603.2412109375, "learning_rate": 8.082018927444795e-07, "loss": 78.125, "step": 1281 }, { "epoch": 0.01213543983869899, "grad_norm": 719.6988525390625, "learning_rate": 8.088328075709778e-07, "loss": 43.0312, "step": 1282 }, { "epoch": 0.012144905860414043, "grad_norm": 660.5941162109375, "learning_rate": 8.094637223974763e-07, "loss": 42.8906, "step": 1283 }, { "epoch": 0.012154371882129098, "grad_norm": 505.85272216796875, "learning_rate": 8.100946372239748e-07, "loss": 36.7344, "step": 1284 }, { "epoch": 0.01216383790384415, "grad_norm": 783.5394287109375, "learning_rate": 8.107255520504731e-07, "loss": 80.5156, "step": 1285 }, { "epoch": 0.012173303925559206, "grad_norm": 330.84259033203125, "learning_rate": 8.113564668769716e-07, "loss": 38.0625, "step": 1286 }, { "epoch": 0.012182769947274259, "grad_norm": 688.4213256835938, "learning_rate": 8.119873817034701e-07, "loss": 47.6875, "step": 1287 }, { "epoch": 0.012192235968989313, "grad_norm": 428.7878112792969, "learning_rate": 8.126182965299683e-07, "loss": 38.2188, "step": 1288 }, { "epoch": 0.012201701990704366, "grad_norm": 389.74810791015625, "learning_rate": 8.132492113564668e-07, "loss": 43.1406, "step": 1289 }, { "epoch": 0.012211168012419421, "grad_norm": 693.2640380859375, "learning_rate": 8.138801261829653e-07, "loss": 53.3594, "step": 1290 }, { "epoch": 0.012220634034134474, "grad_norm": 742.7010498046875, "learning_rate": 8.145110410094637e-07, "loss": 63.6719, "step": 1291 }, { "epoch": 0.012230100055849529, "grad_norm": 472.6009521484375, "learning_rate": 8.151419558359621e-07, "loss": 48.9062, "step": 1292 }, { "epoch": 0.012239566077564582, "grad_norm": 420.6577453613281, "learning_rate": 8.157728706624605e-07, "loss": 45.3594, "step": 1293 }, { "epoch": 0.012249032099279637, "grad_norm": 2.9607489109039307, "learning_rate": 8.16403785488959e-07, "loss": 0.9351, "step": 1294 }, { "epoch": 0.01225849812099469, "grad_norm": 619.4861450195312, "learning_rate": 8.170347003154574e-07, "loss": 47.875, "step": 1295 }, { "epoch": 0.012267964142709743, "grad_norm": 450.12255859375, "learning_rate": 8.176656151419558e-07, "loss": 38.8906, "step": 1296 }, { "epoch": 0.012277430164424797, "grad_norm": 394.38055419921875, "learning_rate": 8.182965299684542e-07, "loss": 40.3125, "step": 1297 }, { "epoch": 0.01228689618613985, "grad_norm": 549.5364379882812, "learning_rate": 8.189274447949526e-07, "loss": 42.25, "step": 1298 }, { "epoch": 0.012296362207854905, "grad_norm": 674.5668334960938, "learning_rate": 8.19558359621451e-07, "loss": 75.0312, "step": 1299 }, { "epoch": 0.012305828229569958, "grad_norm": 1077.7392578125, "learning_rate": 8.201892744479495e-07, "loss": 79.1562, "step": 1300 }, { "epoch": 0.012315294251285013, "grad_norm": 1165.7916259765625, "learning_rate": 8.20820189274448e-07, "loss": 105.7188, "step": 1301 }, { "epoch": 0.012324760273000066, "grad_norm": 993.7110595703125, "learning_rate": 8.214511041009463e-07, "loss": 80.625, "step": 1302 }, { "epoch": 0.01233422629471512, "grad_norm": 1974.6302490234375, "learning_rate": 8.220820189274448e-07, "loss": 96.4688, "step": 1303 }, { "epoch": 0.012343692316430174, "grad_norm": 988.7732543945312, "learning_rate": 8.227129337539432e-07, "loss": 58.0781, "step": 1304 }, { "epoch": 0.012353158338145228, "grad_norm": 535.5286865234375, "learning_rate": 8.233438485804416e-07, "loss": 40.1406, "step": 1305 }, { "epoch": 0.012362624359860281, "grad_norm": 486.5096435546875, "learning_rate": 8.2397476340694e-07, "loss": 47.8594, "step": 1306 }, { "epoch": 0.012372090381575336, "grad_norm": 428.5718994140625, "learning_rate": 8.246056782334384e-07, "loss": 36.9844, "step": 1307 }, { "epoch": 0.012381556403290389, "grad_norm": 1061.1573486328125, "learning_rate": 8.252365930599368e-07, "loss": 101.9688, "step": 1308 }, { "epoch": 0.012391022425005442, "grad_norm": 472.0103759765625, "learning_rate": 8.258675078864353e-07, "loss": 35.1875, "step": 1309 }, { "epoch": 0.012400488446720497, "grad_norm": 1277.217041015625, "learning_rate": 8.264984227129337e-07, "loss": 70.3594, "step": 1310 }, { "epoch": 0.01240995446843555, "grad_norm": 368.2793884277344, "learning_rate": 8.271293375394322e-07, "loss": 39.6562, "step": 1311 }, { "epoch": 0.012419420490150605, "grad_norm": 463.2733459472656, "learning_rate": 8.277602523659306e-07, "loss": 46.7031, "step": 1312 }, { "epoch": 0.012428886511865658, "grad_norm": 2406.74365234375, "learning_rate": 8.28391167192429e-07, "loss": 47.9531, "step": 1313 }, { "epoch": 0.012438352533580712, "grad_norm": 2.376315116882324, "learning_rate": 8.290220820189275e-07, "loss": 0.8843, "step": 1314 }, { "epoch": 0.012447818555295765, "grad_norm": 415.8637390136719, "learning_rate": 8.296529968454257e-07, "loss": 38.0312, "step": 1315 }, { "epoch": 0.01245728457701082, "grad_norm": 1135.4820556640625, "learning_rate": 8.302839116719242e-07, "loss": 94.0312, "step": 1316 }, { "epoch": 0.012466750598725873, "grad_norm": 812.2321166992188, "learning_rate": 8.309148264984227e-07, "loss": 56.75, "step": 1317 }, { "epoch": 0.012476216620440928, "grad_norm": 678.1378173828125, "learning_rate": 8.31545741324921e-07, "loss": 43.1562, "step": 1318 }, { "epoch": 0.01248568264215598, "grad_norm": 1250.24462890625, "learning_rate": 8.321766561514195e-07, "loss": 54.25, "step": 1319 }, { "epoch": 0.012495148663871036, "grad_norm": 918.5830688476562, "learning_rate": 8.32807570977918e-07, "loss": 79.6406, "step": 1320 }, { "epoch": 0.012504614685586089, "grad_norm": 443.50384521484375, "learning_rate": 8.334384858044164e-07, "loss": 67.25, "step": 1321 }, { "epoch": 0.012514080707301143, "grad_norm": 466.285400390625, "learning_rate": 8.340694006309148e-07, "loss": 67.625, "step": 1322 }, { "epoch": 0.012523546729016196, "grad_norm": 1024.94775390625, "learning_rate": 8.347003154574133e-07, "loss": 96.7188, "step": 1323 }, { "epoch": 0.01253301275073125, "grad_norm": 481.89703369140625, "learning_rate": 8.353312302839116e-07, "loss": 39.4688, "step": 1324 }, { "epoch": 0.012542478772446304, "grad_norm": 1057.9022216796875, "learning_rate": 8.3596214511041e-07, "loss": 110.4375, "step": 1325 }, { "epoch": 0.012551944794161357, "grad_norm": 468.0632629394531, "learning_rate": 8.365930599369084e-07, "loss": 45.0, "step": 1326 }, { "epoch": 0.012561410815876412, "grad_norm": 2.9193124771118164, "learning_rate": 8.372239747634069e-07, "loss": 0.8169, "step": 1327 }, { "epoch": 0.012570876837591465, "grad_norm": 565.5068969726562, "learning_rate": 8.378548895899054e-07, "loss": 44.7812, "step": 1328 }, { "epoch": 0.01258034285930652, "grad_norm": 569.8988647460938, "learning_rate": 8.384858044164037e-07, "loss": 52.9219, "step": 1329 }, { "epoch": 0.012589808881021573, "grad_norm": 623.792724609375, "learning_rate": 8.391167192429022e-07, "loss": 46.1094, "step": 1330 }, { "epoch": 0.012599274902736627, "grad_norm": 1470.1363525390625, "learning_rate": 8.397476340694007e-07, "loss": 53.5156, "step": 1331 }, { "epoch": 0.01260874092445168, "grad_norm": 454.1531677246094, "learning_rate": 8.40378548895899e-07, "loss": 38.1562, "step": 1332 }, { "epoch": 0.012618206946166735, "grad_norm": 574.9443969726562, "learning_rate": 8.410094637223974e-07, "loss": 88.0312, "step": 1333 }, { "epoch": 0.012627672967881788, "grad_norm": 791.2825317382812, "learning_rate": 8.416403785488959e-07, "loss": 65.25, "step": 1334 }, { "epoch": 0.012637138989596843, "grad_norm": 478.2488098144531, "learning_rate": 8.422712933753942e-07, "loss": 38.7969, "step": 1335 }, { "epoch": 0.012646605011311896, "grad_norm": 1005.2443237304688, "learning_rate": 8.429022082018927e-07, "loss": 57.3125, "step": 1336 }, { "epoch": 0.01265607103302695, "grad_norm": 534.0778198242188, "learning_rate": 8.435331230283911e-07, "loss": 39.5625, "step": 1337 }, { "epoch": 0.012665537054742004, "grad_norm": 920.1619262695312, "learning_rate": 8.441640378548896e-07, "loss": 90.9375, "step": 1338 }, { "epoch": 0.012675003076457057, "grad_norm": 766.330322265625, "learning_rate": 8.44794952681388e-07, "loss": 42.0625, "step": 1339 }, { "epoch": 0.012684469098172111, "grad_norm": 478.56695556640625, "learning_rate": 8.454258675078864e-07, "loss": 38.6875, "step": 1340 }, { "epoch": 0.012693935119887164, "grad_norm": 671.4730834960938, "learning_rate": 8.460567823343849e-07, "loss": 53.9375, "step": 1341 }, { "epoch": 0.012703401141602219, "grad_norm": 524.099853515625, "learning_rate": 8.466876971608832e-07, "loss": 37.6875, "step": 1342 }, { "epoch": 0.012712867163317272, "grad_norm": 765.1116333007812, "learning_rate": 8.473186119873816e-07, "loss": 81.7188, "step": 1343 }, { "epoch": 0.012722333185032327, "grad_norm": 480.188720703125, "learning_rate": 8.479495268138801e-07, "loss": 51.4688, "step": 1344 }, { "epoch": 0.01273179920674738, "grad_norm": 532.4927978515625, "learning_rate": 8.485804416403785e-07, "loss": 38.0625, "step": 1345 }, { "epoch": 0.012741265228462435, "grad_norm": 2.4093666076660156, "learning_rate": 8.492113564668769e-07, "loss": 0.7751, "step": 1346 }, { "epoch": 0.012750731250177488, "grad_norm": 951.308349609375, "learning_rate": 8.498422712933754e-07, "loss": 73.5781, "step": 1347 }, { "epoch": 0.012760197271892542, "grad_norm": 530.938232421875, "learning_rate": 8.504731861198738e-07, "loss": 40.7344, "step": 1348 }, { "epoch": 0.012769663293607595, "grad_norm": 737.6542358398438, "learning_rate": 8.511041009463722e-07, "loss": 48.4062, "step": 1349 }, { "epoch": 0.01277912931532265, "grad_norm": 485.5086975097656, "learning_rate": 8.517350157728707e-07, "loss": 39.7344, "step": 1350 }, { "epoch": 0.012788595337037703, "grad_norm": 1785.971923828125, "learning_rate": 8.52365930599369e-07, "loss": 58.1094, "step": 1351 }, { "epoch": 0.012798061358752756, "grad_norm": 465.22564697265625, "learning_rate": 8.529968454258674e-07, "loss": 37.1875, "step": 1352 }, { "epoch": 0.012807527380467811, "grad_norm": 939.3936157226562, "learning_rate": 8.536277602523659e-07, "loss": 87.8438, "step": 1353 }, { "epoch": 0.012816993402182864, "grad_norm": 545.9471435546875, "learning_rate": 8.542586750788643e-07, "loss": 39.1875, "step": 1354 }, { "epoch": 0.012826459423897919, "grad_norm": 334.21453857421875, "learning_rate": 8.548895899053627e-07, "loss": 40.6875, "step": 1355 }, { "epoch": 0.012835925445612972, "grad_norm": 2.8116471767425537, "learning_rate": 8.555205047318612e-07, "loss": 0.8408, "step": 1356 }, { "epoch": 0.012845391467328026, "grad_norm": 507.2449035644531, "learning_rate": 8.561514195583596e-07, "loss": 40.8438, "step": 1357 }, { "epoch": 0.01285485748904308, "grad_norm": 2.6533560752868652, "learning_rate": 8.567823343848581e-07, "loss": 0.8555, "step": 1358 }, { "epoch": 0.012864323510758134, "grad_norm": 903.3565063476562, "learning_rate": 8.574132492113564e-07, "loss": 71.5938, "step": 1359 }, { "epoch": 0.012873789532473187, "grad_norm": 573.8442993164062, "learning_rate": 8.580441640378548e-07, "loss": 78.5312, "step": 1360 }, { "epoch": 0.012883255554188242, "grad_norm": 661.5220947265625, "learning_rate": 8.586750788643533e-07, "loss": 41.4375, "step": 1361 }, { "epoch": 0.012892721575903295, "grad_norm": 1836.662109375, "learning_rate": 8.593059936908516e-07, "loss": 35.9219, "step": 1362 }, { "epoch": 0.01290218759761835, "grad_norm": 547.4818725585938, "learning_rate": 8.599369085173501e-07, "loss": 46.7344, "step": 1363 }, { "epoch": 0.012911653619333403, "grad_norm": 782.3678588867188, "learning_rate": 8.605678233438486e-07, "loss": 72.3594, "step": 1364 }, { "epoch": 0.012921119641048457, "grad_norm": 649.7166137695312, "learning_rate": 8.61198738170347e-07, "loss": 51.3906, "step": 1365 }, { "epoch": 0.01293058566276351, "grad_norm": 1104.708740234375, "learning_rate": 8.618296529968454e-07, "loss": 120.9375, "step": 1366 }, { "epoch": 0.012940051684478563, "grad_norm": 456.02386474609375, "learning_rate": 8.624605678233438e-07, "loss": 41.2812, "step": 1367 }, { "epoch": 0.012949517706193618, "grad_norm": 400.4045715332031, "learning_rate": 8.630914826498423e-07, "loss": 52.9688, "step": 1368 }, { "epoch": 0.012958983727908671, "grad_norm": 888.3413696289062, "learning_rate": 8.637223974763406e-07, "loss": 42.6094, "step": 1369 }, { "epoch": 0.012968449749623726, "grad_norm": 652.0835571289062, "learning_rate": 8.64353312302839e-07, "loss": 49.9688, "step": 1370 }, { "epoch": 0.012977915771338779, "grad_norm": 1159.310791015625, "learning_rate": 8.649842271293375e-07, "loss": 91.0938, "step": 1371 }, { "epoch": 0.012987381793053834, "grad_norm": 745.7328491210938, "learning_rate": 8.656151419558359e-07, "loss": 86.375, "step": 1372 }, { "epoch": 0.012996847814768887, "grad_norm": 763.663330078125, "learning_rate": 8.662460567823343e-07, "loss": 72.75, "step": 1373 }, { "epoch": 0.013006313836483941, "grad_norm": 507.6583251953125, "learning_rate": 8.668769716088328e-07, "loss": 36.0, "step": 1374 }, { "epoch": 0.013015779858198994, "grad_norm": 623.54443359375, "learning_rate": 8.675078864353313e-07, "loss": 52.8125, "step": 1375 }, { "epoch": 0.01302524587991405, "grad_norm": 343.00640869140625, "learning_rate": 8.681388012618296e-07, "loss": 42.9688, "step": 1376 }, { "epoch": 0.013034711901629102, "grad_norm": 405.62847900390625, "learning_rate": 8.687697160883281e-07, "loss": 63.5156, "step": 1377 }, { "epoch": 0.013044177923344157, "grad_norm": 446.8899230957031, "learning_rate": 8.694006309148264e-07, "loss": 43.6562, "step": 1378 }, { "epoch": 0.01305364394505921, "grad_norm": 592.6976928710938, "learning_rate": 8.700315457413248e-07, "loss": 66.2188, "step": 1379 }, { "epoch": 0.013063109966774265, "grad_norm": 331.5686950683594, "learning_rate": 8.706624605678233e-07, "loss": 33.875, "step": 1380 }, { "epoch": 0.013072575988489318, "grad_norm": 1398.0421142578125, "learning_rate": 8.712933753943217e-07, "loss": 94.2812, "step": 1381 }, { "epoch": 0.01308204201020437, "grad_norm": 418.016845703125, "learning_rate": 8.719242902208201e-07, "loss": 44.0469, "step": 1382 }, { "epoch": 0.013091508031919425, "grad_norm": 487.48724365234375, "learning_rate": 8.725552050473186e-07, "loss": 51.8125, "step": 1383 }, { "epoch": 0.013100974053634478, "grad_norm": 461.1952209472656, "learning_rate": 8.73186119873817e-07, "loss": 41.7344, "step": 1384 }, { "epoch": 0.013110440075349533, "grad_norm": 445.71807861328125, "learning_rate": 8.738170347003155e-07, "loss": 60.125, "step": 1385 }, { "epoch": 0.013119906097064586, "grad_norm": 435.5230712890625, "learning_rate": 8.744479495268139e-07, "loss": 43.3438, "step": 1386 }, { "epoch": 0.013129372118779641, "grad_norm": 881.2249145507812, "learning_rate": 8.750788643533122e-07, "loss": 79.6562, "step": 1387 }, { "epoch": 0.013138838140494694, "grad_norm": 821.0104370117188, "learning_rate": 8.757097791798107e-07, "loss": 46.625, "step": 1388 }, { "epoch": 0.013148304162209749, "grad_norm": 1187.657470703125, "learning_rate": 8.76340694006309e-07, "loss": 96.6875, "step": 1389 }, { "epoch": 0.013157770183924802, "grad_norm": 1455.6048583984375, "learning_rate": 8.769716088328075e-07, "loss": 91.1562, "step": 1390 }, { "epoch": 0.013167236205639856, "grad_norm": 681.7944946289062, "learning_rate": 8.77602523659306e-07, "loss": 40.2031, "step": 1391 }, { "epoch": 0.01317670222735491, "grad_norm": 547.618408203125, "learning_rate": 8.782334384858043e-07, "loss": 62.9062, "step": 1392 }, { "epoch": 0.013186168249069964, "grad_norm": 713.3663330078125, "learning_rate": 8.788643533123028e-07, "loss": 51.4375, "step": 1393 }, { "epoch": 0.013195634270785017, "grad_norm": 1101.2574462890625, "learning_rate": 8.794952681388013e-07, "loss": 62.5, "step": 1394 }, { "epoch": 0.01320510029250007, "grad_norm": 499.2628479003906, "learning_rate": 8.801261829652997e-07, "loss": 36.9219, "step": 1395 }, { "epoch": 0.013214566314215125, "grad_norm": 3.1810498237609863, "learning_rate": 8.80757097791798e-07, "loss": 0.8477, "step": 1396 }, { "epoch": 0.013224032335930178, "grad_norm": 448.5975341796875, "learning_rate": 8.813880126182965e-07, "loss": 35.5781, "step": 1397 }, { "epoch": 0.013233498357645233, "grad_norm": 574.132080078125, "learning_rate": 8.820189274447949e-07, "loss": 44.9844, "step": 1398 }, { "epoch": 0.013242964379360286, "grad_norm": 781.0548095703125, "learning_rate": 8.826498422712933e-07, "loss": 80.1562, "step": 1399 }, { "epoch": 0.01325243040107534, "grad_norm": 608.9871215820312, "learning_rate": 8.832807570977917e-07, "loss": 45.6875, "step": 1400 }, { "epoch": 0.013261896422790393, "grad_norm": 449.8024597167969, "learning_rate": 8.839116719242902e-07, "loss": 39.2969, "step": 1401 }, { "epoch": 0.013271362444505448, "grad_norm": 713.97802734375, "learning_rate": 8.845425867507887e-07, "loss": 82.2812, "step": 1402 }, { "epoch": 0.013280828466220501, "grad_norm": 627.7367553710938, "learning_rate": 8.85173501577287e-07, "loss": 67.0312, "step": 1403 }, { "epoch": 0.013290294487935556, "grad_norm": 1545.5152587890625, "learning_rate": 8.858044164037855e-07, "loss": 115.0, "step": 1404 }, { "epoch": 0.013299760509650609, "grad_norm": 533.2507934570312, "learning_rate": 8.86435331230284e-07, "loss": 35.3125, "step": 1405 }, { "epoch": 0.013309226531365664, "grad_norm": 385.31866455078125, "learning_rate": 8.870662460567822e-07, "loss": 44.0625, "step": 1406 }, { "epoch": 0.013318692553080717, "grad_norm": 836.9967651367188, "learning_rate": 8.876971608832807e-07, "loss": 45.1562, "step": 1407 }, { "epoch": 0.013328158574795771, "grad_norm": 562.7202758789062, "learning_rate": 8.883280757097792e-07, "loss": 74.125, "step": 1408 }, { "epoch": 0.013337624596510824, "grad_norm": 710.3111572265625, "learning_rate": 8.889589905362775e-07, "loss": 39.375, "step": 1409 }, { "epoch": 0.013347090618225877, "grad_norm": 624.1787109375, "learning_rate": 8.89589905362776e-07, "loss": 70.6875, "step": 1410 }, { "epoch": 0.013356556639940932, "grad_norm": 835.3054809570312, "learning_rate": 8.902208201892744e-07, "loss": 71.9062, "step": 1411 }, { "epoch": 0.013366022661655985, "grad_norm": 358.1769714355469, "learning_rate": 8.908517350157729e-07, "loss": 33.5, "step": 1412 }, { "epoch": 0.01337548868337104, "grad_norm": 413.16943359375, "learning_rate": 8.914826498422713e-07, "loss": 37.1406, "step": 1413 }, { "epoch": 0.013384954705086093, "grad_norm": 564.9970703125, "learning_rate": 8.921135646687696e-07, "loss": 45.0312, "step": 1414 }, { "epoch": 0.013394420726801148, "grad_norm": 524.179931640625, "learning_rate": 8.927444794952681e-07, "loss": 39.0781, "step": 1415 }, { "epoch": 0.0134038867485162, "grad_norm": 803.7221069335938, "learning_rate": 8.933753943217665e-07, "loss": 101.0, "step": 1416 }, { "epoch": 0.013413352770231255, "grad_norm": 1163.7073974609375, "learning_rate": 8.940063091482649e-07, "loss": 75.0156, "step": 1417 }, { "epoch": 0.013422818791946308, "grad_norm": 498.9340515136719, "learning_rate": 8.946372239747634e-07, "loss": 43.2656, "step": 1418 }, { "epoch": 0.013432284813661363, "grad_norm": 653.8701171875, "learning_rate": 8.952681388012618e-07, "loss": 43.8594, "step": 1419 }, { "epoch": 0.013441750835376416, "grad_norm": 570.5690307617188, "learning_rate": 8.958990536277602e-07, "loss": 60.7969, "step": 1420 }, { "epoch": 0.013451216857091471, "grad_norm": 612.148193359375, "learning_rate": 8.965299684542587e-07, "loss": 75.75, "step": 1421 }, { "epoch": 0.013460682878806524, "grad_norm": 567.9755249023438, "learning_rate": 8.971608832807571e-07, "loss": 47.8438, "step": 1422 }, { "epoch": 0.013470148900521577, "grad_norm": 557.1666870117188, "learning_rate": 8.977917981072554e-07, "loss": 48.25, "step": 1423 }, { "epoch": 0.013479614922236632, "grad_norm": 657.5366821289062, "learning_rate": 8.984227129337539e-07, "loss": 82.375, "step": 1424 }, { "epoch": 0.013489080943951685, "grad_norm": 464.5755615234375, "learning_rate": 8.990536277602523e-07, "loss": 40.3594, "step": 1425 }, { "epoch": 0.01349854696566674, "grad_norm": 472.5301513671875, "learning_rate": 8.996845425867507e-07, "loss": 46.4219, "step": 1426 }, { "epoch": 0.013508012987381792, "grad_norm": 340.2803039550781, "learning_rate": 9.003154574132492e-07, "loss": 36.2031, "step": 1427 }, { "epoch": 0.013517479009096847, "grad_norm": 624.3343505859375, "learning_rate": 9.009463722397476e-07, "loss": 72.875, "step": 1428 }, { "epoch": 0.0135269450308119, "grad_norm": 516.1412353515625, "learning_rate": 9.01577287066246e-07, "loss": 48.8125, "step": 1429 }, { "epoch": 0.013536411052526955, "grad_norm": 1154.2579345703125, "learning_rate": 9.022082018927445e-07, "loss": 76.4688, "step": 1430 }, { "epoch": 0.013545877074242008, "grad_norm": 3.112196922302246, "learning_rate": 9.028391167192429e-07, "loss": 0.8594, "step": 1431 }, { "epoch": 0.013555343095957063, "grad_norm": 787.1168212890625, "learning_rate": 9.034700315457414e-07, "loss": 40.1406, "step": 1432 }, { "epoch": 0.013564809117672116, "grad_norm": 660.9312744140625, "learning_rate": 9.041009463722396e-07, "loss": 46.9062, "step": 1433 }, { "epoch": 0.01357427513938717, "grad_norm": 354.42498779296875, "learning_rate": 9.047318611987381e-07, "loss": 36.2656, "step": 1434 }, { "epoch": 0.013583741161102223, "grad_norm": 470.1373596191406, "learning_rate": 9.053627760252366e-07, "loss": 41.8594, "step": 1435 }, { "epoch": 0.013593207182817278, "grad_norm": 756.4203491210938, "learning_rate": 9.059936908517349e-07, "loss": 69.5938, "step": 1436 }, { "epoch": 0.013602673204532331, "grad_norm": 591.5595092773438, "learning_rate": 9.066246056782334e-07, "loss": 41.6875, "step": 1437 }, { "epoch": 0.013612139226247384, "grad_norm": 388.9606628417969, "learning_rate": 9.072555205047319e-07, "loss": 44.875, "step": 1438 }, { "epoch": 0.013621605247962439, "grad_norm": 634.19775390625, "learning_rate": 9.078864353312302e-07, "loss": 61.5625, "step": 1439 }, { "epoch": 0.013631071269677492, "grad_norm": 726.5520629882812, "learning_rate": 9.085173501577287e-07, "loss": 57.0938, "step": 1440 }, { "epoch": 0.013640537291392547, "grad_norm": 459.2733154296875, "learning_rate": 9.091482649842272e-07, "loss": 47.875, "step": 1441 }, { "epoch": 0.0136500033131076, "grad_norm": 417.86383056640625, "learning_rate": 9.097791798107255e-07, "loss": 65.375, "step": 1442 }, { "epoch": 0.013659469334822654, "grad_norm": 492.90875244140625, "learning_rate": 9.104100946372239e-07, "loss": 42.0938, "step": 1443 }, { "epoch": 0.013668935356537707, "grad_norm": 1055.4324951171875, "learning_rate": 9.110410094637223e-07, "loss": 83.7656, "step": 1444 }, { "epoch": 0.013678401378252762, "grad_norm": 855.2055053710938, "learning_rate": 9.116719242902208e-07, "loss": 84.375, "step": 1445 }, { "epoch": 0.013687867399967815, "grad_norm": 3.1018226146698, "learning_rate": 9.123028391167192e-07, "loss": 0.9126, "step": 1446 }, { "epoch": 0.01369733342168287, "grad_norm": 430.2931823730469, "learning_rate": 9.129337539432176e-07, "loss": 30.0312, "step": 1447 }, { "epoch": 0.013706799443397923, "grad_norm": 518.375244140625, "learning_rate": 9.135646687697161e-07, "loss": 37.625, "step": 1448 }, { "epoch": 0.013716265465112978, "grad_norm": 650.762939453125, "learning_rate": 9.141955835962146e-07, "loss": 41.125, "step": 1449 }, { "epoch": 0.01372573148682803, "grad_norm": 745.5169677734375, "learning_rate": 9.148264984227128e-07, "loss": 94.7188, "step": 1450 }, { "epoch": 0.013735197508543085, "grad_norm": 559.1017456054688, "learning_rate": 9.154574132492113e-07, "loss": 42.7812, "step": 1451 }, { "epoch": 0.013744663530258138, "grad_norm": 344.24554443359375, "learning_rate": 9.160883280757098e-07, "loss": 45.3125, "step": 1452 }, { "epoch": 0.013754129551973191, "grad_norm": 2.658862829208374, "learning_rate": 9.167192429022081e-07, "loss": 0.8394, "step": 1453 }, { "epoch": 0.013763595573688246, "grad_norm": 519.7094116210938, "learning_rate": 9.173501577287066e-07, "loss": 47.1875, "step": 1454 }, { "epoch": 0.0137730615954033, "grad_norm": 1831.1029052734375, "learning_rate": 9.17981072555205e-07, "loss": 74.7031, "step": 1455 }, { "epoch": 0.013782527617118354, "grad_norm": 3.121443033218384, "learning_rate": 9.186119873817034e-07, "loss": 0.8384, "step": 1456 }, { "epoch": 0.013791993638833407, "grad_norm": 353.1127014160156, "learning_rate": 9.192429022082019e-07, "loss": 33.5156, "step": 1457 }, { "epoch": 0.013801459660548462, "grad_norm": 381.7391052246094, "learning_rate": 9.198738170347003e-07, "loss": 38.6094, "step": 1458 }, { "epoch": 0.013810925682263515, "grad_norm": 560.4081420898438, "learning_rate": 9.205047318611988e-07, "loss": 37.5938, "step": 1459 }, { "epoch": 0.01382039170397857, "grad_norm": 683.053466796875, "learning_rate": 9.211356466876971e-07, "loss": 78.8906, "step": 1460 }, { "epoch": 0.013829857725693622, "grad_norm": 972.4369506835938, "learning_rate": 9.217665615141955e-07, "loss": 45.5156, "step": 1461 }, { "epoch": 0.013839323747408677, "grad_norm": 648.32421875, "learning_rate": 9.22397476340694e-07, "loss": 72.4219, "step": 1462 }, { "epoch": 0.01384878976912373, "grad_norm": 525.7362670898438, "learning_rate": 9.230283911671923e-07, "loss": 41.3906, "step": 1463 }, { "epoch": 0.013858255790838785, "grad_norm": 914.3699951171875, "learning_rate": 9.236593059936908e-07, "loss": 85.875, "step": 1464 }, { "epoch": 0.013867721812553838, "grad_norm": 1168.3109130859375, "learning_rate": 9.242902208201893e-07, "loss": 58.5469, "step": 1465 }, { "epoch": 0.013877187834268891, "grad_norm": 2.8909316062927246, "learning_rate": 9.249211356466876e-07, "loss": 0.8804, "step": 1466 }, { "epoch": 0.013886653855983946, "grad_norm": 588.3839111328125, "learning_rate": 9.255520504731861e-07, "loss": 43.5938, "step": 1467 }, { "epoch": 0.013896119877698999, "grad_norm": 547.1492309570312, "learning_rate": 9.261829652996846e-07, "loss": 42.9219, "step": 1468 }, { "epoch": 0.013905585899414053, "grad_norm": 629.670166015625, "learning_rate": 9.268138801261829e-07, "loss": 44.375, "step": 1469 }, { "epoch": 0.013915051921129106, "grad_norm": 1000.9705810546875, "learning_rate": 9.274447949526813e-07, "loss": 94.6875, "step": 1470 }, { "epoch": 0.013924517942844161, "grad_norm": 453.7991638183594, "learning_rate": 9.280757097791798e-07, "loss": 37.1719, "step": 1471 }, { "epoch": 0.013933983964559214, "grad_norm": 431.3731994628906, "learning_rate": 9.287066246056782e-07, "loss": 42.3906, "step": 1472 }, { "epoch": 0.013943449986274269, "grad_norm": 727.1135864257812, "learning_rate": 9.293375394321766e-07, "loss": 69.4375, "step": 1473 }, { "epoch": 0.013952916007989322, "grad_norm": 3.21260142326355, "learning_rate": 9.29968454258675e-07, "loss": 0.8521, "step": 1474 }, { "epoch": 0.013962382029704377, "grad_norm": 534.3606567382812, "learning_rate": 9.305993690851735e-07, "loss": 37.9219, "step": 1475 }, { "epoch": 0.01397184805141943, "grad_norm": 481.35626220703125, "learning_rate": 9.312302839116719e-07, "loss": 43.0625, "step": 1476 }, { "epoch": 0.013981314073134484, "grad_norm": 506.96563720703125, "learning_rate": 9.318611987381702e-07, "loss": 49.2812, "step": 1477 }, { "epoch": 0.013990780094849537, "grad_norm": 577.5778198242188, "learning_rate": 9.324921135646687e-07, "loss": 41.1094, "step": 1478 }, { "epoch": 0.014000246116564592, "grad_norm": 601.3617553710938, "learning_rate": 9.331230283911672e-07, "loss": 62.9844, "step": 1479 }, { "epoch": 0.014009712138279645, "grad_norm": 509.481689453125, "learning_rate": 9.337539432176655e-07, "loss": 40.7344, "step": 1480 }, { "epoch": 0.014019178159994698, "grad_norm": 864.44677734375, "learning_rate": 9.34384858044164e-07, "loss": 68.1875, "step": 1481 }, { "epoch": 0.014028644181709753, "grad_norm": 512.6714477539062, "learning_rate": 9.350157728706625e-07, "loss": 38.8281, "step": 1482 }, { "epoch": 0.014038110203424806, "grad_norm": 391.63494873046875, "learning_rate": 9.356466876971608e-07, "loss": 35.7188, "step": 1483 }, { "epoch": 0.01404757622513986, "grad_norm": 416.9952697753906, "learning_rate": 9.362776025236593e-07, "loss": 32.8438, "step": 1484 }, { "epoch": 0.014057042246854914, "grad_norm": 564.627197265625, "learning_rate": 9.369085173501577e-07, "loss": 44.1719, "step": 1485 }, { "epoch": 0.014066508268569968, "grad_norm": 543.583984375, "learning_rate": 9.37539432176656e-07, "loss": 48.7344, "step": 1486 }, { "epoch": 0.014075974290285021, "grad_norm": 705.085205078125, "learning_rate": 9.381703470031545e-07, "loss": 85.9062, "step": 1487 }, { "epoch": 0.014085440312000076, "grad_norm": 1149.811767578125, "learning_rate": 9.388012618296529e-07, "loss": 84.375, "step": 1488 }, { "epoch": 0.01409490633371513, "grad_norm": 453.5669860839844, "learning_rate": 9.394321766561514e-07, "loss": 36.5312, "step": 1489 }, { "epoch": 0.014104372355430184, "grad_norm": 690.2760620117188, "learning_rate": 9.400630914826498e-07, "loss": 76.9688, "step": 1490 }, { "epoch": 0.014113838377145237, "grad_norm": 397.5094299316406, "learning_rate": 9.406940063091482e-07, "loss": 44.4375, "step": 1491 }, { "epoch": 0.014123304398860292, "grad_norm": 387.2882385253906, "learning_rate": 9.413249211356467e-07, "loss": 36.2812, "step": 1492 }, { "epoch": 0.014132770420575345, "grad_norm": 3.019580125808716, "learning_rate": 9.419558359621451e-07, "loss": 0.9253, "step": 1493 }, { "epoch": 0.014142236442290398, "grad_norm": 478.53741455078125, "learning_rate": 9.425867507886435e-07, "loss": 41.4531, "step": 1494 }, { "epoch": 0.014151702464005452, "grad_norm": 837.216064453125, "learning_rate": 9.43217665615142e-07, "loss": 96.7812, "step": 1495 }, { "epoch": 0.014161168485720506, "grad_norm": 785.90673828125, "learning_rate": 9.438485804416402e-07, "loss": 41.0312, "step": 1496 }, { "epoch": 0.01417063450743556, "grad_norm": 1076.6455078125, "learning_rate": 9.444794952681387e-07, "loss": 124.0938, "step": 1497 }, { "epoch": 0.014180100529150613, "grad_norm": 388.8402099609375, "learning_rate": 9.451104100946372e-07, "loss": 44.8125, "step": 1498 }, { "epoch": 0.014189566550865668, "grad_norm": 439.56536865234375, "learning_rate": 9.457413249211356e-07, "loss": 36.5469, "step": 1499 }, { "epoch": 0.014199032572580721, "grad_norm": 673.4337158203125, "learning_rate": 9.46372239747634e-07, "loss": 38.7344, "step": 1500 }, { "epoch": 0.014208498594295776, "grad_norm": 779.1889038085938, "learning_rate": 9.470031545741325e-07, "loss": 57.5547, "step": 1501 }, { "epoch": 0.014217964616010829, "grad_norm": 872.2674560546875, "learning_rate": 9.476340694006309e-07, "loss": 90.4531, "step": 1502 }, { "epoch": 0.014227430637725883, "grad_norm": 467.3123474121094, "learning_rate": 9.482649842271293e-07, "loss": 41.625, "step": 1503 }, { "epoch": 0.014236896659440937, "grad_norm": 557.76806640625, "learning_rate": 9.488958990536278e-07, "loss": 47.3594, "step": 1504 }, { "epoch": 0.014246362681155991, "grad_norm": 444.07647705078125, "learning_rate": 9.495268138801261e-07, "loss": 41.9531, "step": 1505 }, { "epoch": 0.014255828702871044, "grad_norm": 643.6551513671875, "learning_rate": 9.501577287066246e-07, "loss": 46.0625, "step": 1506 }, { "epoch": 0.014265294724586099, "grad_norm": 290.8764953613281, "learning_rate": 9.507886435331229e-07, "loss": 35.0, "step": 1507 }, { "epoch": 0.014274760746301152, "grad_norm": 2.6246261596679688, "learning_rate": 9.514195583596214e-07, "loss": 0.8828, "step": 1508 }, { "epoch": 0.014284226768016205, "grad_norm": 591.4506225585938, "learning_rate": 9.520504731861199e-07, "loss": 75.7812, "step": 1509 }, { "epoch": 0.01429369278973126, "grad_norm": 583.549072265625, "learning_rate": 9.526813880126182e-07, "loss": 42.3125, "step": 1510 }, { "epoch": 0.014303158811446313, "grad_norm": 834.4395141601562, "learning_rate": 9.533123028391167e-07, "loss": 58.5469, "step": 1511 }, { "epoch": 0.014312624833161368, "grad_norm": 977.6520385742188, "learning_rate": 9.53943217665615e-07, "loss": 70.8906, "step": 1512 }, { "epoch": 0.01432209085487642, "grad_norm": 3.245422124862671, "learning_rate": 9.545741324921136e-07, "loss": 0.8335, "step": 1513 }, { "epoch": 0.014331556876591475, "grad_norm": 535.1561279296875, "learning_rate": 9.55205047318612e-07, "loss": 39.4844, "step": 1514 }, { "epoch": 0.014341022898306528, "grad_norm": 315.49462890625, "learning_rate": 9.558359621451103e-07, "loss": 33.6875, "step": 1515 }, { "epoch": 0.014350488920021583, "grad_norm": 376.9138488769531, "learning_rate": 9.564668769716088e-07, "loss": 33.1562, "step": 1516 }, { "epoch": 0.014359954941736636, "grad_norm": 625.4567260742188, "learning_rate": 9.570977917981073e-07, "loss": 95.9062, "step": 1517 }, { "epoch": 0.01436942096345169, "grad_norm": 810.0696411132812, "learning_rate": 9.577287066246056e-07, "loss": 60.6406, "step": 1518 }, { "epoch": 0.014378886985166744, "grad_norm": 1323.385009765625, "learning_rate": 9.58359621451104e-07, "loss": 36.625, "step": 1519 }, { "epoch": 0.014388353006881799, "grad_norm": 624.9921264648438, "learning_rate": 9.589905362776026e-07, "loss": 41.1562, "step": 1520 }, { "epoch": 0.014397819028596852, "grad_norm": 348.5631408691406, "learning_rate": 9.596214511041009e-07, "loss": 32.3438, "step": 1521 }, { "epoch": 0.014407285050311906, "grad_norm": 488.4847412109375, "learning_rate": 9.602523659305994e-07, "loss": 37.3906, "step": 1522 }, { "epoch": 0.01441675107202696, "grad_norm": 1286.49072265625, "learning_rate": 9.608832807570978e-07, "loss": 86.4375, "step": 1523 }, { "epoch": 0.014426217093742012, "grad_norm": 645.9542846679688, "learning_rate": 9.615141955835961e-07, "loss": 55.0469, "step": 1524 }, { "epoch": 0.014435683115457067, "grad_norm": 777.2451171875, "learning_rate": 9.621451104100946e-07, "loss": 85.5938, "step": 1525 }, { "epoch": 0.01444514913717212, "grad_norm": 1294.2974853515625, "learning_rate": 9.627760252365931e-07, "loss": 97.1875, "step": 1526 }, { "epoch": 0.014454615158887175, "grad_norm": 527.7368774414062, "learning_rate": 9.634069400630914e-07, "loss": 64.7812, "step": 1527 }, { "epoch": 0.014464081180602228, "grad_norm": 355.0567321777344, "learning_rate": 9.640378548895899e-07, "loss": 35.1562, "step": 1528 }, { "epoch": 0.014473547202317283, "grad_norm": 548.6400756835938, "learning_rate": 9.646687697160882e-07, "loss": 43.3125, "step": 1529 }, { "epoch": 0.014483013224032336, "grad_norm": 515.7227172851562, "learning_rate": 9.652996845425867e-07, "loss": 36.5469, "step": 1530 }, { "epoch": 0.01449247924574739, "grad_norm": 506.5542297363281, "learning_rate": 9.659305993690852e-07, "loss": 33.4375, "step": 1531 }, { "epoch": 0.014501945267462443, "grad_norm": 533.2708129882812, "learning_rate": 9.665615141955834e-07, "loss": 53.5469, "step": 1532 }, { "epoch": 0.014511411289177498, "grad_norm": 808.70263671875, "learning_rate": 9.67192429022082e-07, "loss": 76.2031, "step": 1533 }, { "epoch": 0.014520877310892551, "grad_norm": 420.9373474121094, "learning_rate": 9.678233438485804e-07, "loss": 63.375, "step": 1534 }, { "epoch": 0.014530343332607606, "grad_norm": 330.6839904785156, "learning_rate": 9.684542586750787e-07, "loss": 32.4531, "step": 1535 }, { "epoch": 0.014539809354322659, "grad_norm": 456.32733154296875, "learning_rate": 9.690851735015772e-07, "loss": 39.4688, "step": 1536 }, { "epoch": 0.014549275376037712, "grad_norm": 495.7166748046875, "learning_rate": 9.697160883280757e-07, "loss": 54.875, "step": 1537 }, { "epoch": 0.014558741397752767, "grad_norm": 504.2703552246094, "learning_rate": 9.70347003154574e-07, "loss": 65.3438, "step": 1538 }, { "epoch": 0.01456820741946782, "grad_norm": 3.2078359127044678, "learning_rate": 9.709779179810725e-07, "loss": 0.936, "step": 1539 }, { "epoch": 0.014577673441182874, "grad_norm": 836.077880859375, "learning_rate": 9.71608832807571e-07, "loss": 79.0938, "step": 1540 }, { "epoch": 0.014587139462897927, "grad_norm": 338.6009216308594, "learning_rate": 9.722397476340694e-07, "loss": 44.0312, "step": 1541 }, { "epoch": 0.014596605484612982, "grad_norm": 874.1458740234375, "learning_rate": 9.728706624605677e-07, "loss": 34.7656, "step": 1542 }, { "epoch": 0.014606071506328035, "grad_norm": 495.95928955078125, "learning_rate": 9.735015772870662e-07, "loss": 37.3906, "step": 1543 }, { "epoch": 0.01461553752804309, "grad_norm": 472.87640380859375, "learning_rate": 9.741324921135647e-07, "loss": 35.3594, "step": 1544 }, { "epoch": 0.014625003549758143, "grad_norm": 3.1560075283050537, "learning_rate": 9.74763406940063e-07, "loss": 1.0498, "step": 1545 }, { "epoch": 0.014634469571473198, "grad_norm": 458.1545104980469, "learning_rate": 9.753943217665615e-07, "loss": 45.7656, "step": 1546 }, { "epoch": 0.01464393559318825, "grad_norm": 1171.685546875, "learning_rate": 9.7602523659306e-07, "loss": 96.75, "step": 1547 }, { "epoch": 0.014653401614903305, "grad_norm": 429.1284484863281, "learning_rate": 9.766561514195583e-07, "loss": 40.0781, "step": 1548 }, { "epoch": 0.014662867636618358, "grad_norm": 359.6435546875, "learning_rate": 9.772870662460568e-07, "loss": 39.3125, "step": 1549 }, { "epoch": 0.014672333658333413, "grad_norm": 2.8279566764831543, "learning_rate": 9.779179810725552e-07, "loss": 0.9326, "step": 1550 }, { "epoch": 0.014681799680048466, "grad_norm": 490.55816650390625, "learning_rate": 9.785488958990535e-07, "loss": 53.9844, "step": 1551 }, { "epoch": 0.014691265701763519, "grad_norm": 823.115478515625, "learning_rate": 9.79179810725552e-07, "loss": 54.6719, "step": 1552 }, { "epoch": 0.014700731723478574, "grad_norm": 512.02880859375, "learning_rate": 9.798107255520505e-07, "loss": 32.25, "step": 1553 }, { "epoch": 0.014710197745193627, "grad_norm": 462.1593322753906, "learning_rate": 9.804416403785488e-07, "loss": 48.5156, "step": 1554 }, { "epoch": 0.014719663766908682, "grad_norm": 1080.4844970703125, "learning_rate": 9.810725552050473e-07, "loss": 60.9531, "step": 1555 }, { "epoch": 0.014729129788623735, "grad_norm": 522.9070434570312, "learning_rate": 9.817034700315458e-07, "loss": 88.2188, "step": 1556 }, { "epoch": 0.01473859581033879, "grad_norm": 498.96173095703125, "learning_rate": 9.82334384858044e-07, "loss": 37.4219, "step": 1557 }, { "epoch": 0.014748061832053842, "grad_norm": 506.362548828125, "learning_rate": 9.829652996845426e-07, "loss": 38.75, "step": 1558 }, { "epoch": 0.014757527853768897, "grad_norm": 258.36773681640625, "learning_rate": 9.835962145110408e-07, "loss": 31.8438, "step": 1559 }, { "epoch": 0.01476699387548395, "grad_norm": 1216.8369140625, "learning_rate": 9.842271293375393e-07, "loss": 56.375, "step": 1560 }, { "epoch": 0.014776459897199005, "grad_norm": 682.7588500976562, "learning_rate": 9.848580441640378e-07, "loss": 60.5, "step": 1561 }, { "epoch": 0.014785925918914058, "grad_norm": 335.8126220703125, "learning_rate": 9.85488958990536e-07, "loss": 46.0625, "step": 1562 }, { "epoch": 0.014795391940629113, "grad_norm": 440.4599914550781, "learning_rate": 9.861198738170346e-07, "loss": 37.6719, "step": 1563 }, { "epoch": 0.014804857962344166, "grad_norm": 540.6492309570312, "learning_rate": 9.86750788643533e-07, "loss": 42.2656, "step": 1564 }, { "epoch": 0.01481432398405922, "grad_norm": 979.8394775390625, "learning_rate": 9.873817034700314e-07, "loss": 76.375, "step": 1565 }, { "epoch": 0.014823790005774273, "grad_norm": 1317.697021484375, "learning_rate": 9.880126182965299e-07, "loss": 84.0625, "step": 1566 }, { "epoch": 0.014833256027489326, "grad_norm": 334.5838317871094, "learning_rate": 9.886435331230284e-07, "loss": 36.2969, "step": 1567 }, { "epoch": 0.014842722049204381, "grad_norm": 2.8009111881256104, "learning_rate": 9.892744479495268e-07, "loss": 0.813, "step": 1568 }, { "epoch": 0.014852188070919434, "grad_norm": 3.096045970916748, "learning_rate": 9.899053627760251e-07, "loss": 0.939, "step": 1569 }, { "epoch": 0.014861654092634489, "grad_norm": 2.948606491088867, "learning_rate": 9.905362776025236e-07, "loss": 0.9609, "step": 1570 }, { "epoch": 0.014871120114349542, "grad_norm": 512.8611450195312, "learning_rate": 9.911671924290221e-07, "loss": 36.4062, "step": 1571 }, { "epoch": 0.014880586136064597, "grad_norm": 383.573974609375, "learning_rate": 9.917981072555204e-07, "loss": 34.0, "step": 1572 }, { "epoch": 0.01489005215777965, "grad_norm": 3.187349557876587, "learning_rate": 9.924290220820189e-07, "loss": 0.8804, "step": 1573 }, { "epoch": 0.014899518179494704, "grad_norm": 1316.876953125, "learning_rate": 9.930599369085174e-07, "loss": 69.4531, "step": 1574 }, { "epoch": 0.014908984201209757, "grad_norm": 944.4759521484375, "learning_rate": 9.936908517350157e-07, "loss": 40.875, "step": 1575 }, { "epoch": 0.014918450222924812, "grad_norm": 332.7432861328125, "learning_rate": 9.943217665615142e-07, "loss": 33.6875, "step": 1576 }, { "epoch": 0.014927916244639865, "grad_norm": 703.055419921875, "learning_rate": 9.949526813880126e-07, "loss": 74.7188, "step": 1577 }, { "epoch": 0.01493738226635492, "grad_norm": 1165.20947265625, "learning_rate": 9.955835962145111e-07, "loss": 70.4062, "step": 1578 }, { "epoch": 0.014946848288069973, "grad_norm": 950.879150390625, "learning_rate": 9.962145110410094e-07, "loss": 91.0312, "step": 1579 }, { "epoch": 0.014956314309785026, "grad_norm": 1153.678466796875, "learning_rate": 9.96845425867508e-07, "loss": 71.6094, "step": 1580 }, { "epoch": 0.01496578033150008, "grad_norm": 722.4657592773438, "learning_rate": 9.974763406940062e-07, "loss": 72.7344, "step": 1581 }, { "epoch": 0.014975246353215134, "grad_norm": 718.4335327148438, "learning_rate": 9.981072555205047e-07, "loss": 101.6875, "step": 1582 }, { "epoch": 0.014984712374930188, "grad_norm": 2.916945219039917, "learning_rate": 9.987381703470032e-07, "loss": 0.9512, "step": 1583 }, { "epoch": 0.014994178396645241, "grad_norm": 746.156982421875, "learning_rate": 9.993690851735015e-07, "loss": 49.2188, "step": 1584 }, { "epoch": 0.015003644418360296, "grad_norm": 585.630859375, "learning_rate": 1e-06, "loss": 40.2031, "step": 1585 }, { "epoch": 0.015013110440075349, "grad_norm": 985.6940307617188, "learning_rate": 1.0006309148264984e-06, "loss": 72.7188, "step": 1586 }, { "epoch": 0.015022576461790404, "grad_norm": 938.5748291015625, "learning_rate": 1.001261829652997e-06, "loss": 52.5391, "step": 1587 }, { "epoch": 0.015032042483505457, "grad_norm": 565.2317504882812, "learning_rate": 1.0018927444794952e-06, "loss": 44.6562, "step": 1588 }, { "epoch": 0.015041508505220512, "grad_norm": 1024.2633056640625, "learning_rate": 1.0025236593059935e-06, "loss": 85.125, "step": 1589 }, { "epoch": 0.015050974526935565, "grad_norm": 458.9192810058594, "learning_rate": 1.003154574132492e-06, "loss": 38.7188, "step": 1590 }, { "epoch": 0.01506044054865062, "grad_norm": 890.6364135742188, "learning_rate": 1.0037854889589905e-06, "loss": 77.5, "step": 1591 }, { "epoch": 0.015069906570365672, "grad_norm": 1131.025390625, "learning_rate": 1.004416403785489e-06, "loss": 75.6562, "step": 1592 }, { "epoch": 0.015079372592080727, "grad_norm": 486.6618957519531, "learning_rate": 1.0050473186119875e-06, "loss": 41.5312, "step": 1593 }, { "epoch": 0.01508883861379578, "grad_norm": 851.5337524414062, "learning_rate": 1.0056782334384858e-06, "loss": 60.7656, "step": 1594 }, { "epoch": 0.015098304635510833, "grad_norm": 561.5545043945312, "learning_rate": 1.0063091482649842e-06, "loss": 38.4375, "step": 1595 }, { "epoch": 0.015107770657225888, "grad_norm": 614.7553100585938, "learning_rate": 1.0069400630914825e-06, "loss": 40.9844, "step": 1596 }, { "epoch": 0.01511723667894094, "grad_norm": 1123.0914306640625, "learning_rate": 1.007570977917981e-06, "loss": 100.125, "step": 1597 }, { "epoch": 0.015126702700655996, "grad_norm": 639.1223754882812, "learning_rate": 1.0082018927444795e-06, "loss": 33.25, "step": 1598 }, { "epoch": 0.015136168722371049, "grad_norm": 540.2785034179688, "learning_rate": 1.0088328075709778e-06, "loss": 40.3438, "step": 1599 }, { "epoch": 0.015145634744086103, "grad_norm": 530.3887329101562, "learning_rate": 1.0094637223974763e-06, "loss": 80.0156, "step": 1600 }, { "epoch": 0.015155100765801156, "grad_norm": 619.4019165039062, "learning_rate": 1.0100946372239748e-06, "loss": 39.0781, "step": 1601 }, { "epoch": 0.015164566787516211, "grad_norm": 2.5680928230285645, "learning_rate": 1.010725552050473e-06, "loss": 0.7715, "step": 1602 }, { "epoch": 0.015174032809231264, "grad_norm": 647.6741943359375, "learning_rate": 1.0113564668769716e-06, "loss": 54.4531, "step": 1603 }, { "epoch": 0.015183498830946319, "grad_norm": 500.75054931640625, "learning_rate": 1.01198738170347e-06, "loss": 37.1719, "step": 1604 }, { "epoch": 0.015192964852661372, "grad_norm": 609.9459838867188, "learning_rate": 1.0126182965299683e-06, "loss": 41.7188, "step": 1605 }, { "epoch": 0.015202430874376427, "grad_norm": 757.8089599609375, "learning_rate": 1.0132492113564668e-06, "loss": 33.6875, "step": 1606 }, { "epoch": 0.01521189689609148, "grad_norm": 495.50653076171875, "learning_rate": 1.0138801261829653e-06, "loss": 63.8125, "step": 1607 }, { "epoch": 0.015221362917806533, "grad_norm": 384.1427307128906, "learning_rate": 1.0145110410094638e-06, "loss": 39.0781, "step": 1608 }, { "epoch": 0.015230828939521587, "grad_norm": 392.07281494140625, "learning_rate": 1.015141955835962e-06, "loss": 33.9219, "step": 1609 }, { "epoch": 0.01524029496123664, "grad_norm": 606.2182006835938, "learning_rate": 1.0157728706624604e-06, "loss": 32.2188, "step": 1610 }, { "epoch": 0.015249760982951695, "grad_norm": 359.3892822265625, "learning_rate": 1.0164037854889589e-06, "loss": 37.5156, "step": 1611 }, { "epoch": 0.015259227004666748, "grad_norm": 322.7551574707031, "learning_rate": 1.0170347003154574e-06, "loss": 38.7031, "step": 1612 }, { "epoch": 0.015268693026381803, "grad_norm": 710.5620727539062, "learning_rate": 1.0176656151419558e-06, "loss": 94.2188, "step": 1613 }, { "epoch": 0.015278159048096856, "grad_norm": 981.6589965820312, "learning_rate": 1.0182965299684543e-06, "loss": 45.1094, "step": 1614 }, { "epoch": 0.01528762506981191, "grad_norm": 384.23712158203125, "learning_rate": 1.0189274447949528e-06, "loss": 31.875, "step": 1615 }, { "epoch": 0.015297091091526964, "grad_norm": 885.2128295898438, "learning_rate": 1.019558359621451e-06, "loss": 36.6406, "step": 1616 }, { "epoch": 0.015306557113242018, "grad_norm": 3.574967384338379, "learning_rate": 1.0201892744479494e-06, "loss": 0.7729, "step": 1617 }, { "epoch": 0.015316023134957071, "grad_norm": 586.27197265625, "learning_rate": 1.0208201892744479e-06, "loss": 32.9531, "step": 1618 }, { "epoch": 0.015325489156672126, "grad_norm": 314.91815185546875, "learning_rate": 1.0214511041009464e-06, "loss": 34.0, "step": 1619 }, { "epoch": 0.015334955178387179, "grad_norm": 345.89617919921875, "learning_rate": 1.0220820189274449e-06, "loss": 51.1719, "step": 1620 }, { "epoch": 0.015344421200102234, "grad_norm": 949.4100341796875, "learning_rate": 1.0227129337539432e-06, "loss": 79.625, "step": 1621 }, { "epoch": 0.015353887221817287, "grad_norm": 720.751220703125, "learning_rate": 1.0233438485804414e-06, "loss": 34.7969, "step": 1622 }, { "epoch": 0.01536335324353234, "grad_norm": 392.671630859375, "learning_rate": 1.02397476340694e-06, "loss": 39.0, "step": 1623 }, { "epoch": 0.015372819265247395, "grad_norm": 531.5504760742188, "learning_rate": 1.0246056782334384e-06, "loss": 55.7891, "step": 1624 }, { "epoch": 0.015382285286962448, "grad_norm": 565.0335083007812, "learning_rate": 1.025236593059937e-06, "loss": 72.5312, "step": 1625 }, { "epoch": 0.015391751308677502, "grad_norm": 519.9074096679688, "learning_rate": 1.0258675078864354e-06, "loss": 43.3594, "step": 1626 }, { "epoch": 0.015401217330392555, "grad_norm": 3.1751227378845215, "learning_rate": 1.0264984227129337e-06, "loss": 0.7734, "step": 1627 }, { "epoch": 0.01541068335210761, "grad_norm": 1490.4371337890625, "learning_rate": 1.0271293375394322e-06, "loss": 103.2031, "step": 1628 }, { "epoch": 0.015420149373822663, "grad_norm": 385.8720703125, "learning_rate": 1.0277602523659305e-06, "loss": 40.4844, "step": 1629 }, { "epoch": 0.015429615395537718, "grad_norm": 922.3438110351562, "learning_rate": 1.028391167192429e-06, "loss": 76.7188, "step": 1630 }, { "epoch": 0.01543908141725277, "grad_norm": 403.16351318359375, "learning_rate": 1.0290220820189274e-06, "loss": 33.6406, "step": 1631 }, { "epoch": 0.015448547438967826, "grad_norm": 1458.5068359375, "learning_rate": 1.0296529968454257e-06, "loss": 104.0312, "step": 1632 }, { "epoch": 0.015458013460682879, "grad_norm": 499.0540771484375, "learning_rate": 1.0302839116719242e-06, "loss": 89.0312, "step": 1633 }, { "epoch": 0.015467479482397933, "grad_norm": 474.2492980957031, "learning_rate": 1.0309148264984227e-06, "loss": 48.6094, "step": 1634 }, { "epoch": 0.015476945504112986, "grad_norm": 558.6455688476562, "learning_rate": 1.0315457413249212e-06, "loss": 71.8125, "step": 1635 }, { "epoch": 0.015486411525828041, "grad_norm": 343.21044921875, "learning_rate": 1.0321766561514195e-06, "loss": 28.0781, "step": 1636 }, { "epoch": 0.015495877547543094, "grad_norm": 569.449951171875, "learning_rate": 1.032807570977918e-06, "loss": 44.0625, "step": 1637 }, { "epoch": 0.015505343569258147, "grad_norm": 325.5155029296875, "learning_rate": 1.0334384858044163e-06, "loss": 34.25, "step": 1638 }, { "epoch": 0.015514809590973202, "grad_norm": 361.2317810058594, "learning_rate": 1.0340694006309148e-06, "loss": 35.625, "step": 1639 }, { "epoch": 0.015524275612688255, "grad_norm": 859.0040283203125, "learning_rate": 1.0347003154574132e-06, "loss": 75.1875, "step": 1640 }, { "epoch": 0.01553374163440331, "grad_norm": 1238.6195068359375, "learning_rate": 1.0353312302839117e-06, "loss": 46.0156, "step": 1641 }, { "epoch": 0.015543207656118363, "grad_norm": 390.5838928222656, "learning_rate": 1.0359621451104102e-06, "loss": 38.5, "step": 1642 }, { "epoch": 0.015552673677833417, "grad_norm": 398.3189392089844, "learning_rate": 1.0365930599369083e-06, "loss": 39.1875, "step": 1643 }, { "epoch": 0.01556213969954847, "grad_norm": 547.1773681640625, "learning_rate": 1.0372239747634068e-06, "loss": 40.4219, "step": 1644 }, { "epoch": 0.015571605721263525, "grad_norm": 518.6434936523438, "learning_rate": 1.0378548895899053e-06, "loss": 36.375, "step": 1645 }, { "epoch": 0.015581071742978578, "grad_norm": 615.5721435546875, "learning_rate": 1.0384858044164038e-06, "loss": 79.4531, "step": 1646 }, { "epoch": 0.015590537764693633, "grad_norm": 1050.259521484375, "learning_rate": 1.0391167192429023e-06, "loss": 65.8281, "step": 1647 }, { "epoch": 0.015600003786408686, "grad_norm": 1045.0582275390625, "learning_rate": 1.0397476340694008e-06, "loss": 99.9531, "step": 1648 }, { "epoch": 0.01560946980812374, "grad_norm": 385.0968322753906, "learning_rate": 1.0403785488958988e-06, "loss": 32.7344, "step": 1649 }, { "epoch": 0.015618935829838794, "grad_norm": 842.92529296875, "learning_rate": 1.0410094637223973e-06, "loss": 60.8438, "step": 1650 }, { "epoch": 0.01562840185155385, "grad_norm": 519.292724609375, "learning_rate": 1.0416403785488958e-06, "loss": 71.5156, "step": 1651 }, { "epoch": 0.0156378678732689, "grad_norm": 582.1716918945312, "learning_rate": 1.0422712933753943e-06, "loss": 53.9844, "step": 1652 }, { "epoch": 0.015647333894983954, "grad_norm": 597.8706665039062, "learning_rate": 1.0429022082018928e-06, "loss": 39.0, "step": 1653 }, { "epoch": 0.015656799916699007, "grad_norm": 931.6563720703125, "learning_rate": 1.043533123028391e-06, "loss": 41.4531, "step": 1654 }, { "epoch": 0.015666265938414064, "grad_norm": 949.6720581054688, "learning_rate": 1.0441640378548896e-06, "loss": 62.9375, "step": 1655 }, { "epoch": 0.015675731960129117, "grad_norm": 254.19271850585938, "learning_rate": 1.0447949526813879e-06, "loss": 32.25, "step": 1656 }, { "epoch": 0.01568519798184417, "grad_norm": 391.3070983886719, "learning_rate": 1.0454258675078864e-06, "loss": 32.0, "step": 1657 }, { "epoch": 0.015694664003559223, "grad_norm": 840.4525756835938, "learning_rate": 1.0460567823343848e-06, "loss": 99.7969, "step": 1658 }, { "epoch": 0.01570413002527428, "grad_norm": 344.9937438964844, "learning_rate": 1.0466876971608833e-06, "loss": 36.4219, "step": 1659 }, { "epoch": 0.015713596046989332, "grad_norm": 636.9520874023438, "learning_rate": 1.0473186119873816e-06, "loss": 34.2344, "step": 1660 }, { "epoch": 0.015723062068704385, "grad_norm": 381.01953125, "learning_rate": 1.0479495268138801e-06, "loss": 49.0312, "step": 1661 }, { "epoch": 0.01573252809041944, "grad_norm": 743.704345703125, "learning_rate": 1.0485804416403786e-06, "loss": 91.5625, "step": 1662 }, { "epoch": 0.015741994112134495, "grad_norm": 496.76959228515625, "learning_rate": 1.0492113564668769e-06, "loss": 45.5, "step": 1663 }, { "epoch": 0.015751460133849548, "grad_norm": 855.9776000976562, "learning_rate": 1.0498422712933754e-06, "loss": 61.6875, "step": 1664 }, { "epoch": 0.0157609261555646, "grad_norm": 397.4556579589844, "learning_rate": 1.0504731861198737e-06, "loss": 40.7969, "step": 1665 }, { "epoch": 0.015770392177279654, "grad_norm": 891.388671875, "learning_rate": 1.0511041009463722e-06, "loss": 98.2188, "step": 1666 }, { "epoch": 0.015779858198994707, "grad_norm": 613.842529296875, "learning_rate": 1.0517350157728706e-06, "loss": 77.5312, "step": 1667 }, { "epoch": 0.015789324220709763, "grad_norm": 519.4662475585938, "learning_rate": 1.0523659305993691e-06, "loss": 52.7969, "step": 1668 }, { "epoch": 0.015798790242424816, "grad_norm": 457.2646179199219, "learning_rate": 1.0529968454258676e-06, "loss": 31.2656, "step": 1669 }, { "epoch": 0.01580825626413987, "grad_norm": 650.14111328125, "learning_rate": 1.053627760252366e-06, "loss": 48.8438, "step": 1670 }, { "epoch": 0.015817722285854922, "grad_norm": 2242.306884765625, "learning_rate": 1.0542586750788642e-06, "loss": 47.9844, "step": 1671 }, { "epoch": 0.01582718830756998, "grad_norm": 545.1943359375, "learning_rate": 1.0548895899053627e-06, "loss": 30.9219, "step": 1672 }, { "epoch": 0.015836654329285032, "grad_norm": 616.8423461914062, "learning_rate": 1.0555205047318612e-06, "loss": 78.1875, "step": 1673 }, { "epoch": 0.015846120351000085, "grad_norm": 2.920377731323242, "learning_rate": 1.0561514195583597e-06, "loss": 0.978, "step": 1674 }, { "epoch": 0.015855586372715138, "grad_norm": 580.9616088867188, "learning_rate": 1.0567823343848582e-06, "loss": 36.1406, "step": 1675 }, { "epoch": 0.015865052394430194, "grad_norm": 542.4140625, "learning_rate": 1.0574132492113562e-06, "loss": 64.7031, "step": 1676 }, { "epoch": 0.015874518416145247, "grad_norm": 433.63714599609375, "learning_rate": 1.0580441640378547e-06, "loss": 57.5, "step": 1677 }, { "epoch": 0.0158839844378603, "grad_norm": 567.9453125, "learning_rate": 1.0586750788643532e-06, "loss": 36.1875, "step": 1678 }, { "epoch": 0.015893450459575353, "grad_norm": 605.5845336914062, "learning_rate": 1.0593059936908517e-06, "loss": 37.3438, "step": 1679 }, { "epoch": 0.015902916481290406, "grad_norm": 468.4525146484375, "learning_rate": 1.0599369085173502e-06, "loss": 42.5156, "step": 1680 }, { "epoch": 0.015912382503005463, "grad_norm": 452.6356506347656, "learning_rate": 1.0605678233438485e-06, "loss": 35.375, "step": 1681 }, { "epoch": 0.015921848524720516, "grad_norm": 1780.895263671875, "learning_rate": 1.061198738170347e-06, "loss": 84.25, "step": 1682 }, { "epoch": 0.01593131454643557, "grad_norm": 533.7827758789062, "learning_rate": 1.0618296529968453e-06, "loss": 34.375, "step": 1683 }, { "epoch": 0.015940780568150622, "grad_norm": 866.1103515625, "learning_rate": 1.0624605678233438e-06, "loss": 105.4688, "step": 1684 }, { "epoch": 0.01595024658986568, "grad_norm": 826.0418701171875, "learning_rate": 1.0630914826498422e-06, "loss": 40.2656, "step": 1685 }, { "epoch": 0.01595971261158073, "grad_norm": 480.28546142578125, "learning_rate": 1.0637223974763407e-06, "loss": 38.0938, "step": 1686 }, { "epoch": 0.015969178633295784, "grad_norm": 665.2730102539062, "learning_rate": 1.064353312302839e-06, "loss": 72.9062, "step": 1687 }, { "epoch": 0.015978644655010837, "grad_norm": 540.0803833007812, "learning_rate": 1.0649842271293375e-06, "loss": 41.2344, "step": 1688 }, { "epoch": 0.015988110676725894, "grad_norm": 885.970947265625, "learning_rate": 1.065615141955836e-06, "loss": 73.9375, "step": 1689 }, { "epoch": 0.015997576698440947, "grad_norm": 474.86865234375, "learning_rate": 1.0662460567823343e-06, "loss": 37.3594, "step": 1690 }, { "epoch": 0.016007042720156, "grad_norm": 731.8013916015625, "learning_rate": 1.0668769716088328e-06, "loss": 100.0078, "step": 1691 }, { "epoch": 0.016016508741871053, "grad_norm": 656.1320190429688, "learning_rate": 1.067507886435331e-06, "loss": 37.125, "step": 1692 }, { "epoch": 0.01602597476358611, "grad_norm": 2.8406147956848145, "learning_rate": 1.0681388012618296e-06, "loss": 0.9629, "step": 1693 }, { "epoch": 0.016035440785301162, "grad_norm": 475.0668029785156, "learning_rate": 1.068769716088328e-06, "loss": 77.8438, "step": 1694 }, { "epoch": 0.016044906807016215, "grad_norm": 351.7608947753906, "learning_rate": 1.0694006309148265e-06, "loss": 32.3281, "step": 1695 }, { "epoch": 0.01605437282873127, "grad_norm": 1165.3621826171875, "learning_rate": 1.0700315457413248e-06, "loss": 54.625, "step": 1696 }, { "epoch": 0.01606383885044632, "grad_norm": 498.9257507324219, "learning_rate": 1.0706624605678233e-06, "loss": 73.0625, "step": 1697 }, { "epoch": 0.016073304872161378, "grad_norm": 2.999356985092163, "learning_rate": 1.0712933753943216e-06, "loss": 0.875, "step": 1698 }, { "epoch": 0.01608277089387643, "grad_norm": 540.3839721679688, "learning_rate": 1.07192429022082e-06, "loss": 42.0156, "step": 1699 }, { "epoch": 0.016092236915591484, "grad_norm": 2.838178873062134, "learning_rate": 1.0725552050473186e-06, "loss": 0.8828, "step": 1700 }, { "epoch": 0.016101702937306537, "grad_norm": 400.80426025390625, "learning_rate": 1.073186119873817e-06, "loss": 41.4375, "step": 1701 }, { "epoch": 0.016111168959021593, "grad_norm": 627.386962890625, "learning_rate": 1.0738170347003156e-06, "loss": 44.4844, "step": 1702 }, { "epoch": 0.016120634980736646, "grad_norm": 398.4049072265625, "learning_rate": 1.0744479495268136e-06, "loss": 33.2969, "step": 1703 }, { "epoch": 0.0161301010024517, "grad_norm": 487.301513671875, "learning_rate": 1.0750788643533121e-06, "loss": 67.4062, "step": 1704 }, { "epoch": 0.016139567024166752, "grad_norm": 3.3775322437286377, "learning_rate": 1.0757097791798106e-06, "loss": 0.9165, "step": 1705 }, { "epoch": 0.01614903304588181, "grad_norm": 420.8460693359375, "learning_rate": 1.0763406940063091e-06, "loss": 46.8594, "step": 1706 }, { "epoch": 0.016158499067596862, "grad_norm": 353.3575439453125, "learning_rate": 1.0769716088328076e-06, "loss": 46.5625, "step": 1707 }, { "epoch": 0.016167965089311915, "grad_norm": 508.2467956542969, "learning_rate": 1.077602523659306e-06, "loss": 48.9844, "step": 1708 }, { "epoch": 0.016177431111026968, "grad_norm": 716.7093505859375, "learning_rate": 1.0782334384858044e-06, "loss": 84.8125, "step": 1709 }, { "epoch": 0.01618689713274202, "grad_norm": 425.7799377441406, "learning_rate": 1.0788643533123027e-06, "loss": 37.375, "step": 1710 }, { "epoch": 0.016196363154457077, "grad_norm": 287.42010498046875, "learning_rate": 1.0794952681388012e-06, "loss": 36.875, "step": 1711 }, { "epoch": 0.01620582917617213, "grad_norm": 472.5465393066406, "learning_rate": 1.0801261829652996e-06, "loss": 65.3438, "step": 1712 }, { "epoch": 0.016215295197887183, "grad_norm": 2.9172630310058594, "learning_rate": 1.0807570977917981e-06, "loss": 0.8633, "step": 1713 }, { "epoch": 0.016224761219602236, "grad_norm": 368.5461120605469, "learning_rate": 1.0813880126182964e-06, "loss": 35.9531, "step": 1714 }, { "epoch": 0.016234227241317293, "grad_norm": 1042.0780029296875, "learning_rate": 1.082018927444795e-06, "loss": 70.7969, "step": 1715 }, { "epoch": 0.016243693263032346, "grad_norm": 738.8729858398438, "learning_rate": 1.0826498422712932e-06, "loss": 79.75, "step": 1716 }, { "epoch": 0.0162531592847474, "grad_norm": 384.94500732421875, "learning_rate": 1.0832807570977917e-06, "loss": 37.1562, "step": 1717 }, { "epoch": 0.016262625306462452, "grad_norm": 336.48931884765625, "learning_rate": 1.0839116719242902e-06, "loss": 29.5938, "step": 1718 }, { "epoch": 0.01627209132817751, "grad_norm": 1249.4678955078125, "learning_rate": 1.0845425867507887e-06, "loss": 75.3438, "step": 1719 }, { "epoch": 0.01628155734989256, "grad_norm": 348.5146484375, "learning_rate": 1.085173501577287e-06, "loss": 31.6562, "step": 1720 }, { "epoch": 0.016291023371607614, "grad_norm": 901.5489501953125, "learning_rate": 1.0858044164037854e-06, "loss": 75.8125, "step": 1721 }, { "epoch": 0.016300489393322667, "grad_norm": 380.92779541015625, "learning_rate": 1.086435331230284e-06, "loss": 39.2188, "step": 1722 }, { "epoch": 0.01630995541503772, "grad_norm": 995.5256958007812, "learning_rate": 1.0870662460567822e-06, "loss": 51.0781, "step": 1723 }, { "epoch": 0.016319421436752777, "grad_norm": 808.4404907226562, "learning_rate": 1.0876971608832807e-06, "loss": 97.5469, "step": 1724 }, { "epoch": 0.01632888745846783, "grad_norm": 992.0001831054688, "learning_rate": 1.088328075709779e-06, "loss": 72.4844, "step": 1725 }, { "epoch": 0.016338353480182883, "grad_norm": 753.4857788085938, "learning_rate": 1.0889589905362775e-06, "loss": 55.25, "step": 1726 }, { "epoch": 0.016347819501897936, "grad_norm": 368.0390625, "learning_rate": 1.089589905362776e-06, "loss": 33.9531, "step": 1727 }, { "epoch": 0.016357285523612992, "grad_norm": 351.5782470703125, "learning_rate": 1.0902208201892745e-06, "loss": 33.6953, "step": 1728 }, { "epoch": 0.016366751545328045, "grad_norm": 586.8836059570312, "learning_rate": 1.090851735015773e-06, "loss": 67.4375, "step": 1729 }, { "epoch": 0.0163762175670431, "grad_norm": 2019.5118408203125, "learning_rate": 1.0914826498422712e-06, "loss": 73.3906, "step": 1730 }, { "epoch": 0.01638568358875815, "grad_norm": 567.8345336914062, "learning_rate": 1.0921135646687695e-06, "loss": 63.2031, "step": 1731 }, { "epoch": 0.016395149610473208, "grad_norm": 330.94940185546875, "learning_rate": 1.092744479495268e-06, "loss": 30.9062, "step": 1732 }, { "epoch": 0.01640461563218826, "grad_norm": 429.05035400390625, "learning_rate": 1.0933753943217665e-06, "loss": 52.875, "step": 1733 }, { "epoch": 0.016414081653903314, "grad_norm": 814.8743286132812, "learning_rate": 1.094006309148265e-06, "loss": 80.3125, "step": 1734 }, { "epoch": 0.016423547675618367, "grad_norm": 423.9749755859375, "learning_rate": 1.0946372239747635e-06, "loss": 62.1875, "step": 1735 }, { "epoch": 0.01643301369733342, "grad_norm": 411.9945983886719, "learning_rate": 1.0952681388012616e-06, "loss": 37.7344, "step": 1736 }, { "epoch": 0.016442479719048476, "grad_norm": 602.2581176757812, "learning_rate": 1.09589905362776e-06, "loss": 70.75, "step": 1737 }, { "epoch": 0.01645194574076353, "grad_norm": 704.5748901367188, "learning_rate": 1.0965299684542586e-06, "loss": 88.8594, "step": 1738 }, { "epoch": 0.016461411762478582, "grad_norm": 698.968994140625, "learning_rate": 1.097160883280757e-06, "loss": 62.1406, "step": 1739 }, { "epoch": 0.016470877784193635, "grad_norm": 446.2467346191406, "learning_rate": 1.0977917981072555e-06, "loss": 38.8438, "step": 1740 }, { "epoch": 0.016480343805908692, "grad_norm": 452.7189025878906, "learning_rate": 1.098422712933754e-06, "loss": 35.125, "step": 1741 }, { "epoch": 0.016489809827623745, "grad_norm": 468.8706970214844, "learning_rate": 1.0990536277602523e-06, "loss": 36.5156, "step": 1742 }, { "epoch": 0.016499275849338798, "grad_norm": 2.7570667266845703, "learning_rate": 1.0996845425867506e-06, "loss": 0.8096, "step": 1743 }, { "epoch": 0.01650874187105385, "grad_norm": 653.8993530273438, "learning_rate": 1.100315457413249e-06, "loss": 30.0156, "step": 1744 }, { "epoch": 0.016518207892768907, "grad_norm": 680.657958984375, "learning_rate": 1.1009463722397476e-06, "loss": 31.5469, "step": 1745 }, { "epoch": 0.01652767391448396, "grad_norm": 953.1234741210938, "learning_rate": 1.101577287066246e-06, "loss": 89.5625, "step": 1746 }, { "epoch": 0.016537139936199013, "grad_norm": 486.2539367675781, "learning_rate": 1.1022082018927444e-06, "loss": 36.9062, "step": 1747 }, { "epoch": 0.016546605957914066, "grad_norm": 721.6468505859375, "learning_rate": 1.1028391167192429e-06, "loss": 65.0625, "step": 1748 }, { "epoch": 0.016556071979629123, "grad_norm": 963.2139282226562, "learning_rate": 1.1034700315457413e-06, "loss": 55.8672, "step": 1749 }, { "epoch": 0.016565538001344176, "grad_norm": 237.7793731689453, "learning_rate": 1.1041009463722396e-06, "loss": 27.4844, "step": 1750 }, { "epoch": 0.01657500402305923, "grad_norm": 362.14984130859375, "learning_rate": 1.1047318611987381e-06, "loss": 45.7344, "step": 1751 }, { "epoch": 0.016584470044774282, "grad_norm": 719.058349609375, "learning_rate": 1.1053627760252366e-06, "loss": 45.4062, "step": 1752 }, { "epoch": 0.016593936066489335, "grad_norm": 828.1848754882812, "learning_rate": 1.1059936908517349e-06, "loss": 38.2656, "step": 1753 }, { "epoch": 0.01660340208820439, "grad_norm": 541.8892822265625, "learning_rate": 1.1066246056782334e-06, "loss": 39.875, "step": 1754 }, { "epoch": 0.016612868109919444, "grad_norm": 591.5081176757812, "learning_rate": 1.1072555205047319e-06, "loss": 37.3281, "step": 1755 }, { "epoch": 0.016622334131634497, "grad_norm": 654.0038452148438, "learning_rate": 1.1078864353312304e-06, "loss": 35.4844, "step": 1756 }, { "epoch": 0.01663180015334955, "grad_norm": 326.9129333496094, "learning_rate": 1.1085173501577287e-06, "loss": 39.0312, "step": 1757 }, { "epoch": 0.016641266175064607, "grad_norm": 1263.4073486328125, "learning_rate": 1.109148264984227e-06, "loss": 88.2812, "step": 1758 }, { "epoch": 0.01665073219677966, "grad_norm": 770.7827758789062, "learning_rate": 1.1097791798107254e-06, "loss": 89.8125, "step": 1759 }, { "epoch": 0.016660198218494713, "grad_norm": 597.7844848632812, "learning_rate": 1.110410094637224e-06, "loss": 83.4375, "step": 1760 }, { "epoch": 0.016669664240209766, "grad_norm": 551.2822875976562, "learning_rate": 1.1110410094637224e-06, "loss": 52.4062, "step": 1761 }, { "epoch": 0.016679130261924822, "grad_norm": 2.9120187759399414, "learning_rate": 1.111671924290221e-06, "loss": 0.834, "step": 1762 }, { "epoch": 0.016688596283639875, "grad_norm": 295.5687561035156, "learning_rate": 1.1123028391167194e-06, "loss": 37.3125, "step": 1763 }, { "epoch": 0.01669806230535493, "grad_norm": 2.733940362930298, "learning_rate": 1.1129337539432175e-06, "loss": 0.8711, "step": 1764 }, { "epoch": 0.01670752832706998, "grad_norm": 368.4912109375, "learning_rate": 1.113564668769716e-06, "loss": 38.7031, "step": 1765 }, { "epoch": 0.016716994348785034, "grad_norm": 770.675537109375, "learning_rate": 1.1141955835962145e-06, "loss": 84.4375, "step": 1766 }, { "epoch": 0.01672646037050009, "grad_norm": 366.6790771484375, "learning_rate": 1.114826498422713e-06, "loss": 36.7031, "step": 1767 }, { "epoch": 0.016735926392215144, "grad_norm": 332.6624450683594, "learning_rate": 1.1154574132492114e-06, "loss": 32.7656, "step": 1768 }, { "epoch": 0.016745392413930197, "grad_norm": 792.1138305664062, "learning_rate": 1.1160883280757097e-06, "loss": 53.5469, "step": 1769 }, { "epoch": 0.01675485843564525, "grad_norm": 409.1287536621094, "learning_rate": 1.116719242902208e-06, "loss": 34.7031, "step": 1770 }, { "epoch": 0.016764324457360306, "grad_norm": 2.813776731491089, "learning_rate": 1.1173501577287065e-06, "loss": 0.8457, "step": 1771 }, { "epoch": 0.01677379047907536, "grad_norm": 412.25531005859375, "learning_rate": 1.117981072555205e-06, "loss": 36.2969, "step": 1772 }, { "epoch": 0.016783256500790412, "grad_norm": 525.5071411132812, "learning_rate": 1.1186119873817035e-06, "loss": 37.1875, "step": 1773 }, { "epoch": 0.016792722522505465, "grad_norm": 533.2871704101562, "learning_rate": 1.119242902208202e-06, "loss": 39.5469, "step": 1774 }, { "epoch": 0.016802188544220522, "grad_norm": 2.7620437145233154, "learning_rate": 1.1198738170347003e-06, "loss": 0.9568, "step": 1775 }, { "epoch": 0.016811654565935575, "grad_norm": 348.8665466308594, "learning_rate": 1.1205047318611987e-06, "loss": 30.0312, "step": 1776 }, { "epoch": 0.016821120587650628, "grad_norm": 493.4899597167969, "learning_rate": 1.121135646687697e-06, "loss": 53.8438, "step": 1777 }, { "epoch": 0.01683058660936568, "grad_norm": 725.303955078125, "learning_rate": 1.1217665615141955e-06, "loss": 45.6719, "step": 1778 }, { "epoch": 0.016840052631080734, "grad_norm": 308.6080017089844, "learning_rate": 1.122397476340694e-06, "loss": 32.8594, "step": 1779 }, { "epoch": 0.01684951865279579, "grad_norm": 324.0197448730469, "learning_rate": 1.1230283911671923e-06, "loss": 41.5469, "step": 1780 }, { "epoch": 0.016858984674510843, "grad_norm": 3.0031628608703613, "learning_rate": 1.1236593059936908e-06, "loss": 0.9819, "step": 1781 }, { "epoch": 0.016868450696225896, "grad_norm": 626.6452026367188, "learning_rate": 1.1242902208201893e-06, "loss": 32.8203, "step": 1782 }, { "epoch": 0.01687791671794095, "grad_norm": 281.9786682128906, "learning_rate": 1.1249211356466878e-06, "loss": 35.6875, "step": 1783 }, { "epoch": 0.016887382739656006, "grad_norm": 420.8489990234375, "learning_rate": 1.125552050473186e-06, "loss": 47.4062, "step": 1784 }, { "epoch": 0.01689684876137106, "grad_norm": 265.87347412109375, "learning_rate": 1.1261829652996845e-06, "loss": 33.8906, "step": 1785 }, { "epoch": 0.016906314783086112, "grad_norm": 525.3480834960938, "learning_rate": 1.1268138801261828e-06, "loss": 38.8828, "step": 1786 }, { "epoch": 0.016915780804801165, "grad_norm": 228.9912872314453, "learning_rate": 1.1274447949526813e-06, "loss": 28.3125, "step": 1787 }, { "epoch": 0.01692524682651622, "grad_norm": 560.3884887695312, "learning_rate": 1.1280757097791798e-06, "loss": 34.8906, "step": 1788 }, { "epoch": 0.016934712848231274, "grad_norm": 492.8253479003906, "learning_rate": 1.1287066246056783e-06, "loss": 53.6875, "step": 1789 }, { "epoch": 0.016944178869946327, "grad_norm": 490.9655456542969, "learning_rate": 1.1293375394321766e-06, "loss": 32.375, "step": 1790 }, { "epoch": 0.01695364489166138, "grad_norm": 1440.664306640625, "learning_rate": 1.1299684542586749e-06, "loss": 73.5312, "step": 1791 }, { "epoch": 0.016963110913376437, "grad_norm": 542.3134155273438, "learning_rate": 1.1305993690851734e-06, "loss": 47.7188, "step": 1792 }, { "epoch": 0.01697257693509149, "grad_norm": 493.11968994140625, "learning_rate": 1.1312302839116719e-06, "loss": 31.6875, "step": 1793 }, { "epoch": 0.016982042956806543, "grad_norm": 443.6826477050781, "learning_rate": 1.1318611987381703e-06, "loss": 34.9219, "step": 1794 }, { "epoch": 0.016991508978521596, "grad_norm": 295.4015808105469, "learning_rate": 1.1324921135646688e-06, "loss": 32.7969, "step": 1795 }, { "epoch": 0.01700097500023665, "grad_norm": 425.1336364746094, "learning_rate": 1.1331230283911673e-06, "loss": 37.7812, "step": 1796 }, { "epoch": 0.017010441021951705, "grad_norm": 402.3957824707031, "learning_rate": 1.1337539432176654e-06, "loss": 36.2031, "step": 1797 }, { "epoch": 0.01701990704366676, "grad_norm": 318.6095275878906, "learning_rate": 1.1343848580441639e-06, "loss": 31.75, "step": 1798 }, { "epoch": 0.01702937306538181, "grad_norm": 787.3699340820312, "learning_rate": 1.1350157728706624e-06, "loss": 77.6875, "step": 1799 }, { "epoch": 0.017038839087096865, "grad_norm": 433.5979309082031, "learning_rate": 1.1356466876971609e-06, "loss": 42.5625, "step": 1800 }, { "epoch": 0.01704830510881192, "grad_norm": 206.4489288330078, "learning_rate": 1.1362776025236594e-06, "loss": 29.4062, "step": 1801 }, { "epoch": 0.017057771130526974, "grad_norm": 409.8436279296875, "learning_rate": 1.1369085173501577e-06, "loss": 32.6719, "step": 1802 }, { "epoch": 0.017067237152242027, "grad_norm": 2.417670249938965, "learning_rate": 1.1375394321766561e-06, "loss": 0.8501, "step": 1803 }, { "epoch": 0.01707670317395708, "grad_norm": 576.62841796875, "learning_rate": 1.1381703470031544e-06, "loss": 74.3281, "step": 1804 }, { "epoch": 0.017086169195672136, "grad_norm": 339.8450927734375, "learning_rate": 1.138801261829653e-06, "loss": 29.5156, "step": 1805 }, { "epoch": 0.01709563521738719, "grad_norm": 469.55010986328125, "learning_rate": 1.1394321766561514e-06, "loss": 47.5, "step": 1806 }, { "epoch": 0.017105101239102243, "grad_norm": 677.39697265625, "learning_rate": 1.14006309148265e-06, "loss": 91.3438, "step": 1807 }, { "epoch": 0.017114567260817296, "grad_norm": 713.2576904296875, "learning_rate": 1.1406940063091482e-06, "loss": 57.875, "step": 1808 }, { "epoch": 0.01712403328253235, "grad_norm": 1504.5362548828125, "learning_rate": 1.1413249211356467e-06, "loss": 68.6172, "step": 1809 }, { "epoch": 0.017133499304247405, "grad_norm": 812.543701171875, "learning_rate": 1.141955835962145e-06, "loss": 72.8438, "step": 1810 }, { "epoch": 0.017142965325962458, "grad_norm": 386.485107421875, "learning_rate": 1.1425867507886435e-06, "loss": 33.8281, "step": 1811 }, { "epoch": 0.01715243134767751, "grad_norm": 801.1355590820312, "learning_rate": 1.143217665615142e-06, "loss": 76.4219, "step": 1812 }, { "epoch": 0.017161897369392564, "grad_norm": 604.911376953125, "learning_rate": 1.1438485804416402e-06, "loss": 64.625, "step": 1813 }, { "epoch": 0.01717136339110762, "grad_norm": 817.3651123046875, "learning_rate": 1.1444794952681387e-06, "loss": 91.5625, "step": 1814 }, { "epoch": 0.017180829412822674, "grad_norm": 300.9106140136719, "learning_rate": 1.1451104100946372e-06, "loss": 29.7812, "step": 1815 }, { "epoch": 0.017190295434537727, "grad_norm": 678.43896484375, "learning_rate": 1.1457413249211357e-06, "loss": 77.25, "step": 1816 }, { "epoch": 0.01719976145625278, "grad_norm": 2.983091115951538, "learning_rate": 1.146372239747634e-06, "loss": 0.8179, "step": 1817 }, { "epoch": 0.017209227477967836, "grad_norm": 298.7666015625, "learning_rate": 1.1470031545741325e-06, "loss": 37.6406, "step": 1818 }, { "epoch": 0.01721869349968289, "grad_norm": 640.8468017578125, "learning_rate": 1.1476340694006308e-06, "loss": 31.625, "step": 1819 }, { "epoch": 0.017228159521397942, "grad_norm": 322.48614501953125, "learning_rate": 1.1482649842271293e-06, "loss": 34.25, "step": 1820 }, { "epoch": 0.017237625543112995, "grad_norm": 2.909320592880249, "learning_rate": 1.1488958990536277e-06, "loss": 0.9058, "step": 1821 }, { "epoch": 0.017247091564828048, "grad_norm": 3.1159634590148926, "learning_rate": 1.1495268138801262e-06, "loss": 0.7253, "step": 1822 }, { "epoch": 0.017256557586543105, "grad_norm": 747.490478515625, "learning_rate": 1.1501577287066247e-06, "loss": 86.75, "step": 1823 }, { "epoch": 0.017266023608258158, "grad_norm": 960.1616821289062, "learning_rate": 1.1507886435331228e-06, "loss": 58.8359, "step": 1824 }, { "epoch": 0.01727548962997321, "grad_norm": 421.95654296875, "learning_rate": 1.1514195583596213e-06, "loss": 40.7656, "step": 1825 }, { "epoch": 0.017284955651688264, "grad_norm": 924.6177368164062, "learning_rate": 1.1520504731861198e-06, "loss": 57.375, "step": 1826 }, { "epoch": 0.01729442167340332, "grad_norm": 2.9176149368286133, "learning_rate": 1.1526813880126183e-06, "loss": 0.8599, "step": 1827 }, { "epoch": 0.017303887695118373, "grad_norm": 320.55743408203125, "learning_rate": 1.1533123028391168e-06, "loss": 41.0938, "step": 1828 }, { "epoch": 0.017313353716833426, "grad_norm": 354.20330810546875, "learning_rate": 1.1539432176656153e-06, "loss": 35.9688, "step": 1829 }, { "epoch": 0.01732281973854848, "grad_norm": 542.0612182617188, "learning_rate": 1.1545741324921135e-06, "loss": 51.5469, "step": 1830 }, { "epoch": 0.017332285760263536, "grad_norm": 272.3559875488281, "learning_rate": 1.1552050473186118e-06, "loss": 31.7344, "step": 1831 }, { "epoch": 0.01734175178197859, "grad_norm": 722.7666625976562, "learning_rate": 1.1558359621451103e-06, "loss": 68.375, "step": 1832 }, { "epoch": 0.01735121780369364, "grad_norm": 314.66668701171875, "learning_rate": 1.1564668769716088e-06, "loss": 30.2969, "step": 1833 }, { "epoch": 0.017360683825408695, "grad_norm": 469.9563903808594, "learning_rate": 1.1570977917981073e-06, "loss": 36.9688, "step": 1834 }, { "epoch": 0.01737014984712375, "grad_norm": 316.02679443359375, "learning_rate": 1.1577287066246056e-06, "loss": 37.9531, "step": 1835 }, { "epoch": 0.017379615868838804, "grad_norm": 507.0118408203125, "learning_rate": 1.158359621451104e-06, "loss": 27.1094, "step": 1836 }, { "epoch": 0.017389081890553857, "grad_norm": 416.8763427734375, "learning_rate": 1.1589905362776024e-06, "loss": 33.5, "step": 1837 }, { "epoch": 0.01739854791226891, "grad_norm": 590.4400634765625, "learning_rate": 1.1596214511041009e-06, "loss": 78.75, "step": 1838 }, { "epoch": 0.017408013933983963, "grad_norm": 640.1343383789062, "learning_rate": 1.1602523659305993e-06, "loss": 41.5156, "step": 1839 }, { "epoch": 0.01741747995569902, "grad_norm": 570.1962890625, "learning_rate": 1.1608832807570978e-06, "loss": 79.125, "step": 1840 }, { "epoch": 0.017426945977414073, "grad_norm": 398.20880126953125, "learning_rate": 1.1615141955835961e-06, "loss": 36.8438, "step": 1841 }, { "epoch": 0.017436411999129126, "grad_norm": 1069.8817138671875, "learning_rate": 1.1621451104100946e-06, "loss": 75.7969, "step": 1842 }, { "epoch": 0.01744587802084418, "grad_norm": 451.7537841796875, "learning_rate": 1.162776025236593e-06, "loss": 38.5156, "step": 1843 }, { "epoch": 0.017455344042559235, "grad_norm": 380.56103515625, "learning_rate": 1.1634069400630914e-06, "loss": 41.0625, "step": 1844 }, { "epoch": 0.017464810064274288, "grad_norm": 460.89923095703125, "learning_rate": 1.1640378548895899e-06, "loss": 34.1719, "step": 1845 }, { "epoch": 0.01747427608598934, "grad_norm": 378.84075927734375, "learning_rate": 1.1646687697160882e-06, "loss": 33.5, "step": 1846 }, { "epoch": 0.017483742107704394, "grad_norm": 815.3104858398438, "learning_rate": 1.1652996845425867e-06, "loss": 68.1094, "step": 1847 }, { "epoch": 0.01749320812941945, "grad_norm": 409.7140197753906, "learning_rate": 1.1659305993690851e-06, "loss": 38.5781, "step": 1848 }, { "epoch": 0.017502674151134504, "grad_norm": 288.7345275878906, "learning_rate": 1.1665615141955836e-06, "loss": 33.8438, "step": 1849 }, { "epoch": 0.017512140172849557, "grad_norm": 426.1141052246094, "learning_rate": 1.1671924290220821e-06, "loss": 31.75, "step": 1850 }, { "epoch": 0.01752160619456461, "grad_norm": 551.4730834960938, "learning_rate": 1.1678233438485802e-06, "loss": 40.875, "step": 1851 }, { "epoch": 0.017531072216279663, "grad_norm": 1395.2723388671875, "learning_rate": 1.1684542586750787e-06, "loss": 116.2812, "step": 1852 }, { "epoch": 0.01754053823799472, "grad_norm": 396.5265197753906, "learning_rate": 1.1690851735015772e-06, "loss": 32.4688, "step": 1853 }, { "epoch": 0.017550004259709772, "grad_norm": 2760.52734375, "learning_rate": 1.1697160883280757e-06, "loss": 78.5156, "step": 1854 }, { "epoch": 0.017559470281424825, "grad_norm": 363.58056640625, "learning_rate": 1.1703470031545742e-06, "loss": 29.1562, "step": 1855 }, { "epoch": 0.017568936303139878, "grad_norm": 391.2961120605469, "learning_rate": 1.1709779179810727e-06, "loss": 31.1719, "step": 1856 }, { "epoch": 0.017578402324854935, "grad_norm": 395.7762451171875, "learning_rate": 1.1716088328075707e-06, "loss": 30.4219, "step": 1857 }, { "epoch": 0.017587868346569988, "grad_norm": 577.7101440429688, "learning_rate": 1.1722397476340692e-06, "loss": 40.5781, "step": 1858 }, { "epoch": 0.01759733436828504, "grad_norm": 842.7997436523438, "learning_rate": 1.1728706624605677e-06, "loss": 78.9375, "step": 1859 }, { "epoch": 0.017606800390000094, "grad_norm": 929.2735595703125, "learning_rate": 1.1735015772870662e-06, "loss": 56.6719, "step": 1860 }, { "epoch": 0.01761626641171515, "grad_norm": 841.3222045898438, "learning_rate": 1.1741324921135647e-06, "loss": 37.4531, "step": 1861 }, { "epoch": 0.017625732433430203, "grad_norm": 439.88836669921875, "learning_rate": 1.174763406940063e-06, "loss": 32.125, "step": 1862 }, { "epoch": 0.017635198455145256, "grad_norm": 556.1554565429688, "learning_rate": 1.1753943217665615e-06, "loss": 48.1875, "step": 1863 }, { "epoch": 0.01764466447686031, "grad_norm": 609.6834716796875, "learning_rate": 1.1760252365930598e-06, "loss": 44.2969, "step": 1864 }, { "epoch": 0.017654130498575362, "grad_norm": 256.44342041015625, "learning_rate": 1.1766561514195583e-06, "loss": 31.6719, "step": 1865 }, { "epoch": 0.01766359652029042, "grad_norm": 1196.64599609375, "learning_rate": 1.1772870662460567e-06, "loss": 72.5625, "step": 1866 }, { "epoch": 0.01767306254200547, "grad_norm": 410.1291198730469, "learning_rate": 1.1779179810725552e-06, "loss": 33.25, "step": 1867 }, { "epoch": 0.017682528563720525, "grad_norm": 450.711181640625, "learning_rate": 1.1785488958990535e-06, "loss": 38.25, "step": 1868 }, { "epoch": 0.017691994585435578, "grad_norm": 283.5461120605469, "learning_rate": 1.179179810725552e-06, "loss": 27.7188, "step": 1869 }, { "epoch": 0.017701460607150634, "grad_norm": 759.0654907226562, "learning_rate": 1.1798107255520505e-06, "loss": 82.0938, "step": 1870 }, { "epoch": 0.017710926628865687, "grad_norm": 652.7574462890625, "learning_rate": 1.1804416403785488e-06, "loss": 92.0312, "step": 1871 }, { "epoch": 0.01772039265058074, "grad_norm": 866.2205200195312, "learning_rate": 1.1810725552050473e-06, "loss": 59.1406, "step": 1872 }, { "epoch": 0.017729858672295793, "grad_norm": 756.0506591796875, "learning_rate": 1.1817034700315456e-06, "loss": 66.8125, "step": 1873 }, { "epoch": 0.01773932469401085, "grad_norm": 3.616075277328491, "learning_rate": 1.182334384858044e-06, "loss": 0.937, "step": 1874 }, { "epoch": 0.017748790715725903, "grad_norm": 334.2268371582031, "learning_rate": 1.1829652996845425e-06, "loss": 30.9375, "step": 1875 }, { "epoch": 0.017758256737440956, "grad_norm": 779.3422241210938, "learning_rate": 1.183596214511041e-06, "loss": 56.375, "step": 1876 }, { "epoch": 0.01776772275915601, "grad_norm": 669.1897583007812, "learning_rate": 1.1842271293375395e-06, "loss": 49.75, "step": 1877 }, { "epoch": 0.017777188780871065, "grad_norm": 416.17315673828125, "learning_rate": 1.1848580441640378e-06, "loss": 33.625, "step": 1878 }, { "epoch": 0.017786654802586118, "grad_norm": 338.4939880371094, "learning_rate": 1.185488958990536e-06, "loss": 33.8281, "step": 1879 }, { "epoch": 0.01779612082430117, "grad_norm": 444.8028259277344, "learning_rate": 1.1861198738170346e-06, "loss": 34.7188, "step": 1880 }, { "epoch": 0.017805586846016224, "grad_norm": 1514.822998046875, "learning_rate": 1.186750788643533e-06, "loss": 72.8594, "step": 1881 }, { "epoch": 0.017815052867731277, "grad_norm": 268.0196838378906, "learning_rate": 1.1873817034700316e-06, "loss": 25.7344, "step": 1882 }, { "epoch": 0.017824518889446334, "grad_norm": 601.5250854492188, "learning_rate": 1.18801261829653e-06, "loss": 46.5312, "step": 1883 }, { "epoch": 0.017833984911161387, "grad_norm": 697.5971069335938, "learning_rate": 1.1886435331230281e-06, "loss": 36.0, "step": 1884 }, { "epoch": 0.01784345093287644, "grad_norm": 559.5159912109375, "learning_rate": 1.1892744479495266e-06, "loss": 38.5312, "step": 1885 }, { "epoch": 0.017852916954591493, "grad_norm": 1390.0335693359375, "learning_rate": 1.1899053627760251e-06, "loss": 75.0938, "step": 1886 }, { "epoch": 0.01786238297630655, "grad_norm": 327.7848205566406, "learning_rate": 1.1905362776025236e-06, "loss": 32.1562, "step": 1887 }, { "epoch": 0.017871848998021602, "grad_norm": 521.9337158203125, "learning_rate": 1.191167192429022e-06, "loss": 62.9844, "step": 1888 }, { "epoch": 0.017881315019736655, "grad_norm": 657.2332763671875, "learning_rate": 1.1917981072555206e-06, "loss": 40.7188, "step": 1889 }, { "epoch": 0.017890781041451708, "grad_norm": 372.6358642578125, "learning_rate": 1.1924290220820189e-06, "loss": 36.3125, "step": 1890 }, { "epoch": 0.017900247063166765, "grad_norm": 845.8232421875, "learning_rate": 1.1930599369085172e-06, "loss": 59.7344, "step": 1891 }, { "epoch": 0.017909713084881818, "grad_norm": 242.6021270751953, "learning_rate": 1.1936908517350157e-06, "loss": 31.0469, "step": 1892 }, { "epoch": 0.01791917910659687, "grad_norm": 433.31024169921875, "learning_rate": 1.1943217665615141e-06, "loss": 29.7031, "step": 1893 }, { "epoch": 0.017928645128311924, "grad_norm": 452.2779846191406, "learning_rate": 1.1949526813880126e-06, "loss": 35.8438, "step": 1894 }, { "epoch": 0.017938111150026977, "grad_norm": 250.09254455566406, "learning_rate": 1.195583596214511e-06, "loss": 32.0938, "step": 1895 }, { "epoch": 0.017947577171742033, "grad_norm": 911.1326904296875, "learning_rate": 1.1962145110410094e-06, "loss": 46.5781, "step": 1896 }, { "epoch": 0.017957043193457086, "grad_norm": 845.9611206054688, "learning_rate": 1.196845425867508e-06, "loss": 60.1875, "step": 1897 }, { "epoch": 0.01796650921517214, "grad_norm": 321.9201965332031, "learning_rate": 1.1974763406940062e-06, "loss": 34.6406, "step": 1898 }, { "epoch": 0.017975975236887192, "grad_norm": 3.446354389190674, "learning_rate": 1.1981072555205047e-06, "loss": 0.908, "step": 1899 }, { "epoch": 0.01798544125860225, "grad_norm": 653.6058959960938, "learning_rate": 1.1987381703470032e-06, "loss": 78.6719, "step": 1900 }, { "epoch": 0.0179949072803173, "grad_norm": 602.793212890625, "learning_rate": 1.1993690851735015e-06, "loss": 32.9531, "step": 1901 }, { "epoch": 0.018004373302032355, "grad_norm": 805.3904418945312, "learning_rate": 1.2e-06, "loss": 59.2188, "step": 1902 }, { "epoch": 0.018013839323747408, "grad_norm": 460.6910400390625, "learning_rate": 1.2006309148264984e-06, "loss": 34.8906, "step": 1903 }, { "epoch": 0.018023305345462464, "grad_norm": 872.9583740234375, "learning_rate": 1.201261829652997e-06, "loss": 63.8438, "step": 1904 }, { "epoch": 0.018032771367177517, "grad_norm": 467.94189453125, "learning_rate": 1.2018927444794952e-06, "loss": 46.8125, "step": 1905 }, { "epoch": 0.01804223738889257, "grad_norm": 671.99658203125, "learning_rate": 1.2025236593059935e-06, "loss": 86.8438, "step": 1906 }, { "epoch": 0.018051703410607623, "grad_norm": 564.1636352539062, "learning_rate": 1.203154574132492e-06, "loss": 38.6406, "step": 1907 }, { "epoch": 0.018061169432322676, "grad_norm": 320.685302734375, "learning_rate": 1.2037854889589905e-06, "loss": 32.6875, "step": 1908 }, { "epoch": 0.018070635454037733, "grad_norm": 428.6639709472656, "learning_rate": 1.204416403785489e-06, "loss": 31.5781, "step": 1909 }, { "epoch": 0.018080101475752786, "grad_norm": 509.7602233886719, "learning_rate": 1.2050473186119875e-06, "loss": 53.8906, "step": 1910 }, { "epoch": 0.01808956749746784, "grad_norm": 312.21990966796875, "learning_rate": 1.2056782334384857e-06, "loss": 36.0469, "step": 1911 }, { "epoch": 0.01809903351918289, "grad_norm": 783.103759765625, "learning_rate": 1.206309148264984e-06, "loss": 52.7188, "step": 1912 }, { "epoch": 0.018108499540897948, "grad_norm": 709.7259521484375, "learning_rate": 1.2069400630914825e-06, "loss": 44.2031, "step": 1913 }, { "epoch": 0.018117965562613, "grad_norm": 358.9385986328125, "learning_rate": 1.207570977917981e-06, "loss": 33.5, "step": 1914 }, { "epoch": 0.018127431584328054, "grad_norm": 329.25048828125, "learning_rate": 1.2082018927444795e-06, "loss": 33.5156, "step": 1915 }, { "epoch": 0.018136897606043107, "grad_norm": 345.3670349121094, "learning_rate": 1.208832807570978e-06, "loss": 33.5156, "step": 1916 }, { "epoch": 0.018146363627758164, "grad_norm": 1009.281494140625, "learning_rate": 1.2094637223974763e-06, "loss": 65.1562, "step": 1917 }, { "epoch": 0.018155829649473217, "grad_norm": 556.6182250976562, "learning_rate": 1.2100946372239746e-06, "loss": 40.2344, "step": 1918 }, { "epoch": 0.01816529567118827, "grad_norm": 4317.2724609375, "learning_rate": 1.210725552050473e-06, "loss": 67.0156, "step": 1919 }, { "epoch": 0.018174761692903323, "grad_norm": 666.6458740234375, "learning_rate": 1.2113564668769715e-06, "loss": 63.6641, "step": 1920 }, { "epoch": 0.018184227714618376, "grad_norm": 272.67718505859375, "learning_rate": 1.21198738170347e-06, "loss": 32.5, "step": 1921 }, { "epoch": 0.018193693736333432, "grad_norm": 660.0942993164062, "learning_rate": 1.2126182965299685e-06, "loss": 65.5, "step": 1922 }, { "epoch": 0.018203159758048485, "grad_norm": 1903.6678466796875, "learning_rate": 1.2132492113564668e-06, "loss": 67.75, "step": 1923 }, { "epoch": 0.018212625779763538, "grad_norm": 3.1346993446350098, "learning_rate": 1.2138801261829653e-06, "loss": 0.9429, "step": 1924 }, { "epoch": 0.01822209180147859, "grad_norm": 364.39508056640625, "learning_rate": 1.2145110410094636e-06, "loss": 39.0625, "step": 1925 }, { "epoch": 0.018231557823193648, "grad_norm": 779.1327514648438, "learning_rate": 1.215141955835962e-06, "loss": 43.0156, "step": 1926 }, { "epoch": 0.0182410238449087, "grad_norm": 587.0258178710938, "learning_rate": 1.2157728706624606e-06, "loss": 69.7188, "step": 1927 }, { "epoch": 0.018250489866623754, "grad_norm": 781.679931640625, "learning_rate": 1.2164037854889589e-06, "loss": 94.2344, "step": 1928 }, { "epoch": 0.018259955888338807, "grad_norm": 577.1917724609375, "learning_rate": 1.2170347003154573e-06, "loss": 38.25, "step": 1929 }, { "epoch": 0.018269421910053863, "grad_norm": 418.002685546875, "learning_rate": 1.2176656151419558e-06, "loss": 41.9375, "step": 1930 }, { "epoch": 0.018278887931768916, "grad_norm": 1406.249755859375, "learning_rate": 1.2182965299684541e-06, "loss": 79.6406, "step": 1931 }, { "epoch": 0.01828835395348397, "grad_norm": 1071.46728515625, "learning_rate": 1.2189274447949526e-06, "loss": 77.7891, "step": 1932 }, { "epoch": 0.018297819975199022, "grad_norm": 506.83966064453125, "learning_rate": 1.219558359621451e-06, "loss": 36.75, "step": 1933 }, { "epoch": 0.01830728599691408, "grad_norm": 765.6309204101562, "learning_rate": 1.2201892744479494e-06, "loss": 39.4609, "step": 1934 }, { "epoch": 0.01831675201862913, "grad_norm": 387.5756530761719, "learning_rate": 1.2208201892744479e-06, "loss": 31.5938, "step": 1935 }, { "epoch": 0.018326218040344185, "grad_norm": 281.5417785644531, "learning_rate": 1.2214511041009464e-06, "loss": 30.8594, "step": 1936 }, { "epoch": 0.018335684062059238, "grad_norm": 1204.9063720703125, "learning_rate": 1.2220820189274449e-06, "loss": 80.25, "step": 1937 }, { "epoch": 0.01834515008377429, "grad_norm": 278.17864990234375, "learning_rate": 1.2227129337539431e-06, "loss": 29.8125, "step": 1938 }, { "epoch": 0.018354616105489347, "grad_norm": 345.4440612792969, "learning_rate": 1.2233438485804414e-06, "loss": 30.625, "step": 1939 }, { "epoch": 0.0183640821272044, "grad_norm": 266.744873046875, "learning_rate": 1.22397476340694e-06, "loss": 28.2344, "step": 1940 }, { "epoch": 0.018373548148919453, "grad_norm": 601.2613525390625, "learning_rate": 1.2246056782334384e-06, "loss": 62.4219, "step": 1941 }, { "epoch": 0.018383014170634506, "grad_norm": 526.4821166992188, "learning_rate": 1.225236593059937e-06, "loss": 39.6562, "step": 1942 }, { "epoch": 0.018392480192349563, "grad_norm": 672.8414916992188, "learning_rate": 1.2258675078864354e-06, "loss": 74.375, "step": 1943 }, { "epoch": 0.018401946214064616, "grad_norm": 503.8292541503906, "learning_rate": 1.2264984227129339e-06, "loss": 63.6094, "step": 1944 }, { "epoch": 0.01841141223577967, "grad_norm": 1755.0703125, "learning_rate": 1.227129337539432e-06, "loss": 59.75, "step": 1945 }, { "epoch": 0.01842087825749472, "grad_norm": 419.3904113769531, "learning_rate": 1.2277602523659305e-06, "loss": 50.0, "step": 1946 }, { "epoch": 0.018430344279209778, "grad_norm": 1302.19482421875, "learning_rate": 1.228391167192429e-06, "loss": 86.9531, "step": 1947 }, { "epoch": 0.01843981030092483, "grad_norm": 375.87249755859375, "learning_rate": 1.2290220820189274e-06, "loss": 31.25, "step": 1948 }, { "epoch": 0.018449276322639884, "grad_norm": 717.37158203125, "learning_rate": 1.229652996845426e-06, "loss": 75.2031, "step": 1949 }, { "epoch": 0.018458742344354937, "grad_norm": 670.8231811523438, "learning_rate": 1.2302839116719242e-06, "loss": 56.4219, "step": 1950 }, { "epoch": 0.01846820836606999, "grad_norm": 458.2268371582031, "learning_rate": 1.2309148264984225e-06, "loss": 48.9844, "step": 1951 }, { "epoch": 0.018477674387785047, "grad_norm": 363.7681579589844, "learning_rate": 1.231545741324921e-06, "loss": 33.4219, "step": 1952 }, { "epoch": 0.0184871404095001, "grad_norm": 315.9241638183594, "learning_rate": 1.2321766561514195e-06, "loss": 30.2969, "step": 1953 }, { "epoch": 0.018496606431215153, "grad_norm": 638.6665649414062, "learning_rate": 1.232807570977918e-06, "loss": 57.6094, "step": 1954 }, { "epoch": 0.018506072452930206, "grad_norm": 443.9428405761719, "learning_rate": 1.2334384858044165e-06, "loss": 43.8125, "step": 1955 }, { "epoch": 0.018515538474645262, "grad_norm": 497.1656799316406, "learning_rate": 1.2340694006309147e-06, "loss": 35.7656, "step": 1956 }, { "epoch": 0.018525004496360315, "grad_norm": 3.553157091140747, "learning_rate": 1.2347003154574132e-06, "loss": 1.041, "step": 1957 }, { "epoch": 0.018534470518075368, "grad_norm": 345.2137756347656, "learning_rate": 1.2353312302839115e-06, "loss": 32.6406, "step": 1958 }, { "epoch": 0.01854393653979042, "grad_norm": 1053.6658935546875, "learning_rate": 1.23596214511041e-06, "loss": 41.1719, "step": 1959 }, { "epoch": 0.018553402561505478, "grad_norm": 669.4173583984375, "learning_rate": 1.2365930599369085e-06, "loss": 63.0938, "step": 1960 }, { "epoch": 0.01856286858322053, "grad_norm": 2.7527425289154053, "learning_rate": 1.2372239747634068e-06, "loss": 0.916, "step": 1961 }, { "epoch": 0.018572334604935584, "grad_norm": 436.4435119628906, "learning_rate": 1.2378548895899053e-06, "loss": 46.9844, "step": 1962 }, { "epoch": 0.018581800626650637, "grad_norm": 346.7664489746094, "learning_rate": 1.2384858044164038e-06, "loss": 29.4219, "step": 1963 }, { "epoch": 0.01859126664836569, "grad_norm": 3.0576558113098145, "learning_rate": 1.2391167192429023e-06, "loss": 0.7871, "step": 1964 }, { "epoch": 0.018600732670080746, "grad_norm": 570.0885620117188, "learning_rate": 1.2397476340694005e-06, "loss": 51.0469, "step": 1965 }, { "epoch": 0.0186101986917958, "grad_norm": 348.0074768066406, "learning_rate": 1.240378548895899e-06, "loss": 30.625, "step": 1966 }, { "epoch": 0.018619664713510852, "grad_norm": 967.4291381835938, "learning_rate": 1.2410094637223973e-06, "loss": 40.375, "step": 1967 }, { "epoch": 0.018629130735225905, "grad_norm": 344.5946350097656, "learning_rate": 1.2416403785488958e-06, "loss": 31.7188, "step": 1968 }, { "epoch": 0.01863859675694096, "grad_norm": 936.2931518554688, "learning_rate": 1.2422712933753943e-06, "loss": 78.375, "step": 1969 }, { "epoch": 0.018648062778656015, "grad_norm": 601.0995483398438, "learning_rate": 1.2429022082018928e-06, "loss": 70.8281, "step": 1970 }, { "epoch": 0.018657528800371068, "grad_norm": 1084.6976318359375, "learning_rate": 1.2435331230283913e-06, "loss": 88.9688, "step": 1971 }, { "epoch": 0.01866699482208612, "grad_norm": 302.46209716796875, "learning_rate": 1.2441640378548894e-06, "loss": 28.7812, "step": 1972 }, { "epoch": 0.018676460843801177, "grad_norm": 762.4901733398438, "learning_rate": 1.2447949526813879e-06, "loss": 84.2656, "step": 1973 }, { "epoch": 0.01868592686551623, "grad_norm": 645.4723510742188, "learning_rate": 1.2454258675078863e-06, "loss": 33.1484, "step": 1974 }, { "epoch": 0.018695392887231283, "grad_norm": 496.0794982910156, "learning_rate": 1.2460567823343848e-06, "loss": 72.7188, "step": 1975 }, { "epoch": 0.018704858908946336, "grad_norm": 390.42578125, "learning_rate": 1.2466876971608833e-06, "loss": 46.6875, "step": 1976 }, { "epoch": 0.018714324930661393, "grad_norm": 2.5872249603271484, "learning_rate": 1.2473186119873818e-06, "loss": 0.6982, "step": 1977 }, { "epoch": 0.018723790952376446, "grad_norm": 2218.848388671875, "learning_rate": 1.24794952681388e-06, "loss": 36.3438, "step": 1978 }, { "epoch": 0.0187332569740915, "grad_norm": 258.38397216796875, "learning_rate": 1.2485804416403784e-06, "loss": 33.4531, "step": 1979 }, { "epoch": 0.01874272299580655, "grad_norm": 1076.1114501953125, "learning_rate": 1.2492113564668769e-06, "loss": 79.7344, "step": 1980 }, { "epoch": 0.018752189017521605, "grad_norm": 622.9656372070312, "learning_rate": 1.2498422712933754e-06, "loss": 64.8438, "step": 1981 }, { "epoch": 0.01876165503923666, "grad_norm": 512.4779663085938, "learning_rate": 1.2504731861198739e-06, "loss": 36.2812, "step": 1982 }, { "epoch": 0.018771121060951714, "grad_norm": 349.68328857421875, "learning_rate": 1.2511041009463721e-06, "loss": 38.8906, "step": 1983 }, { "epoch": 0.018780587082666767, "grad_norm": 289.9325866699219, "learning_rate": 1.2517350157728706e-06, "loss": 35.0625, "step": 1984 }, { "epoch": 0.01879005310438182, "grad_norm": 563.7092895507812, "learning_rate": 1.252365930599369e-06, "loss": 84.5938, "step": 1985 }, { "epoch": 0.018799519126096877, "grad_norm": 3.008485794067383, "learning_rate": 1.2529968454258674e-06, "loss": 0.9077, "step": 1986 }, { "epoch": 0.01880898514781193, "grad_norm": 533.5364379882812, "learning_rate": 1.253627760252366e-06, "loss": 71.4688, "step": 1987 }, { "epoch": 0.018818451169526983, "grad_norm": 719.5607299804688, "learning_rate": 1.2542586750788644e-06, "loss": 60.1875, "step": 1988 }, { "epoch": 0.018827917191242036, "grad_norm": 369.4945983886719, "learning_rate": 1.2548895899053627e-06, "loss": 55.0312, "step": 1989 }, { "epoch": 0.018837383212957092, "grad_norm": 475.2533264160156, "learning_rate": 1.2555205047318612e-06, "loss": 47.1406, "step": 1990 }, { "epoch": 0.018846849234672145, "grad_norm": 375.1695556640625, "learning_rate": 1.2561514195583597e-06, "loss": 38.5938, "step": 1991 }, { "epoch": 0.018856315256387198, "grad_norm": 370.98065185546875, "learning_rate": 1.256782334384858e-06, "loss": 46.6094, "step": 1992 }, { "epoch": 0.01886578127810225, "grad_norm": 439.9637756347656, "learning_rate": 1.2574132492113564e-06, "loss": 38.75, "step": 1993 }, { "epoch": 0.018875247299817304, "grad_norm": 314.25677490234375, "learning_rate": 1.2580441640378547e-06, "loss": 31.3125, "step": 1994 }, { "epoch": 0.01888471332153236, "grad_norm": 540.0752563476562, "learning_rate": 1.2586750788643532e-06, "loss": 39.0156, "step": 1995 }, { "epoch": 0.018894179343247414, "grad_norm": 419.053955078125, "learning_rate": 1.2593059936908517e-06, "loss": 30.2344, "step": 1996 }, { "epoch": 0.018903645364962467, "grad_norm": 602.151611328125, "learning_rate": 1.2599369085173502e-06, "loss": 43.2812, "step": 1997 }, { "epoch": 0.01891311138667752, "grad_norm": 604.1399536132812, "learning_rate": 1.2605678233438487e-06, "loss": 77.2812, "step": 1998 }, { "epoch": 0.018922577408392576, "grad_norm": 523.5217895507812, "learning_rate": 1.261198738170347e-06, "loss": 36.3438, "step": 1999 }, { "epoch": 0.01893204343010763, "grad_norm": 621.0302124023438, "learning_rate": 1.2618296529968453e-06, "loss": 38.1562, "step": 2000 }, { "epoch": 0.018941509451822682, "grad_norm": 524.3231811523438, "learning_rate": 1.2624605678233437e-06, "loss": 48.4688, "step": 2001 }, { "epoch": 0.018950975473537735, "grad_norm": 270.48089599609375, "learning_rate": 1.2630914826498422e-06, "loss": 27.9609, "step": 2002 }, { "epoch": 0.01896044149525279, "grad_norm": 705.258056640625, "learning_rate": 1.2637223974763407e-06, "loss": 47.4531, "step": 2003 }, { "epoch": 0.018969907516967845, "grad_norm": 608.187255859375, "learning_rate": 1.2643533123028392e-06, "loss": 67.0, "step": 2004 }, { "epoch": 0.018979373538682898, "grad_norm": 969.8892822265625, "learning_rate": 1.2649842271293373e-06, "loss": 47.0703, "step": 2005 }, { "epoch": 0.01898883956039795, "grad_norm": 553.6990966796875, "learning_rate": 1.2656151419558358e-06, "loss": 38.2266, "step": 2006 }, { "epoch": 0.018998305582113004, "grad_norm": 369.3213806152344, "learning_rate": 1.2662460567823343e-06, "loss": 43.9688, "step": 2007 }, { "epoch": 0.01900777160382806, "grad_norm": 301.6854553222656, "learning_rate": 1.2668769716088328e-06, "loss": 32.7344, "step": 2008 }, { "epoch": 0.019017237625543113, "grad_norm": 811.435791015625, "learning_rate": 1.2675078864353313e-06, "loss": 73.0703, "step": 2009 }, { "epoch": 0.019026703647258166, "grad_norm": 1138.5885009765625, "learning_rate": 1.2681388012618298e-06, "loss": 66.125, "step": 2010 }, { "epoch": 0.01903616966897322, "grad_norm": 403.1347961425781, "learning_rate": 1.268769716088328e-06, "loss": 43.2812, "step": 2011 }, { "epoch": 0.019045635690688276, "grad_norm": 510.0248107910156, "learning_rate": 1.2694006309148263e-06, "loss": 29.125, "step": 2012 }, { "epoch": 0.01905510171240333, "grad_norm": 375.2066650390625, "learning_rate": 1.2700315457413248e-06, "loss": 34.6562, "step": 2013 }, { "epoch": 0.01906456773411838, "grad_norm": 704.4481201171875, "learning_rate": 1.2706624605678233e-06, "loss": 64.375, "step": 2014 }, { "epoch": 0.019074033755833435, "grad_norm": 265.7681579589844, "learning_rate": 1.2712933753943218e-06, "loss": 39.3906, "step": 2015 }, { "epoch": 0.01908349977754849, "grad_norm": 303.23138427734375, "learning_rate": 1.27192429022082e-06, "loss": 33.2812, "step": 2016 }, { "epoch": 0.019092965799263544, "grad_norm": 2.9379146099090576, "learning_rate": 1.2725552050473186e-06, "loss": 0.8718, "step": 2017 }, { "epoch": 0.019102431820978597, "grad_norm": 578.0509033203125, "learning_rate": 1.273186119873817e-06, "loss": 54.25, "step": 2018 }, { "epoch": 0.01911189784269365, "grad_norm": 487.6370544433594, "learning_rate": 1.2738170347003153e-06, "loss": 67.1562, "step": 2019 }, { "epoch": 0.019121363864408707, "grad_norm": 558.8097534179688, "learning_rate": 1.2744479495268138e-06, "loss": 30.25, "step": 2020 }, { "epoch": 0.01913082988612376, "grad_norm": 634.3668212890625, "learning_rate": 1.2750788643533123e-06, "loss": 70.9531, "step": 2021 }, { "epoch": 0.019140295907838813, "grad_norm": 705.0823974609375, "learning_rate": 1.2757097791798106e-06, "loss": 71.8125, "step": 2022 }, { "epoch": 0.019149761929553866, "grad_norm": 687.8134155273438, "learning_rate": 1.2763406940063091e-06, "loss": 38.7656, "step": 2023 }, { "epoch": 0.01915922795126892, "grad_norm": 317.710205078125, "learning_rate": 1.2769716088328076e-06, "loss": 29.3594, "step": 2024 }, { "epoch": 0.019168693972983975, "grad_norm": 684.2935180664062, "learning_rate": 1.2776025236593059e-06, "loss": 33.1719, "step": 2025 }, { "epoch": 0.019178159994699028, "grad_norm": 2.823340654373169, "learning_rate": 1.2782334384858044e-06, "loss": 0.979, "step": 2026 }, { "epoch": 0.01918762601641408, "grad_norm": 297.4220275878906, "learning_rate": 1.2788643533123027e-06, "loss": 28.3125, "step": 2027 }, { "epoch": 0.019197092038129134, "grad_norm": 2.8904025554656982, "learning_rate": 1.2794952681388011e-06, "loss": 0.8682, "step": 2028 }, { "epoch": 0.01920655805984419, "grad_norm": 517.52978515625, "learning_rate": 1.2801261829652996e-06, "loss": 31.2969, "step": 2029 }, { "epoch": 0.019216024081559244, "grad_norm": 463.05462646484375, "learning_rate": 1.2807570977917981e-06, "loss": 27.75, "step": 2030 }, { "epoch": 0.019225490103274297, "grad_norm": 482.45391845703125, "learning_rate": 1.2813880126182966e-06, "loss": 44.9375, "step": 2031 }, { "epoch": 0.01923495612498935, "grad_norm": 298.6792907714844, "learning_rate": 1.2820189274447947e-06, "loss": 30.2656, "step": 2032 }, { "epoch": 0.019244422146704406, "grad_norm": 701.9141845703125, "learning_rate": 1.2826498422712932e-06, "loss": 58.9297, "step": 2033 }, { "epoch": 0.01925388816841946, "grad_norm": 1098.4503173828125, "learning_rate": 1.2832807570977917e-06, "loss": 53.4766, "step": 2034 }, { "epoch": 0.019263354190134512, "grad_norm": 409.1884765625, "learning_rate": 1.2839116719242902e-06, "loss": 35.8906, "step": 2035 }, { "epoch": 0.019272820211849565, "grad_norm": 361.0182800292969, "learning_rate": 1.2845425867507887e-06, "loss": 30.7969, "step": 2036 }, { "epoch": 0.019282286233564618, "grad_norm": 337.2953186035156, "learning_rate": 1.2851735015772872e-06, "loss": 33.3125, "step": 2037 }, { "epoch": 0.019291752255279675, "grad_norm": 2342.3994140625, "learning_rate": 1.2858044164037854e-06, "loss": 61.9531, "step": 2038 }, { "epoch": 0.019301218276994728, "grad_norm": 318.1278381347656, "learning_rate": 1.2864353312302837e-06, "loss": 36.1562, "step": 2039 }, { "epoch": 0.01931068429870978, "grad_norm": 870.2205810546875, "learning_rate": 1.2870662460567822e-06, "loss": 42.5, "step": 2040 }, { "epoch": 0.019320150320424834, "grad_norm": 586.2507934570312, "learning_rate": 1.2876971608832807e-06, "loss": 32.8906, "step": 2041 }, { "epoch": 0.01932961634213989, "grad_norm": 247.02252197265625, "learning_rate": 1.2883280757097792e-06, "loss": 32.7031, "step": 2042 }, { "epoch": 0.019339082363854943, "grad_norm": 2.9542806148529053, "learning_rate": 1.2889589905362775e-06, "loss": 0.9224, "step": 2043 }, { "epoch": 0.019348548385569996, "grad_norm": 876.9345092773438, "learning_rate": 1.289589905362776e-06, "loss": 84.6562, "step": 2044 }, { "epoch": 0.01935801440728505, "grad_norm": 923.361328125, "learning_rate": 1.2902208201892745e-06, "loss": 79.6953, "step": 2045 }, { "epoch": 0.019367480429000106, "grad_norm": 528.9297485351562, "learning_rate": 1.2908517350157727e-06, "loss": 37.0781, "step": 2046 }, { "epoch": 0.01937694645071516, "grad_norm": 703.0487060546875, "learning_rate": 1.2914826498422712e-06, "loss": 79.1719, "step": 2047 }, { "epoch": 0.01938641247243021, "grad_norm": 370.8716125488281, "learning_rate": 1.2921135646687697e-06, "loss": 29.7812, "step": 2048 }, { "epoch": 0.019395878494145265, "grad_norm": 259.76593017578125, "learning_rate": 1.292744479495268e-06, "loss": 38.5312, "step": 2049 }, { "epoch": 0.019405344515860318, "grad_norm": 342.0113220214844, "learning_rate": 1.2933753943217665e-06, "loss": 30.0625, "step": 2050 }, { "epoch": 0.019414810537575374, "grad_norm": 2.956616163253784, "learning_rate": 1.294006309148265e-06, "loss": 0.8704, "step": 2051 }, { "epoch": 0.019424276559290427, "grad_norm": 674.3938598632812, "learning_rate": 1.2946372239747633e-06, "loss": 69.9062, "step": 2052 }, { "epoch": 0.01943374258100548, "grad_norm": 1767.0224609375, "learning_rate": 1.2952681388012618e-06, "loss": 57.0625, "step": 2053 }, { "epoch": 0.019443208602720533, "grad_norm": 394.44293212890625, "learning_rate": 1.29589905362776e-06, "loss": 42.3281, "step": 2054 }, { "epoch": 0.01945267462443559, "grad_norm": 346.6348571777344, "learning_rate": 1.2965299684542585e-06, "loss": 37.8594, "step": 2055 }, { "epoch": 0.019462140646150643, "grad_norm": 321.0770568847656, "learning_rate": 1.297160883280757e-06, "loss": 47.8281, "step": 2056 }, { "epoch": 0.019471606667865696, "grad_norm": 724.1366577148438, "learning_rate": 1.2977917981072555e-06, "loss": 37.0469, "step": 2057 }, { "epoch": 0.01948107268958075, "grad_norm": 520.2060546875, "learning_rate": 1.298422712933754e-06, "loss": 42.9375, "step": 2058 }, { "epoch": 0.019490538711295805, "grad_norm": 407.670654296875, "learning_rate": 1.2990536277602523e-06, "loss": 46.4219, "step": 2059 }, { "epoch": 0.019500004733010858, "grad_norm": 841.496337890625, "learning_rate": 1.2996845425867506e-06, "loss": 55.125, "step": 2060 }, { "epoch": 0.01950947075472591, "grad_norm": 421.41510009765625, "learning_rate": 1.300315457413249e-06, "loss": 23.1797, "step": 2061 }, { "epoch": 0.019518936776440964, "grad_norm": 382.92901611328125, "learning_rate": 1.3009463722397476e-06, "loss": 33.1406, "step": 2062 }, { "epoch": 0.01952840279815602, "grad_norm": 865.9940795898438, "learning_rate": 1.301577287066246e-06, "loss": 62.9531, "step": 2063 }, { "epoch": 0.019537868819871074, "grad_norm": 492.7709655761719, "learning_rate": 1.3022082018927446e-06, "loss": 51.6562, "step": 2064 }, { "epoch": 0.019547334841586127, "grad_norm": 913.2398681640625, "learning_rate": 1.3028391167192428e-06, "loss": 72.3906, "step": 2065 }, { "epoch": 0.01955680086330118, "grad_norm": 724.1826171875, "learning_rate": 1.3034700315457411e-06, "loss": 27.125, "step": 2066 }, { "epoch": 0.019566266885016233, "grad_norm": 254.63461303710938, "learning_rate": 1.3041009463722396e-06, "loss": 31.4688, "step": 2067 }, { "epoch": 0.01957573290673129, "grad_norm": 308.5326232910156, "learning_rate": 1.3047318611987381e-06, "loss": 30.4531, "step": 2068 }, { "epoch": 0.019585198928446342, "grad_norm": 720.0283203125, "learning_rate": 1.3053627760252366e-06, "loss": 69.3125, "step": 2069 }, { "epoch": 0.019594664950161395, "grad_norm": 604.1643676757812, "learning_rate": 1.305993690851735e-06, "loss": 33.5938, "step": 2070 }, { "epoch": 0.019604130971876448, "grad_norm": 477.911376953125, "learning_rate": 1.3066246056782334e-06, "loss": 34.9375, "step": 2071 }, { "epoch": 0.019613596993591505, "grad_norm": 343.9098205566406, "learning_rate": 1.3072555205047317e-06, "loss": 32.7812, "step": 2072 }, { "epoch": 0.019623063015306558, "grad_norm": 687.4589233398438, "learning_rate": 1.3078864353312301e-06, "loss": 52.1406, "step": 2073 }, { "epoch": 0.01963252903702161, "grad_norm": 260.613525390625, "learning_rate": 1.3085173501577286e-06, "loss": 30.0312, "step": 2074 }, { "epoch": 0.019641995058736664, "grad_norm": 503.62603759765625, "learning_rate": 1.3091482649842271e-06, "loss": 40.0, "step": 2075 }, { "epoch": 0.01965146108045172, "grad_norm": 823.0601196289062, "learning_rate": 1.3097791798107254e-06, "loss": 41.0469, "step": 2076 }, { "epoch": 0.019660927102166773, "grad_norm": 480.853759765625, "learning_rate": 1.310410094637224e-06, "loss": 29.375, "step": 2077 }, { "epoch": 0.019670393123881826, "grad_norm": 431.7024841308594, "learning_rate": 1.3110410094637224e-06, "loss": 33.7812, "step": 2078 }, { "epoch": 0.01967985914559688, "grad_norm": 3.1616618633270264, "learning_rate": 1.3116719242902207e-06, "loss": 0.9175, "step": 2079 }, { "epoch": 0.019689325167311932, "grad_norm": 332.2991027832031, "learning_rate": 1.3123028391167192e-06, "loss": 32.875, "step": 2080 }, { "epoch": 0.01969879118902699, "grad_norm": 1270.9605712890625, "learning_rate": 1.3129337539432177e-06, "loss": 54.9297, "step": 2081 }, { "epoch": 0.01970825721074204, "grad_norm": 386.9015197753906, "learning_rate": 1.313564668769716e-06, "loss": 30.7656, "step": 2082 }, { "epoch": 0.019717723232457095, "grad_norm": 713.4321899414062, "learning_rate": 1.3141955835962144e-06, "loss": 42.875, "step": 2083 }, { "epoch": 0.019727189254172148, "grad_norm": 251.7477264404297, "learning_rate": 1.314826498422713e-06, "loss": 30.8438, "step": 2084 }, { "epoch": 0.019736655275887204, "grad_norm": 373.73443603515625, "learning_rate": 1.3154574132492114e-06, "loss": 35.7031, "step": 2085 }, { "epoch": 0.019746121297602257, "grad_norm": 648.2665405273438, "learning_rate": 1.3160883280757097e-06, "loss": 45.0703, "step": 2086 }, { "epoch": 0.01975558731931731, "grad_norm": 364.82330322265625, "learning_rate": 1.316719242902208e-06, "loss": 34.2344, "step": 2087 }, { "epoch": 0.019765053341032363, "grad_norm": 337.90667724609375, "learning_rate": 1.3173501577287065e-06, "loss": 30.3438, "step": 2088 }, { "epoch": 0.01977451936274742, "grad_norm": 399.9173278808594, "learning_rate": 1.317981072555205e-06, "loss": 29.4062, "step": 2089 }, { "epoch": 0.019783985384462473, "grad_norm": 295.9457702636719, "learning_rate": 1.3186119873817035e-06, "loss": 36.1406, "step": 2090 }, { "epoch": 0.019793451406177526, "grad_norm": 525.6752319335938, "learning_rate": 1.319242902208202e-06, "loss": 71.2812, "step": 2091 }, { "epoch": 0.01980291742789258, "grad_norm": 3.1102352142333984, "learning_rate": 1.3198738170347005e-06, "loss": 0.7585, "step": 2092 }, { "epoch": 0.019812383449607632, "grad_norm": 284.2369689941406, "learning_rate": 1.3205047318611985e-06, "loss": 34.8125, "step": 2093 }, { "epoch": 0.019821849471322688, "grad_norm": 763.8524780273438, "learning_rate": 1.321135646687697e-06, "loss": 86.2812, "step": 2094 }, { "epoch": 0.01983131549303774, "grad_norm": 294.59027099609375, "learning_rate": 1.3217665615141955e-06, "loss": 29.0469, "step": 2095 }, { "epoch": 0.019840781514752794, "grad_norm": 751.1774291992188, "learning_rate": 1.322397476340694e-06, "loss": 59.375, "step": 2096 }, { "epoch": 0.019850247536467847, "grad_norm": 639.15234375, "learning_rate": 1.3230283911671925e-06, "loss": 65.0312, "step": 2097 }, { "epoch": 0.019859713558182904, "grad_norm": 343.3891906738281, "learning_rate": 1.3236593059936908e-06, "loss": 27.25, "step": 2098 }, { "epoch": 0.019869179579897957, "grad_norm": 377.7643127441406, "learning_rate": 1.324290220820189e-06, "loss": 35.3438, "step": 2099 }, { "epoch": 0.01987864560161301, "grad_norm": 771.504150390625, "learning_rate": 1.3249211356466875e-06, "loss": 35.2812, "step": 2100 }, { "epoch": 0.019888111623328063, "grad_norm": 579.07421875, "learning_rate": 1.325552050473186e-06, "loss": 78.5625, "step": 2101 }, { "epoch": 0.01989757764504312, "grad_norm": 352.45166015625, "learning_rate": 1.3261829652996845e-06, "loss": 34.25, "step": 2102 }, { "epoch": 0.019907043666758172, "grad_norm": 680.9039916992188, "learning_rate": 1.326813880126183e-06, "loss": 40.9688, "step": 2103 }, { "epoch": 0.019916509688473225, "grad_norm": 724.0121459960938, "learning_rate": 1.3274447949526813e-06, "loss": 62.125, "step": 2104 }, { "epoch": 0.019925975710188278, "grad_norm": 567.20703125, "learning_rate": 1.3280757097791798e-06, "loss": 69.0625, "step": 2105 }, { "epoch": 0.01993544173190333, "grad_norm": 614.3329467773438, "learning_rate": 1.328706624605678e-06, "loss": 34.3906, "step": 2106 }, { "epoch": 0.019944907753618388, "grad_norm": 2.9560792446136475, "learning_rate": 1.3293375394321766e-06, "loss": 1.0537, "step": 2107 }, { "epoch": 0.01995437377533344, "grad_norm": 612.74169921875, "learning_rate": 1.329968454258675e-06, "loss": 38.1094, "step": 2108 }, { "epoch": 0.019963839797048494, "grad_norm": 583.07421875, "learning_rate": 1.3305993690851733e-06, "loss": 66.7812, "step": 2109 }, { "epoch": 0.019973305818763547, "grad_norm": 772.7844848632812, "learning_rate": 1.3312302839116718e-06, "loss": 61.4062, "step": 2110 }, { "epoch": 0.019982771840478603, "grad_norm": 456.80267333984375, "learning_rate": 1.3318611987381703e-06, "loss": 37.0938, "step": 2111 }, { "epoch": 0.019992237862193656, "grad_norm": 1307.0657958984375, "learning_rate": 1.3324921135646688e-06, "loss": 50.8516, "step": 2112 }, { "epoch": 0.02000170388390871, "grad_norm": 536.4132080078125, "learning_rate": 1.3331230283911671e-06, "loss": 74.4062, "step": 2113 }, { "epoch": 0.020011169905623762, "grad_norm": 307.3956298828125, "learning_rate": 1.3337539432176656e-06, "loss": 37.6406, "step": 2114 }, { "epoch": 0.02002063592733882, "grad_norm": 713.4292602539062, "learning_rate": 1.3343848580441639e-06, "loss": 35.4531, "step": 2115 }, { "epoch": 0.020030101949053872, "grad_norm": 543.1886596679688, "learning_rate": 1.3350157728706624e-06, "loss": 35.4062, "step": 2116 }, { "epoch": 0.020039567970768925, "grad_norm": 2.9747438430786133, "learning_rate": 1.3356466876971609e-06, "loss": 0.9966, "step": 2117 }, { "epoch": 0.020049033992483978, "grad_norm": 1229.5357666015625, "learning_rate": 1.3362776025236594e-06, "loss": 55.6875, "step": 2118 }, { "epoch": 0.020058500014199034, "grad_norm": 336.291015625, "learning_rate": 1.3369085173501579e-06, "loss": 29.25, "step": 2119 }, { "epoch": 0.020067966035914087, "grad_norm": 234.57203674316406, "learning_rate": 1.337539432176656e-06, "loss": 34.5312, "step": 2120 }, { "epoch": 0.02007743205762914, "grad_norm": 884.5182495117188, "learning_rate": 1.3381703470031544e-06, "loss": 32.4219, "step": 2121 }, { "epoch": 0.020086898079344193, "grad_norm": 730.8841552734375, "learning_rate": 1.338801261829653e-06, "loss": 47.8516, "step": 2122 }, { "epoch": 0.020096364101059246, "grad_norm": 339.9352111816406, "learning_rate": 1.3394321766561514e-06, "loss": 33.875, "step": 2123 }, { "epoch": 0.020105830122774303, "grad_norm": 692.74755859375, "learning_rate": 1.34006309148265e-06, "loss": 49.875, "step": 2124 }, { "epoch": 0.020115296144489356, "grad_norm": 1087.572021484375, "learning_rate": 1.3406940063091484e-06, "loss": 66.5312, "step": 2125 }, { "epoch": 0.02012476216620441, "grad_norm": 383.54071044921875, "learning_rate": 1.3413249211356465e-06, "loss": 28.7812, "step": 2126 }, { "epoch": 0.020134228187919462, "grad_norm": 545.7343139648438, "learning_rate": 1.341955835962145e-06, "loss": 37.75, "step": 2127 }, { "epoch": 0.020143694209634518, "grad_norm": 270.11297607421875, "learning_rate": 1.3425867507886434e-06, "loss": 33.7969, "step": 2128 }, { "epoch": 0.02015316023134957, "grad_norm": 874.2034912109375, "learning_rate": 1.343217665615142e-06, "loss": 48.75, "step": 2129 }, { "epoch": 0.020162626253064624, "grad_norm": 395.3534240722656, "learning_rate": 1.3438485804416404e-06, "loss": 28.9062, "step": 2130 }, { "epoch": 0.020172092274779677, "grad_norm": 419.6441650390625, "learning_rate": 1.3444794952681387e-06, "loss": 35.0938, "step": 2131 }, { "epoch": 0.020181558296494734, "grad_norm": 3.747548818588257, "learning_rate": 1.3451104100946372e-06, "loss": 0.9048, "step": 2132 }, { "epoch": 0.020191024318209787, "grad_norm": 478.88494873046875, "learning_rate": 1.3457413249211355e-06, "loss": 31.8906, "step": 2133 }, { "epoch": 0.02020049033992484, "grad_norm": 367.72052001953125, "learning_rate": 1.346372239747634e-06, "loss": 35.2812, "step": 2134 }, { "epoch": 0.020209956361639893, "grad_norm": 606.7471313476562, "learning_rate": 1.3470031545741325e-06, "loss": 42.5781, "step": 2135 }, { "epoch": 0.020219422383354946, "grad_norm": 289.1445007324219, "learning_rate": 1.347634069400631e-06, "loss": 33.7969, "step": 2136 }, { "epoch": 0.020228888405070002, "grad_norm": 3.322514295578003, "learning_rate": 1.3482649842271292e-06, "loss": 0.8223, "step": 2137 }, { "epoch": 0.020238354426785055, "grad_norm": 846.284423828125, "learning_rate": 1.3488958990536277e-06, "loss": 49.125, "step": 2138 }, { "epoch": 0.02024782044850011, "grad_norm": 511.39361572265625, "learning_rate": 1.3495268138801262e-06, "loss": 53.6875, "step": 2139 }, { "epoch": 0.02025728647021516, "grad_norm": 3.100926637649536, "learning_rate": 1.3501577287066245e-06, "loss": 0.853, "step": 2140 }, { "epoch": 0.020266752491930218, "grad_norm": 349.5829772949219, "learning_rate": 1.350788643533123e-06, "loss": 43.5, "step": 2141 }, { "epoch": 0.02027621851364527, "grad_norm": 1338.6728515625, "learning_rate": 1.3514195583596213e-06, "loss": 43.8594, "step": 2142 }, { "epoch": 0.020285684535360324, "grad_norm": 220.35922241210938, "learning_rate": 1.3520504731861198e-06, "loss": 26.6719, "step": 2143 }, { "epoch": 0.020295150557075377, "grad_norm": 301.002685546875, "learning_rate": 1.3526813880126183e-06, "loss": 31.7969, "step": 2144 }, { "epoch": 0.020304616578790433, "grad_norm": 475.5758361816406, "learning_rate": 1.3533123028391168e-06, "loss": 30.125, "step": 2145 }, { "epoch": 0.020314082600505486, "grad_norm": 366.2695007324219, "learning_rate": 1.353943217665615e-06, "loss": 35.7031, "step": 2146 }, { "epoch": 0.02032354862222054, "grad_norm": 324.18475341796875, "learning_rate": 1.3545741324921135e-06, "loss": 29.3125, "step": 2147 }, { "epoch": 0.020333014643935592, "grad_norm": 279.09429931640625, "learning_rate": 1.3552050473186118e-06, "loss": 31.5469, "step": 2148 }, { "epoch": 0.020342480665650645, "grad_norm": 1049.1007080078125, "learning_rate": 1.3558359621451103e-06, "loss": 62.0234, "step": 2149 }, { "epoch": 0.020351946687365702, "grad_norm": 303.7206726074219, "learning_rate": 1.3564668769716088e-06, "loss": 31.8594, "step": 2150 }, { "epoch": 0.020361412709080755, "grad_norm": 383.5340881347656, "learning_rate": 1.3570977917981073e-06, "loss": 31.5781, "step": 2151 }, { "epoch": 0.020370878730795808, "grad_norm": 849.4844970703125, "learning_rate": 1.3577287066246058e-06, "loss": 53.1562, "step": 2152 }, { "epoch": 0.02038034475251086, "grad_norm": 201.6598663330078, "learning_rate": 1.3583596214511039e-06, "loss": 25.8125, "step": 2153 }, { "epoch": 0.020389810774225917, "grad_norm": 487.2587890625, "learning_rate": 1.3589905362776023e-06, "loss": 32.6406, "step": 2154 }, { "epoch": 0.02039927679594097, "grad_norm": 1636.7860107421875, "learning_rate": 1.3596214511041008e-06, "loss": 70.0156, "step": 2155 }, { "epoch": 0.020408742817656023, "grad_norm": 489.4699401855469, "learning_rate": 1.3602523659305993e-06, "loss": 62.8906, "step": 2156 }, { "epoch": 0.020418208839371076, "grad_norm": 499.9373474121094, "learning_rate": 1.3608832807570978e-06, "loss": 33.7656, "step": 2157 }, { "epoch": 0.020427674861086133, "grad_norm": 3.4868760108947754, "learning_rate": 1.3615141955835963e-06, "loss": 1.0005, "step": 2158 }, { "epoch": 0.020437140882801186, "grad_norm": 945.8406982421875, "learning_rate": 1.3621451104100946e-06, "loss": 68.0625, "step": 2159 }, { "epoch": 0.02044660690451624, "grad_norm": 349.6187438964844, "learning_rate": 1.3627760252365929e-06, "loss": 30.1719, "step": 2160 }, { "epoch": 0.020456072926231292, "grad_norm": 459.9718017578125, "learning_rate": 1.3634069400630914e-06, "loss": 31.1328, "step": 2161 }, { "epoch": 0.02046553894794635, "grad_norm": 713.6366577148438, "learning_rate": 1.3640378548895899e-06, "loss": 33.2031, "step": 2162 }, { "epoch": 0.0204750049696614, "grad_norm": 1604.6351318359375, "learning_rate": 1.3646687697160884e-06, "loss": 95.4062, "step": 2163 }, { "epoch": 0.020484470991376454, "grad_norm": 930.7652587890625, "learning_rate": 1.3652996845425866e-06, "loss": 52.6094, "step": 2164 }, { "epoch": 0.020493937013091507, "grad_norm": 540.1885986328125, "learning_rate": 1.3659305993690851e-06, "loss": 37.5625, "step": 2165 }, { "epoch": 0.02050340303480656, "grad_norm": 1086.6595458984375, "learning_rate": 1.3665615141955834e-06, "loss": 44.8672, "step": 2166 }, { "epoch": 0.020512869056521617, "grad_norm": 662.2283325195312, "learning_rate": 1.367192429022082e-06, "loss": 52.4062, "step": 2167 }, { "epoch": 0.02052233507823667, "grad_norm": 1089.257568359375, "learning_rate": 1.3678233438485804e-06, "loss": 49.6953, "step": 2168 }, { "epoch": 0.020531801099951723, "grad_norm": 296.2262268066406, "learning_rate": 1.368454258675079e-06, "loss": 29.0156, "step": 2169 }, { "epoch": 0.020541267121666776, "grad_norm": 475.3573303222656, "learning_rate": 1.3690851735015772e-06, "loss": 38.875, "step": 2170 }, { "epoch": 0.020550733143381832, "grad_norm": 562.4888916015625, "learning_rate": 1.3697160883280757e-06, "loss": 51.1406, "step": 2171 }, { "epoch": 0.020560199165096885, "grad_norm": 613.7973022460938, "learning_rate": 1.3703470031545742e-06, "loss": 48.1172, "step": 2172 }, { "epoch": 0.02056966518681194, "grad_norm": 515.345947265625, "learning_rate": 1.3709779179810724e-06, "loss": 70.4219, "step": 2173 }, { "epoch": 0.02057913120852699, "grad_norm": 290.5258483886719, "learning_rate": 1.371608832807571e-06, "loss": 30.5938, "step": 2174 }, { "epoch": 0.020588597230242048, "grad_norm": 539.18115234375, "learning_rate": 1.3722397476340692e-06, "loss": 51.375, "step": 2175 }, { "epoch": 0.0205980632519571, "grad_norm": 3.3848085403442383, "learning_rate": 1.3728706624605677e-06, "loss": 0.708, "step": 2176 }, { "epoch": 0.020607529273672154, "grad_norm": 299.11480712890625, "learning_rate": 1.3735015772870662e-06, "loss": 33.375, "step": 2177 }, { "epoch": 0.020616995295387207, "grad_norm": 365.3611145019531, "learning_rate": 1.3741324921135647e-06, "loss": 27.7188, "step": 2178 }, { "epoch": 0.02062646131710226, "grad_norm": 348.3011169433594, "learning_rate": 1.3747634069400632e-06, "loss": 30.1641, "step": 2179 }, { "epoch": 0.020635927338817316, "grad_norm": 391.5630798339844, "learning_rate": 1.3753943217665615e-06, "loss": 35.8281, "step": 2180 }, { "epoch": 0.02064539336053237, "grad_norm": 509.5236511230469, "learning_rate": 1.3760252365930598e-06, "loss": 40.4062, "step": 2181 }, { "epoch": 0.020654859382247422, "grad_norm": 558.9053955078125, "learning_rate": 1.3766561514195582e-06, "loss": 63.1406, "step": 2182 }, { "epoch": 0.020664325403962475, "grad_norm": 749.6072998046875, "learning_rate": 1.3772870662460567e-06, "loss": 80.0, "step": 2183 }, { "epoch": 0.020673791425677532, "grad_norm": 619.44287109375, "learning_rate": 1.3779179810725552e-06, "loss": 56.6719, "step": 2184 }, { "epoch": 0.020683257447392585, "grad_norm": 620.7703247070312, "learning_rate": 1.3785488958990537e-06, "loss": 36.9219, "step": 2185 }, { "epoch": 0.020692723469107638, "grad_norm": 729.588134765625, "learning_rate": 1.3791798107255518e-06, "loss": 76.375, "step": 2186 }, { "epoch": 0.02070218949082269, "grad_norm": 761.2530517578125, "learning_rate": 1.3798107255520503e-06, "loss": 91.5625, "step": 2187 }, { "epoch": 0.020711655512537747, "grad_norm": 249.72628784179688, "learning_rate": 1.3804416403785488e-06, "loss": 30.0, "step": 2188 }, { "epoch": 0.0207211215342528, "grad_norm": 1183.1383056640625, "learning_rate": 1.3810725552050473e-06, "loss": 50.9766, "step": 2189 }, { "epoch": 0.020730587555967853, "grad_norm": 448.05950927734375, "learning_rate": 1.3817034700315458e-06, "loss": 37.0625, "step": 2190 }, { "epoch": 0.020740053577682906, "grad_norm": 640.8497314453125, "learning_rate": 1.3823343848580443e-06, "loss": 78.9375, "step": 2191 }, { "epoch": 0.02074951959939796, "grad_norm": 282.38458251953125, "learning_rate": 1.3829652996845425e-06, "loss": 27.5938, "step": 2192 }, { "epoch": 0.020758985621113016, "grad_norm": 824.2235717773438, "learning_rate": 1.3835962145110408e-06, "loss": 72.0625, "step": 2193 }, { "epoch": 0.02076845164282807, "grad_norm": 846.9154052734375, "learning_rate": 1.3842271293375393e-06, "loss": 78.6562, "step": 2194 }, { "epoch": 0.020777917664543122, "grad_norm": 758.4829711914062, "learning_rate": 1.3848580441640378e-06, "loss": 78.0781, "step": 2195 }, { "epoch": 0.020787383686258175, "grad_norm": 680.2816772460938, "learning_rate": 1.3854889589905363e-06, "loss": 42.4688, "step": 2196 }, { "epoch": 0.02079684970797323, "grad_norm": 831.1616821289062, "learning_rate": 1.3861198738170346e-06, "loss": 33.6562, "step": 2197 }, { "epoch": 0.020806315729688284, "grad_norm": 780.0845336914062, "learning_rate": 1.386750788643533e-06, "loss": 33.25, "step": 2198 }, { "epoch": 0.020815781751403337, "grad_norm": 2.9668502807617188, "learning_rate": 1.3873817034700316e-06, "loss": 0.7876, "step": 2199 }, { "epoch": 0.02082524777311839, "grad_norm": 375.05340576171875, "learning_rate": 1.3880126182965298e-06, "loss": 40.5312, "step": 2200 }, { "epoch": 0.020834713794833447, "grad_norm": 515.5603637695312, "learning_rate": 1.3886435331230283e-06, "loss": 46.2812, "step": 2201 }, { "epoch": 0.0208441798165485, "grad_norm": 375.2019958496094, "learning_rate": 1.3892744479495266e-06, "loss": 41.9062, "step": 2202 }, { "epoch": 0.020853645838263553, "grad_norm": 315.7974548339844, "learning_rate": 1.3899053627760251e-06, "loss": 29.2031, "step": 2203 }, { "epoch": 0.020863111859978606, "grad_norm": 1154.222900390625, "learning_rate": 1.3905362776025236e-06, "loss": 35.8594, "step": 2204 }, { "epoch": 0.020872577881693662, "grad_norm": 321.7087097167969, "learning_rate": 1.391167192429022e-06, "loss": 31.5938, "step": 2205 }, { "epoch": 0.020882043903408715, "grad_norm": 1726.2049560546875, "learning_rate": 1.3917981072555206e-06, "loss": 61.75, "step": 2206 }, { "epoch": 0.02089150992512377, "grad_norm": 326.9066467285156, "learning_rate": 1.3924290220820189e-06, "loss": 29.7188, "step": 2207 }, { "epoch": 0.02090097594683882, "grad_norm": 411.2711486816406, "learning_rate": 1.3930599369085172e-06, "loss": 41.2344, "step": 2208 }, { "epoch": 0.020910441968553874, "grad_norm": 521.641357421875, "learning_rate": 1.3936908517350156e-06, "loss": 64.8438, "step": 2209 }, { "epoch": 0.02091990799026893, "grad_norm": 1179.251708984375, "learning_rate": 1.3943217665615141e-06, "loss": 86.3125, "step": 2210 }, { "epoch": 0.020929374011983984, "grad_norm": 493.3608093261719, "learning_rate": 1.3949526813880126e-06, "loss": 55.8906, "step": 2211 }, { "epoch": 0.020938840033699037, "grad_norm": 267.9371643066406, "learning_rate": 1.3955835962145111e-06, "loss": 32.8594, "step": 2212 }, { "epoch": 0.02094830605541409, "grad_norm": 585.3629760742188, "learning_rate": 1.3962145110410092e-06, "loss": 33.3516, "step": 2213 }, { "epoch": 0.020957772077129146, "grad_norm": 755.7814331054688, "learning_rate": 1.3968454258675077e-06, "loss": 70.6875, "step": 2214 }, { "epoch": 0.0209672380988442, "grad_norm": 326.96282958984375, "learning_rate": 1.3974763406940062e-06, "loss": 33.6562, "step": 2215 }, { "epoch": 0.020976704120559252, "grad_norm": 1201.5894775390625, "learning_rate": 1.3981072555205047e-06, "loss": 52.5312, "step": 2216 }, { "epoch": 0.020986170142274305, "grad_norm": 757.2534790039062, "learning_rate": 1.3987381703470032e-06, "loss": 81.5234, "step": 2217 }, { "epoch": 0.020995636163989362, "grad_norm": 1139.031982421875, "learning_rate": 1.3993690851735017e-06, "loss": 65.8594, "step": 2218 }, { "epoch": 0.021005102185704415, "grad_norm": 486.8087158203125, "learning_rate": 1.4e-06, "loss": 31.1016, "step": 2219 }, { "epoch": 0.021014568207419468, "grad_norm": 552.7048950195312, "learning_rate": 1.4006309148264982e-06, "loss": 67.75, "step": 2220 }, { "epoch": 0.02102403422913452, "grad_norm": 458.4835205078125, "learning_rate": 1.4012618296529967e-06, "loss": 39.4375, "step": 2221 }, { "epoch": 0.021033500250849574, "grad_norm": 206.02810668945312, "learning_rate": 1.4018927444794952e-06, "loss": 29.2031, "step": 2222 }, { "epoch": 0.02104296627256463, "grad_norm": 266.6800842285156, "learning_rate": 1.4025236593059937e-06, "loss": 28.9062, "step": 2223 }, { "epoch": 0.021052432294279683, "grad_norm": 638.2230834960938, "learning_rate": 1.403154574132492e-06, "loss": 27.6562, "step": 2224 }, { "epoch": 0.021061898315994736, "grad_norm": 515.5881958007812, "learning_rate": 1.4037854889589905e-06, "loss": 69.5312, "step": 2225 }, { "epoch": 0.02107136433770979, "grad_norm": 897.3941040039062, "learning_rate": 1.404416403785489e-06, "loss": 32.9219, "step": 2226 }, { "epoch": 0.021080830359424846, "grad_norm": 254.4386444091797, "learning_rate": 1.4050473186119872e-06, "loss": 37.2031, "step": 2227 }, { "epoch": 0.0210902963811399, "grad_norm": 308.8963317871094, "learning_rate": 1.4056782334384857e-06, "loss": 32.5469, "step": 2228 }, { "epoch": 0.021099762402854952, "grad_norm": 591.424072265625, "learning_rate": 1.4063091482649842e-06, "loss": 83.625, "step": 2229 }, { "epoch": 0.021109228424570005, "grad_norm": 252.09744262695312, "learning_rate": 1.4069400630914825e-06, "loss": 29.3281, "step": 2230 }, { "epoch": 0.02111869444628506, "grad_norm": 255.0266571044922, "learning_rate": 1.407570977917981e-06, "loss": 32.125, "step": 2231 }, { "epoch": 0.021128160468000114, "grad_norm": 748.6367797851562, "learning_rate": 1.4082018927444795e-06, "loss": 73.5, "step": 2232 }, { "epoch": 0.021137626489715167, "grad_norm": 417.3565368652344, "learning_rate": 1.408832807570978e-06, "loss": 38.5, "step": 2233 }, { "epoch": 0.02114709251143022, "grad_norm": 711.2984619140625, "learning_rate": 1.4094637223974763e-06, "loss": 37.375, "step": 2234 }, { "epoch": 0.021156558533145273, "grad_norm": 494.2431640625, "learning_rate": 1.4100946372239746e-06, "loss": 51.6719, "step": 2235 }, { "epoch": 0.02116602455486033, "grad_norm": 476.32330322265625, "learning_rate": 1.410725552050473e-06, "loss": 68.3438, "step": 2236 }, { "epoch": 0.021175490576575383, "grad_norm": 387.900390625, "learning_rate": 1.4113564668769715e-06, "loss": 62.5, "step": 2237 }, { "epoch": 0.021184956598290436, "grad_norm": 3.5105319023132324, "learning_rate": 1.41198738170347e-06, "loss": 0.9607, "step": 2238 }, { "epoch": 0.02119442262000549, "grad_norm": 401.2019958496094, "learning_rate": 1.4126182965299685e-06, "loss": 24.9688, "step": 2239 }, { "epoch": 0.021203888641720545, "grad_norm": 440.0290222167969, "learning_rate": 1.4132492113564668e-06, "loss": 35.5781, "step": 2240 }, { "epoch": 0.0212133546634356, "grad_norm": 439.07598876953125, "learning_rate": 1.413880126182965e-06, "loss": 29.9688, "step": 2241 }, { "epoch": 0.02122282068515065, "grad_norm": 866.3509521484375, "learning_rate": 1.4145110410094636e-06, "loss": 37.7109, "step": 2242 }, { "epoch": 0.021232286706865704, "grad_norm": 584.0252075195312, "learning_rate": 1.415141955835962e-06, "loss": 30.5781, "step": 2243 }, { "epoch": 0.02124175272858076, "grad_norm": 909.0205078125, "learning_rate": 1.4157728706624606e-06, "loss": 36.9922, "step": 2244 }, { "epoch": 0.021251218750295814, "grad_norm": 1023.5075073242188, "learning_rate": 1.416403785488959e-06, "loss": 71.7344, "step": 2245 }, { "epoch": 0.021260684772010867, "grad_norm": 956.8889770507812, "learning_rate": 1.4170347003154573e-06, "loss": 34.2344, "step": 2246 }, { "epoch": 0.02127015079372592, "grad_norm": 444.9805603027344, "learning_rate": 1.4176656151419556e-06, "loss": 33.1875, "step": 2247 }, { "epoch": 0.021279616815440973, "grad_norm": 876.4585571289062, "learning_rate": 1.4182965299684541e-06, "loss": 58.3125, "step": 2248 }, { "epoch": 0.02128908283715603, "grad_norm": 602.4058837890625, "learning_rate": 1.4189274447949526e-06, "loss": 71.5938, "step": 2249 }, { "epoch": 0.021298548858871082, "grad_norm": 973.3199462890625, "learning_rate": 1.419558359621451e-06, "loss": 49.2812, "step": 2250 }, { "epoch": 0.021308014880586135, "grad_norm": 1073.0609130859375, "learning_rate": 1.4201892744479496e-06, "loss": 60.9219, "step": 2251 }, { "epoch": 0.02131748090230119, "grad_norm": 749.9246826171875, "learning_rate": 1.4208201892744479e-06, "loss": 71.9141, "step": 2252 }, { "epoch": 0.021326946924016245, "grad_norm": 470.78277587890625, "learning_rate": 1.4214511041009464e-06, "loss": 46.3281, "step": 2253 }, { "epoch": 0.021336412945731298, "grad_norm": 2.87326717376709, "learning_rate": 1.4220820189274446e-06, "loss": 0.8613, "step": 2254 }, { "epoch": 0.02134587896744635, "grad_norm": 669.472900390625, "learning_rate": 1.4227129337539431e-06, "loss": 48.9062, "step": 2255 }, { "epoch": 0.021355344989161404, "grad_norm": 645.2957763671875, "learning_rate": 1.4233438485804416e-06, "loss": 48.0, "step": 2256 }, { "epoch": 0.02136481101087646, "grad_norm": 389.7939453125, "learning_rate": 1.42397476340694e-06, "loss": 37.0469, "step": 2257 }, { "epoch": 0.021374277032591513, "grad_norm": 634.0523071289062, "learning_rate": 1.4246056782334384e-06, "loss": 35.4062, "step": 2258 }, { "epoch": 0.021383743054306566, "grad_norm": 292.4303894042969, "learning_rate": 1.425236593059937e-06, "loss": 31.9531, "step": 2259 }, { "epoch": 0.02139320907602162, "grad_norm": 853.3124389648438, "learning_rate": 1.4258675078864352e-06, "loss": 34.4688, "step": 2260 }, { "epoch": 0.021402675097736676, "grad_norm": 365.15484619140625, "learning_rate": 1.4264984227129337e-06, "loss": 32.2812, "step": 2261 }, { "epoch": 0.02141214111945173, "grad_norm": 3.1946399211883545, "learning_rate": 1.4271293375394322e-06, "loss": 0.9727, "step": 2262 }, { "epoch": 0.021421607141166782, "grad_norm": 744.307373046875, "learning_rate": 1.4277602523659304e-06, "loss": 67.1875, "step": 2263 }, { "epoch": 0.021431073162881835, "grad_norm": 719.1862182617188, "learning_rate": 1.428391167192429e-06, "loss": 63.0781, "step": 2264 }, { "epoch": 0.021440539184596888, "grad_norm": 3.0538268089294434, "learning_rate": 1.4290220820189274e-06, "loss": 0.8662, "step": 2265 }, { "epoch": 0.021450005206311944, "grad_norm": 273.0624084472656, "learning_rate": 1.429652996845426e-06, "loss": 30.7656, "step": 2266 }, { "epoch": 0.021459471228026997, "grad_norm": 321.3061218261719, "learning_rate": 1.4302839116719242e-06, "loss": 46.1719, "step": 2267 }, { "epoch": 0.02146893724974205, "grad_norm": 362.7535400390625, "learning_rate": 1.4309148264984225e-06, "loss": 30.5938, "step": 2268 }, { "epoch": 0.021478403271457103, "grad_norm": 4.036404609680176, "learning_rate": 1.431545741324921e-06, "loss": 0.9126, "step": 2269 }, { "epoch": 0.02148786929317216, "grad_norm": 680.1099853515625, "learning_rate": 1.4321766561514195e-06, "loss": 52.9688, "step": 2270 }, { "epoch": 0.021497335314887213, "grad_norm": 353.4436950683594, "learning_rate": 1.432807570977918e-06, "loss": 33.0938, "step": 2271 }, { "epoch": 0.021506801336602266, "grad_norm": 352.1375732421875, "learning_rate": 1.4334384858044165e-06, "loss": 33.7656, "step": 2272 }, { "epoch": 0.02151626735831732, "grad_norm": 2.380692958831787, "learning_rate": 1.434069400630915e-06, "loss": 0.8208, "step": 2273 }, { "epoch": 0.021525733380032375, "grad_norm": 214.3273468017578, "learning_rate": 1.434700315457413e-06, "loss": 27.2812, "step": 2274 }, { "epoch": 0.02153519940174743, "grad_norm": 1165.9039306640625, "learning_rate": 1.4353312302839115e-06, "loss": 65.0938, "step": 2275 }, { "epoch": 0.02154466542346248, "grad_norm": 1169.017822265625, "learning_rate": 1.43596214511041e-06, "loss": 48.0039, "step": 2276 }, { "epoch": 0.021554131445177534, "grad_norm": 959.5003051757812, "learning_rate": 1.4365930599369085e-06, "loss": 71.0, "step": 2277 }, { "epoch": 0.021563597466892587, "grad_norm": 851.91796875, "learning_rate": 1.437223974763407e-06, "loss": 64.3438, "step": 2278 }, { "epoch": 0.021573063488607644, "grad_norm": 289.9927673339844, "learning_rate": 1.4378548895899053e-06, "loss": 37.3594, "step": 2279 }, { "epoch": 0.021582529510322697, "grad_norm": 3.5733699798583984, "learning_rate": 1.4384858044164038e-06, "loss": 0.8271, "step": 2280 }, { "epoch": 0.02159199553203775, "grad_norm": 2.9456920623779297, "learning_rate": 1.439116719242902e-06, "loss": 0.8926, "step": 2281 }, { "epoch": 0.021601461553752803, "grad_norm": 345.3650207519531, "learning_rate": 1.4397476340694005e-06, "loss": 32.2188, "step": 2282 }, { "epoch": 0.02161092757546786, "grad_norm": 495.00103759765625, "learning_rate": 1.440378548895899e-06, "loss": 26.9844, "step": 2283 }, { "epoch": 0.021620393597182912, "grad_norm": 279.93658447265625, "learning_rate": 1.4410094637223975e-06, "loss": 45.5469, "step": 2284 }, { "epoch": 0.021629859618897965, "grad_norm": 742.8081665039062, "learning_rate": 1.4416403785488958e-06, "loss": 67.0469, "step": 2285 }, { "epoch": 0.02163932564061302, "grad_norm": 1420.2320556640625, "learning_rate": 1.4422712933753943e-06, "loss": 39.4766, "step": 2286 }, { "epoch": 0.021648791662328075, "grad_norm": 552.4638061523438, "learning_rate": 1.4429022082018926e-06, "loss": 44.7812, "step": 2287 }, { "epoch": 0.021658257684043128, "grad_norm": 1008.65380859375, "learning_rate": 1.443533123028391e-06, "loss": 50.9766, "step": 2288 }, { "epoch": 0.02166772370575818, "grad_norm": 340.0715026855469, "learning_rate": 1.4441640378548896e-06, "loss": 27.3594, "step": 2289 }, { "epoch": 0.021677189727473234, "grad_norm": 451.9587097167969, "learning_rate": 1.4447949526813878e-06, "loss": 43.6094, "step": 2290 }, { "epoch": 0.021686655749188287, "grad_norm": 290.93792724609375, "learning_rate": 1.4454258675078863e-06, "loss": 34.5938, "step": 2291 }, { "epoch": 0.021696121770903343, "grad_norm": 261.5824890136719, "learning_rate": 1.4460567823343848e-06, "loss": 31.7031, "step": 2292 }, { "epoch": 0.021705587792618396, "grad_norm": 266.9480895996094, "learning_rate": 1.4466876971608833e-06, "loss": 35.9844, "step": 2293 }, { "epoch": 0.02171505381433345, "grad_norm": 269.03546142578125, "learning_rate": 1.4473186119873816e-06, "loss": 31.4844, "step": 2294 }, { "epoch": 0.021724519836048502, "grad_norm": 397.3854675292969, "learning_rate": 1.44794952681388e-06, "loss": 33.0312, "step": 2295 }, { "epoch": 0.02173398585776356, "grad_norm": 405.61083984375, "learning_rate": 1.4485804416403784e-06, "loss": 74.9375, "step": 2296 }, { "epoch": 0.021743451879478612, "grad_norm": 2.845357656478882, "learning_rate": 1.4492113564668769e-06, "loss": 0.9363, "step": 2297 }, { "epoch": 0.021752917901193665, "grad_norm": 455.04815673828125, "learning_rate": 1.4498422712933754e-06, "loss": 32.5625, "step": 2298 }, { "epoch": 0.021762383922908718, "grad_norm": 388.98828125, "learning_rate": 1.4504731861198739e-06, "loss": 28.9688, "step": 2299 }, { "epoch": 0.021771849944623774, "grad_norm": 1282.2784423828125, "learning_rate": 1.4511041009463723e-06, "loss": 42.4766, "step": 2300 }, { "epoch": 0.021781315966338827, "grad_norm": 232.36578369140625, "learning_rate": 1.4517350157728704e-06, "loss": 29.7188, "step": 2301 }, { "epoch": 0.02179078198805388, "grad_norm": 427.59478759765625, "learning_rate": 1.452365930599369e-06, "loss": 34.2812, "step": 2302 }, { "epoch": 0.021800248009768933, "grad_norm": 453.0247802734375, "learning_rate": 1.4529968454258674e-06, "loss": 62.875, "step": 2303 }, { "epoch": 0.02180971403148399, "grad_norm": 277.156982421875, "learning_rate": 1.453627760252366e-06, "loss": 37.8438, "step": 2304 }, { "epoch": 0.021819180053199043, "grad_norm": 519.8728637695312, "learning_rate": 1.4542586750788644e-06, "loss": 58.6094, "step": 2305 }, { "epoch": 0.021828646074914096, "grad_norm": 688.0687866210938, "learning_rate": 1.4548895899053629e-06, "loss": 61.8125, "step": 2306 }, { "epoch": 0.02183811209662915, "grad_norm": 3.42303729057312, "learning_rate": 1.455520504731861e-06, "loss": 0.9102, "step": 2307 }, { "epoch": 0.021847578118344202, "grad_norm": 362.2822265625, "learning_rate": 1.4561514195583594e-06, "loss": 31.75, "step": 2308 }, { "epoch": 0.02185704414005926, "grad_norm": 498.8395080566406, "learning_rate": 1.456782334384858e-06, "loss": 24.3672, "step": 2309 }, { "epoch": 0.02186651016177431, "grad_norm": 371.4609069824219, "learning_rate": 1.4574132492113564e-06, "loss": 42.5156, "step": 2310 }, { "epoch": 0.021875976183489364, "grad_norm": 691.4654541015625, "learning_rate": 1.458044164037855e-06, "loss": 83.4688, "step": 2311 }, { "epoch": 0.021885442205204417, "grad_norm": 264.5074157714844, "learning_rate": 1.4586750788643532e-06, "loss": 30.2031, "step": 2312 }, { "epoch": 0.021894908226919474, "grad_norm": 298.0083923339844, "learning_rate": 1.4593059936908517e-06, "loss": 29.9531, "step": 2313 }, { "epoch": 0.021904374248634527, "grad_norm": 475.6119384765625, "learning_rate": 1.45993690851735e-06, "loss": 40.7656, "step": 2314 }, { "epoch": 0.02191384027034958, "grad_norm": 583.7343139648438, "learning_rate": 1.4605678233438485e-06, "loss": 41.6484, "step": 2315 }, { "epoch": 0.021923306292064633, "grad_norm": 623.4053344726562, "learning_rate": 1.461198738170347e-06, "loss": 66.9062, "step": 2316 }, { "epoch": 0.02193277231377969, "grad_norm": 426.6863708496094, "learning_rate": 1.4618296529968455e-06, "loss": 30.6719, "step": 2317 }, { "epoch": 0.021942238335494742, "grad_norm": 760.4212036132812, "learning_rate": 1.4624605678233437e-06, "loss": 67.5, "step": 2318 }, { "epoch": 0.021951704357209795, "grad_norm": 666.774658203125, "learning_rate": 1.4630914826498422e-06, "loss": 68.6094, "step": 2319 }, { "epoch": 0.02196117037892485, "grad_norm": 345.9541015625, "learning_rate": 1.4637223974763407e-06, "loss": 29.0, "step": 2320 }, { "epoch": 0.0219706364006399, "grad_norm": 309.9389343261719, "learning_rate": 1.464353312302839e-06, "loss": 34.8906, "step": 2321 }, { "epoch": 0.021980102422354958, "grad_norm": 496.60577392578125, "learning_rate": 1.4649842271293375e-06, "loss": 72.7344, "step": 2322 }, { "epoch": 0.02198956844407001, "grad_norm": 1144.9000244140625, "learning_rate": 1.4656151419558358e-06, "loss": 33.6562, "step": 2323 }, { "epoch": 0.021999034465785064, "grad_norm": 361.1702575683594, "learning_rate": 1.4662460567823343e-06, "loss": 36.9219, "step": 2324 }, { "epoch": 0.022008500487500117, "grad_norm": 833.6679077148438, "learning_rate": 1.4668769716088328e-06, "loss": 48.5938, "step": 2325 }, { "epoch": 0.022017966509215173, "grad_norm": 1110.737548828125, "learning_rate": 1.4675078864353313e-06, "loss": 41.7344, "step": 2326 }, { "epoch": 0.022027432530930226, "grad_norm": 3.564650297164917, "learning_rate": 1.4681388012618298e-06, "loss": 1.0181, "step": 2327 }, { "epoch": 0.02203689855264528, "grad_norm": 600.9856567382812, "learning_rate": 1.468769716088328e-06, "loss": 77.875, "step": 2328 }, { "epoch": 0.022046364574360332, "grad_norm": 983.251220703125, "learning_rate": 1.4694006309148263e-06, "loss": 50.9219, "step": 2329 }, { "epoch": 0.02205583059607539, "grad_norm": 2.533923864364624, "learning_rate": 1.4700315457413248e-06, "loss": 0.7642, "step": 2330 }, { "epoch": 0.022065296617790442, "grad_norm": 505.85845947265625, "learning_rate": 1.4706624605678233e-06, "loss": 40.7422, "step": 2331 }, { "epoch": 0.022074762639505495, "grad_norm": 583.2932739257812, "learning_rate": 1.4712933753943218e-06, "loss": 64.7344, "step": 2332 }, { "epoch": 0.022084228661220548, "grad_norm": 292.03143310546875, "learning_rate": 1.4719242902208203e-06, "loss": 25.7969, "step": 2333 }, { "epoch": 0.0220936946829356, "grad_norm": 440.4851379394531, "learning_rate": 1.4725552050473184e-06, "loss": 35.625, "step": 2334 }, { "epoch": 0.022103160704650657, "grad_norm": 474.417724609375, "learning_rate": 1.4731861198738168e-06, "loss": 45.1406, "step": 2335 }, { "epoch": 0.02211262672636571, "grad_norm": 221.05162048339844, "learning_rate": 1.4738170347003153e-06, "loss": 29.8281, "step": 2336 }, { "epoch": 0.022122092748080763, "grad_norm": 406.9820861816406, "learning_rate": 1.4744479495268138e-06, "loss": 41.9375, "step": 2337 }, { "epoch": 0.022131558769795816, "grad_norm": 714.3577270507812, "learning_rate": 1.4750788643533123e-06, "loss": 41.1875, "step": 2338 }, { "epoch": 0.022141024791510873, "grad_norm": 1455.33349609375, "learning_rate": 1.4757097791798108e-06, "loss": 84.6562, "step": 2339 }, { "epoch": 0.022150490813225926, "grad_norm": 306.5000305175781, "learning_rate": 1.476340694006309e-06, "loss": 38.5938, "step": 2340 }, { "epoch": 0.02215995683494098, "grad_norm": 709.8580322265625, "learning_rate": 1.4769716088328074e-06, "loss": 80.7812, "step": 2341 }, { "epoch": 0.022169422856656032, "grad_norm": 1080.921142578125, "learning_rate": 1.4776025236593059e-06, "loss": 52.0859, "step": 2342 }, { "epoch": 0.02217888887837109, "grad_norm": 738.951904296875, "learning_rate": 1.4782334384858044e-06, "loss": 58.9531, "step": 2343 }, { "epoch": 0.02218835490008614, "grad_norm": 1164.78369140625, "learning_rate": 1.4788643533123029e-06, "loss": 64.3672, "step": 2344 }, { "epoch": 0.022197820921801194, "grad_norm": 615.897705078125, "learning_rate": 1.4794952681388011e-06, "loss": 54.9766, "step": 2345 }, { "epoch": 0.022207286943516247, "grad_norm": 483.5643005371094, "learning_rate": 1.4801261829652996e-06, "loss": 58.0156, "step": 2346 }, { "epoch": 0.022216752965231304, "grad_norm": 262.18603515625, "learning_rate": 1.4807570977917981e-06, "loss": 27.25, "step": 2347 }, { "epoch": 0.022226218986946357, "grad_norm": 290.754150390625, "learning_rate": 1.4813880126182964e-06, "loss": 29.9844, "step": 2348 }, { "epoch": 0.02223568500866141, "grad_norm": 928.41845703125, "learning_rate": 1.482018927444795e-06, "loss": 30.7344, "step": 2349 }, { "epoch": 0.022245151030376463, "grad_norm": 1181.9990234375, "learning_rate": 1.4826498422712934e-06, "loss": 55.0938, "step": 2350 }, { "epoch": 0.022254617052091516, "grad_norm": 370.62176513671875, "learning_rate": 1.4832807570977917e-06, "loss": 29.0156, "step": 2351 }, { "epoch": 0.022264083073806572, "grad_norm": 258.2720031738281, "learning_rate": 1.4839116719242902e-06, "loss": 26.8594, "step": 2352 }, { "epoch": 0.022273549095521625, "grad_norm": 609.595703125, "learning_rate": 1.4845425867507887e-06, "loss": 48.7109, "step": 2353 }, { "epoch": 0.02228301511723668, "grad_norm": 252.6787872314453, "learning_rate": 1.4851735015772872e-06, "loss": 27.9062, "step": 2354 }, { "epoch": 0.02229248113895173, "grad_norm": 688.6142578125, "learning_rate": 1.4858044164037854e-06, "loss": 44.5938, "step": 2355 }, { "epoch": 0.022301947160666788, "grad_norm": 369.186279296875, "learning_rate": 1.4864353312302837e-06, "loss": 37.75, "step": 2356 }, { "epoch": 0.02231141318238184, "grad_norm": 589.7089233398438, "learning_rate": 1.4870662460567822e-06, "loss": 55.375, "step": 2357 }, { "epoch": 0.022320879204096894, "grad_norm": 868.7932739257812, "learning_rate": 1.4876971608832807e-06, "loss": 24.0156, "step": 2358 }, { "epoch": 0.022330345225811947, "grad_norm": 2666.613037109375, "learning_rate": 1.4883280757097792e-06, "loss": 34.2812, "step": 2359 }, { "epoch": 0.022339811247527003, "grad_norm": 330.0297546386719, "learning_rate": 1.4889589905362777e-06, "loss": 28.5156, "step": 2360 }, { "epoch": 0.022349277269242056, "grad_norm": 252.2090301513672, "learning_rate": 1.489589905362776e-06, "loss": 29.5938, "step": 2361 }, { "epoch": 0.02235874329095711, "grad_norm": 414.0161437988281, "learning_rate": 1.4902208201892742e-06, "loss": 41.4219, "step": 2362 }, { "epoch": 0.022368209312672162, "grad_norm": 2.931870222091675, "learning_rate": 1.4908517350157727e-06, "loss": 1.0073, "step": 2363 }, { "epoch": 0.022377675334387215, "grad_norm": 406.5690002441406, "learning_rate": 1.4914826498422712e-06, "loss": 39.7031, "step": 2364 }, { "epoch": 0.022387141356102272, "grad_norm": 2.869478225708008, "learning_rate": 1.4921135646687697e-06, "loss": 0.7725, "step": 2365 }, { "epoch": 0.022396607377817325, "grad_norm": 573.6174926757812, "learning_rate": 1.4927444794952682e-06, "loss": 83.7812, "step": 2366 }, { "epoch": 0.022406073399532378, "grad_norm": 296.5265197753906, "learning_rate": 1.4933753943217665e-06, "loss": 30.6719, "step": 2367 }, { "epoch": 0.02241553942124743, "grad_norm": 872.9573364257812, "learning_rate": 1.4940063091482648e-06, "loss": 32.4375, "step": 2368 }, { "epoch": 0.022425005442962487, "grad_norm": 374.91571044921875, "learning_rate": 1.4946372239747633e-06, "loss": 44.2812, "step": 2369 }, { "epoch": 0.02243447146467754, "grad_norm": 217.17263793945312, "learning_rate": 1.4952681388012618e-06, "loss": 28.75, "step": 2370 }, { "epoch": 0.022443937486392593, "grad_norm": 358.05438232421875, "learning_rate": 1.4958990536277603e-06, "loss": 31.0625, "step": 2371 }, { "epoch": 0.022453403508107646, "grad_norm": 380.25238037109375, "learning_rate": 1.4965299684542588e-06, "loss": 27.7109, "step": 2372 }, { "epoch": 0.022462869529822703, "grad_norm": 1144.12646484375, "learning_rate": 1.497160883280757e-06, "loss": 58.4375, "step": 2373 }, { "epoch": 0.022472335551537756, "grad_norm": 3.1962244510650635, "learning_rate": 1.4977917981072555e-06, "loss": 0.9067, "step": 2374 }, { "epoch": 0.02248180157325281, "grad_norm": 474.6938781738281, "learning_rate": 1.4984227129337538e-06, "loss": 42.8906, "step": 2375 }, { "epoch": 0.022491267594967862, "grad_norm": 236.0217742919922, "learning_rate": 1.4990536277602523e-06, "loss": 31.4062, "step": 2376 }, { "epoch": 0.022500733616682915, "grad_norm": 1512.5703125, "learning_rate": 1.4996845425867508e-06, "loss": 34.6094, "step": 2377 }, { "epoch": 0.02251019963839797, "grad_norm": 328.22357177734375, "learning_rate": 1.500315457413249e-06, "loss": 45.4688, "step": 2378 }, { "epoch": 0.022519665660113024, "grad_norm": 273.11151123046875, "learning_rate": 1.5009463722397476e-06, "loss": 31.625, "step": 2379 }, { "epoch": 0.022529131681828077, "grad_norm": 455.5160217285156, "learning_rate": 1.501577287066246e-06, "loss": 51.6875, "step": 2380 }, { "epoch": 0.02253859770354313, "grad_norm": 711.7984619140625, "learning_rate": 1.5022082018927443e-06, "loss": 68.5938, "step": 2381 }, { "epoch": 0.022548063725258187, "grad_norm": 554.297607421875, "learning_rate": 1.5028391167192428e-06, "loss": 28.7969, "step": 2382 }, { "epoch": 0.02255752974697324, "grad_norm": 1103.74267578125, "learning_rate": 1.5034700315457411e-06, "loss": 61.9531, "step": 2383 }, { "epoch": 0.022566995768688293, "grad_norm": 393.7529602050781, "learning_rate": 1.5041009463722396e-06, "loss": 31.2656, "step": 2384 }, { "epoch": 0.022576461790403346, "grad_norm": 240.2495880126953, "learning_rate": 1.504731861198738e-06, "loss": 28.6406, "step": 2385 }, { "epoch": 0.022585927812118402, "grad_norm": 518.358154296875, "learning_rate": 1.5053627760252366e-06, "loss": 35.8828, "step": 2386 }, { "epoch": 0.022595393833833455, "grad_norm": 314.09832763671875, "learning_rate": 1.505993690851735e-06, "loss": 34.375, "step": 2387 }, { "epoch": 0.02260485985554851, "grad_norm": 469.20672607421875, "learning_rate": 1.5066246056782334e-06, "loss": 32.9219, "step": 2388 }, { "epoch": 0.02261432587726356, "grad_norm": 883.1495971679688, "learning_rate": 1.5072555205047316e-06, "loss": 40.1094, "step": 2389 }, { "epoch": 0.022623791898978618, "grad_norm": 441.7584533691406, "learning_rate": 1.5078864353312301e-06, "loss": 32.9844, "step": 2390 }, { "epoch": 0.02263325792069367, "grad_norm": 396.71051025390625, "learning_rate": 1.5085173501577286e-06, "loss": 32.2656, "step": 2391 }, { "epoch": 0.022642723942408724, "grad_norm": 419.2718505859375, "learning_rate": 1.5091482649842271e-06, "loss": 40.0625, "step": 2392 }, { "epoch": 0.022652189964123777, "grad_norm": 742.7787475585938, "learning_rate": 1.5097791798107256e-06, "loss": 75.6094, "step": 2393 }, { "epoch": 0.02266165598583883, "grad_norm": 878.5562744140625, "learning_rate": 1.510410094637224e-06, "loss": 73.125, "step": 2394 }, { "epoch": 0.022671122007553886, "grad_norm": 282.17034912109375, "learning_rate": 1.5110410094637222e-06, "loss": 29.6875, "step": 2395 }, { "epoch": 0.02268058802926894, "grad_norm": 1083.0106201171875, "learning_rate": 1.5116719242902207e-06, "loss": 44.5859, "step": 2396 }, { "epoch": 0.022690054050983993, "grad_norm": 1024.00390625, "learning_rate": 1.5123028391167192e-06, "loss": 64.7812, "step": 2397 }, { "epoch": 0.022699520072699046, "grad_norm": 758.758056640625, "learning_rate": 1.5129337539432177e-06, "loss": 83.3125, "step": 2398 }, { "epoch": 0.022708986094414102, "grad_norm": 518.4668579101562, "learning_rate": 1.5135646687697162e-06, "loss": 50.5938, "step": 2399 }, { "epoch": 0.022718452116129155, "grad_norm": 287.4397888183594, "learning_rate": 1.5141955835962144e-06, "loss": 30.5781, "step": 2400 }, { "epoch": 0.022727918137844208, "grad_norm": 256.382080078125, "learning_rate": 1.5148264984227127e-06, "loss": 28.9062, "step": 2401 }, { "epoch": 0.02273738415955926, "grad_norm": 569.135009765625, "learning_rate": 1.5154574132492112e-06, "loss": 38.8594, "step": 2402 }, { "epoch": 0.022746850181274317, "grad_norm": 1330.0994873046875, "learning_rate": 1.5160883280757097e-06, "loss": 69.625, "step": 2403 }, { "epoch": 0.02275631620298937, "grad_norm": 688.12548828125, "learning_rate": 1.5167192429022082e-06, "loss": 36.6562, "step": 2404 }, { "epoch": 0.022765782224704424, "grad_norm": 908.9646606445312, "learning_rate": 1.5173501577287065e-06, "loss": 92.2188, "step": 2405 }, { "epoch": 0.022775248246419477, "grad_norm": 2.612150192260742, "learning_rate": 1.517981072555205e-06, "loss": 0.8306, "step": 2406 }, { "epoch": 0.02278471426813453, "grad_norm": 321.3191223144531, "learning_rate": 1.5186119873817035e-06, "loss": 32.4844, "step": 2407 }, { "epoch": 0.022794180289849586, "grad_norm": 584.8010864257812, "learning_rate": 1.5192429022082017e-06, "loss": 31.875, "step": 2408 }, { "epoch": 0.02280364631156464, "grad_norm": 325.9168395996094, "learning_rate": 1.5198738170347002e-06, "loss": 30.75, "step": 2409 }, { "epoch": 0.022813112333279692, "grad_norm": 277.4589538574219, "learning_rate": 1.5205047318611987e-06, "loss": 43.6094, "step": 2410 }, { "epoch": 0.022822578354994745, "grad_norm": 448.549560546875, "learning_rate": 1.521135646687697e-06, "loss": 32.6875, "step": 2411 }, { "epoch": 0.0228320443767098, "grad_norm": 583.8766479492188, "learning_rate": 1.5217665615141955e-06, "loss": 57.5625, "step": 2412 }, { "epoch": 0.022841510398424855, "grad_norm": 921.271484375, "learning_rate": 1.522397476340694e-06, "loss": 82.5781, "step": 2413 }, { "epoch": 0.022850976420139908, "grad_norm": 551.33154296875, "learning_rate": 1.5230283911671925e-06, "loss": 32.7812, "step": 2414 }, { "epoch": 0.02286044244185496, "grad_norm": 581.7525634765625, "learning_rate": 1.5236593059936908e-06, "loss": 41.1016, "step": 2415 }, { "epoch": 0.022869908463570017, "grad_norm": 736.3814697265625, "learning_rate": 1.524290220820189e-06, "loss": 66.0781, "step": 2416 }, { "epoch": 0.02287937448528507, "grad_norm": 442.8851318359375, "learning_rate": 1.5249211356466875e-06, "loss": 46.3672, "step": 2417 }, { "epoch": 0.022888840507000123, "grad_norm": 362.6528015136719, "learning_rate": 1.525552050473186e-06, "loss": 35.5781, "step": 2418 }, { "epoch": 0.022898306528715176, "grad_norm": 283.0700988769531, "learning_rate": 1.5261829652996845e-06, "loss": 29.5156, "step": 2419 }, { "epoch": 0.02290777255043023, "grad_norm": 469.7838134765625, "learning_rate": 1.526813880126183e-06, "loss": 32.0156, "step": 2420 }, { "epoch": 0.022917238572145286, "grad_norm": 1677.498046875, "learning_rate": 1.5274447949526815e-06, "loss": 78.0938, "step": 2421 }, { "epoch": 0.02292670459386034, "grad_norm": 1077.3629150390625, "learning_rate": 1.5280757097791796e-06, "loss": 80.5938, "step": 2422 }, { "epoch": 0.02293617061557539, "grad_norm": 333.0930480957031, "learning_rate": 1.528706624605678e-06, "loss": 27.0781, "step": 2423 }, { "epoch": 0.022945636637290445, "grad_norm": 498.6107177734375, "learning_rate": 1.5293375394321766e-06, "loss": 57.25, "step": 2424 }, { "epoch": 0.0229551026590055, "grad_norm": 760.1676025390625, "learning_rate": 1.529968454258675e-06, "loss": 76.9219, "step": 2425 }, { "epoch": 0.022964568680720554, "grad_norm": 468.530029296875, "learning_rate": 1.5305993690851736e-06, "loss": 33.4531, "step": 2426 }, { "epoch": 0.022974034702435607, "grad_norm": 1185.3240966796875, "learning_rate": 1.5312302839116718e-06, "loss": 61.4062, "step": 2427 }, { "epoch": 0.02298350072415066, "grad_norm": 575.8603515625, "learning_rate": 1.5318611987381701e-06, "loss": 30.3125, "step": 2428 }, { "epoch": 0.022992966745865717, "grad_norm": 507.6539001464844, "learning_rate": 1.5324921135646686e-06, "loss": 68.1094, "step": 2429 }, { "epoch": 0.02300243276758077, "grad_norm": 420.17901611328125, "learning_rate": 1.533123028391167e-06, "loss": 31.7031, "step": 2430 }, { "epoch": 0.023011898789295823, "grad_norm": 441.7903137207031, "learning_rate": 1.5337539432176656e-06, "loss": 82.5625, "step": 2431 }, { "epoch": 0.023021364811010876, "grad_norm": 494.2134704589844, "learning_rate": 1.534384858044164e-06, "loss": 28.8906, "step": 2432 }, { "epoch": 0.02303083083272593, "grad_norm": 460.95538330078125, "learning_rate": 1.5350157728706624e-06, "loss": 59.1562, "step": 2433 }, { "epoch": 0.023040296854440985, "grad_norm": 869.9039306640625, "learning_rate": 1.5356466876971609e-06, "loss": 52.8047, "step": 2434 }, { "epoch": 0.023049762876156038, "grad_norm": 292.0153503417969, "learning_rate": 1.5362776025236591e-06, "loss": 32.3438, "step": 2435 }, { "epoch": 0.02305922889787109, "grad_norm": 344.89337158203125, "learning_rate": 1.5369085173501576e-06, "loss": 27.3594, "step": 2436 }, { "epoch": 0.023068694919586144, "grad_norm": 955.6107177734375, "learning_rate": 1.5375394321766561e-06, "loss": 58.6875, "step": 2437 }, { "epoch": 0.0230781609413012, "grad_norm": 376.27496337890625, "learning_rate": 1.5381703470031544e-06, "loss": 47.9062, "step": 2438 }, { "epoch": 0.023087626963016254, "grad_norm": 848.0546875, "learning_rate": 1.538801261829653e-06, "loss": 65.7188, "step": 2439 }, { "epoch": 0.023097092984731307, "grad_norm": 748.07275390625, "learning_rate": 1.5394321766561514e-06, "loss": 42.2031, "step": 2440 }, { "epoch": 0.02310655900644636, "grad_norm": 299.4213562011719, "learning_rate": 1.5400630914826499e-06, "loss": 28.9219, "step": 2441 }, { "epoch": 0.023116025028161416, "grad_norm": 791.9278564453125, "learning_rate": 1.5406940063091482e-06, "loss": 37.9219, "step": 2442 }, { "epoch": 0.02312549104987647, "grad_norm": 1199.754638671875, "learning_rate": 1.5413249211356467e-06, "loss": 63.7344, "step": 2443 }, { "epoch": 0.023134957071591522, "grad_norm": 535.5100708007812, "learning_rate": 1.541955835962145e-06, "loss": 55.3438, "step": 2444 }, { "epoch": 0.023144423093306575, "grad_norm": 666.8403930664062, "learning_rate": 1.5425867507886434e-06, "loss": 34.9688, "step": 2445 }, { "epoch": 0.02315388911502163, "grad_norm": 390.2201232910156, "learning_rate": 1.543217665615142e-06, "loss": 57.4688, "step": 2446 }, { "epoch": 0.023163355136736685, "grad_norm": 587.0618896484375, "learning_rate": 1.5438485804416404e-06, "loss": 50.0312, "step": 2447 }, { "epoch": 0.023172821158451738, "grad_norm": 776.1110229492188, "learning_rate": 1.544479495268139e-06, "loss": 57.3125, "step": 2448 }, { "epoch": 0.02318228718016679, "grad_norm": 654.7110595703125, "learning_rate": 1.545110410094637e-06, "loss": 54.0469, "step": 2449 }, { "epoch": 0.023191753201881844, "grad_norm": 1110.5638427734375, "learning_rate": 1.5457413249211355e-06, "loss": 54.1875, "step": 2450 }, { "epoch": 0.0232012192235969, "grad_norm": 504.7755432128906, "learning_rate": 1.546372239747634e-06, "loss": 47.6875, "step": 2451 }, { "epoch": 0.023210685245311953, "grad_norm": 197.56910705566406, "learning_rate": 1.5470031545741325e-06, "loss": 29.3594, "step": 2452 }, { "epoch": 0.023220151267027006, "grad_norm": 801.2271728515625, "learning_rate": 1.547634069400631e-06, "loss": 43.8281, "step": 2453 }, { "epoch": 0.02322961728874206, "grad_norm": 527.1027221679688, "learning_rate": 1.5482649842271294e-06, "loss": 52.4844, "step": 2454 }, { "epoch": 0.023239083310457116, "grad_norm": 222.69725036621094, "learning_rate": 1.5488958990536275e-06, "loss": 23.5781, "step": 2455 }, { "epoch": 0.02324854933217217, "grad_norm": 357.06939697265625, "learning_rate": 1.549526813880126e-06, "loss": 36.9531, "step": 2456 }, { "epoch": 0.02325801535388722, "grad_norm": 431.2113342285156, "learning_rate": 1.5501577287066245e-06, "loss": 31.4844, "step": 2457 }, { "epoch": 0.023267481375602275, "grad_norm": 611.5222778320312, "learning_rate": 1.550788643533123e-06, "loss": 33.9453, "step": 2458 }, { "epoch": 0.02327694739731733, "grad_norm": 631.5997314453125, "learning_rate": 1.5514195583596215e-06, "loss": 44.2891, "step": 2459 }, { "epoch": 0.023286413419032384, "grad_norm": 346.2403869628906, "learning_rate": 1.5520504731861198e-06, "loss": 34.3281, "step": 2460 }, { "epoch": 0.023295879440747437, "grad_norm": 218.24069213867188, "learning_rate": 1.5526813880126183e-06, "loss": 29.5, "step": 2461 }, { "epoch": 0.02330534546246249, "grad_norm": 349.93939208984375, "learning_rate": 1.5533123028391165e-06, "loss": 34.2812, "step": 2462 }, { "epoch": 0.023314811484177543, "grad_norm": 1087.2655029296875, "learning_rate": 1.553943217665615e-06, "loss": 85.0469, "step": 2463 }, { "epoch": 0.0233242775058926, "grad_norm": 350.50439453125, "learning_rate": 1.5545741324921135e-06, "loss": 38.0312, "step": 2464 }, { "epoch": 0.023333743527607653, "grad_norm": 238.1165313720703, "learning_rate": 1.555205047318612e-06, "loss": 27.3438, "step": 2465 }, { "epoch": 0.023343209549322706, "grad_norm": 366.4306335449219, "learning_rate": 1.5558359621451103e-06, "loss": 32.8125, "step": 2466 }, { "epoch": 0.02335267557103776, "grad_norm": 274.64215087890625, "learning_rate": 1.5564668769716088e-06, "loss": 31.4688, "step": 2467 }, { "epoch": 0.023362141592752815, "grad_norm": 177.6446533203125, "learning_rate": 1.5570977917981073e-06, "loss": 26.2969, "step": 2468 }, { "epoch": 0.023371607614467868, "grad_norm": 673.96044921875, "learning_rate": 1.5577287066246056e-06, "loss": 40.4375, "step": 2469 }, { "epoch": 0.02338107363618292, "grad_norm": 504.27056884765625, "learning_rate": 1.558359621451104e-06, "loss": 63.0312, "step": 2470 }, { "epoch": 0.023390539657897974, "grad_norm": 697.6470336914062, "learning_rate": 1.5589905362776023e-06, "loss": 79.6562, "step": 2471 }, { "epoch": 0.02340000567961303, "grad_norm": 712.579345703125, "learning_rate": 1.5596214511041008e-06, "loss": 39.7188, "step": 2472 }, { "epoch": 0.023409471701328084, "grad_norm": 216.06570434570312, "learning_rate": 1.5602523659305993e-06, "loss": 28.3125, "step": 2473 }, { "epoch": 0.023418937723043137, "grad_norm": 492.8212890625, "learning_rate": 1.5608832807570978e-06, "loss": 43.7969, "step": 2474 }, { "epoch": 0.02342840374475819, "grad_norm": 484.0276794433594, "learning_rate": 1.561514195583596e-06, "loss": 34.2031, "step": 2475 }, { "epoch": 0.023437869766473243, "grad_norm": 3.0328526496887207, "learning_rate": 1.5621451104100946e-06, "loss": 0.8733, "step": 2476 }, { "epoch": 0.0234473357881883, "grad_norm": 415.3080139160156, "learning_rate": 1.5627760252365929e-06, "loss": 28.4688, "step": 2477 }, { "epoch": 0.023456801809903352, "grad_norm": 595.7061157226562, "learning_rate": 1.5634069400630914e-06, "loss": 58.1562, "step": 2478 }, { "epoch": 0.023466267831618405, "grad_norm": 294.48040771484375, "learning_rate": 1.5640378548895899e-06, "loss": 30.6875, "step": 2479 }, { "epoch": 0.023475733853333458, "grad_norm": 463.8133850097656, "learning_rate": 1.5646687697160884e-06, "loss": 32.125, "step": 2480 }, { "epoch": 0.023485199875048515, "grad_norm": 539.2545166015625, "learning_rate": 1.5652996845425868e-06, "loss": 60.3594, "step": 2481 }, { "epoch": 0.023494665896763568, "grad_norm": 603.8751220703125, "learning_rate": 1.565930599369085e-06, "loss": 40.3984, "step": 2482 }, { "epoch": 0.02350413191847862, "grad_norm": 520.0425415039062, "learning_rate": 1.5665615141955834e-06, "loss": 76.8438, "step": 2483 }, { "epoch": 0.023513597940193674, "grad_norm": 450.4207458496094, "learning_rate": 1.567192429022082e-06, "loss": 34.5312, "step": 2484 }, { "epoch": 0.02352306396190873, "grad_norm": 619.543701171875, "learning_rate": 1.5678233438485804e-06, "loss": 54.8984, "step": 2485 }, { "epoch": 0.023532529983623783, "grad_norm": 635.2056884765625, "learning_rate": 1.5684542586750789e-06, "loss": 67.1719, "step": 2486 }, { "epoch": 0.023541996005338836, "grad_norm": 1525.3709716796875, "learning_rate": 1.5690851735015774e-06, "loss": 39.1875, "step": 2487 }, { "epoch": 0.02355146202705389, "grad_norm": 395.6950988769531, "learning_rate": 1.5697160883280757e-06, "loss": 30.0156, "step": 2488 }, { "epoch": 0.023560928048768946, "grad_norm": 464.6424255371094, "learning_rate": 1.570347003154574e-06, "loss": 37.2344, "step": 2489 }, { "epoch": 0.023570394070484, "grad_norm": 200.7718963623047, "learning_rate": 1.5709779179810724e-06, "loss": 25.4375, "step": 2490 }, { "epoch": 0.02357986009219905, "grad_norm": 627.3775024414062, "learning_rate": 1.571608832807571e-06, "loss": 71.2812, "step": 2491 }, { "epoch": 0.023589326113914105, "grad_norm": 741.108642578125, "learning_rate": 1.5722397476340694e-06, "loss": 32.0, "step": 2492 }, { "epoch": 0.023598792135629158, "grad_norm": 255.10838317871094, "learning_rate": 1.5728706624605677e-06, "loss": 27.5156, "step": 2493 }, { "epoch": 0.023608258157344214, "grad_norm": 685.4452514648438, "learning_rate": 1.5735015772870662e-06, "loss": 30.9297, "step": 2494 }, { "epoch": 0.023617724179059267, "grad_norm": 429.3882751464844, "learning_rate": 1.5741324921135647e-06, "loss": 32.9062, "step": 2495 }, { "epoch": 0.02362719020077432, "grad_norm": 785.0680541992188, "learning_rate": 1.574763406940063e-06, "loss": 50.0, "step": 2496 }, { "epoch": 0.023636656222489373, "grad_norm": 1134.9925537109375, "learning_rate": 1.5753943217665615e-06, "loss": 50.8906, "step": 2497 }, { "epoch": 0.02364612224420443, "grad_norm": 380.7203369140625, "learning_rate": 1.57602523659306e-06, "loss": 32.5469, "step": 2498 }, { "epoch": 0.023655588265919483, "grad_norm": 607.430419921875, "learning_rate": 1.5766561514195582e-06, "loss": 42.6562, "step": 2499 }, { "epoch": 0.023665054287634536, "grad_norm": 2.920228958129883, "learning_rate": 1.5772870662460567e-06, "loss": 0.918, "step": 2500 }, { "epoch": 0.02367452030934959, "grad_norm": 620.69482421875, "learning_rate": 1.5779179810725552e-06, "loss": 46.3047, "step": 2501 }, { "epoch": 0.023683986331064645, "grad_norm": 366.70196533203125, "learning_rate": 1.5785488958990535e-06, "loss": 32.9531, "step": 2502 }, { "epoch": 0.023693452352779698, "grad_norm": 620.8310546875, "learning_rate": 1.579179810725552e-06, "loss": 63.8125, "step": 2503 }, { "epoch": 0.02370291837449475, "grad_norm": 1540.321044921875, "learning_rate": 1.5798107255520503e-06, "loss": 56.5703, "step": 2504 }, { "epoch": 0.023712384396209804, "grad_norm": 270.3115234375, "learning_rate": 1.5804416403785488e-06, "loss": 29.4531, "step": 2505 }, { "epoch": 0.023721850417924857, "grad_norm": 1041.8233642578125, "learning_rate": 1.5810725552050473e-06, "loss": 41.875, "step": 2506 }, { "epoch": 0.023731316439639914, "grad_norm": 3.472623109817505, "learning_rate": 1.5817034700315458e-06, "loss": 0.9434, "step": 2507 }, { "epoch": 0.023740782461354967, "grad_norm": 719.4097290039062, "learning_rate": 1.5823343848580442e-06, "loss": 46.6719, "step": 2508 }, { "epoch": 0.02375024848307002, "grad_norm": 241.29510498046875, "learning_rate": 1.5829652996845425e-06, "loss": 27.2812, "step": 2509 }, { "epoch": 0.023759714504785073, "grad_norm": 2.912916898727417, "learning_rate": 1.5835962145110408e-06, "loss": 0.8245, "step": 2510 }, { "epoch": 0.02376918052650013, "grad_norm": 278.57989501953125, "learning_rate": 1.5842271293375393e-06, "loss": 32.125, "step": 2511 }, { "epoch": 0.023778646548215182, "grad_norm": 431.1790771484375, "learning_rate": 1.5848580441640378e-06, "loss": 29.9844, "step": 2512 }, { "epoch": 0.023788112569930235, "grad_norm": 390.9819641113281, "learning_rate": 1.5854889589905363e-06, "loss": 40.8438, "step": 2513 }, { "epoch": 0.023797578591645288, "grad_norm": 453.63037109375, "learning_rate": 1.5861198738170348e-06, "loss": 29.8047, "step": 2514 }, { "epoch": 0.023807044613360345, "grad_norm": 744.235595703125, "learning_rate": 1.586750788643533e-06, "loss": 55.5781, "step": 2515 }, { "epoch": 0.023816510635075398, "grad_norm": 380.7840881347656, "learning_rate": 1.5873817034700313e-06, "loss": 32.3438, "step": 2516 }, { "epoch": 0.02382597665679045, "grad_norm": 516.2864990234375, "learning_rate": 1.5880126182965298e-06, "loss": 40.5625, "step": 2517 }, { "epoch": 0.023835442678505504, "grad_norm": 552.4493408203125, "learning_rate": 1.5886435331230283e-06, "loss": 34.2344, "step": 2518 }, { "epoch": 0.023844908700220557, "grad_norm": 631.7605590820312, "learning_rate": 1.5892744479495268e-06, "loss": 47.7188, "step": 2519 }, { "epoch": 0.023854374721935613, "grad_norm": 199.6981964111328, "learning_rate": 1.5899053627760253e-06, "loss": 25.0781, "step": 2520 }, { "epoch": 0.023863840743650666, "grad_norm": 783.0921630859375, "learning_rate": 1.5905362776025236e-06, "loss": 68.0, "step": 2521 }, { "epoch": 0.02387330676536572, "grad_norm": 413.32989501953125, "learning_rate": 1.5911671924290219e-06, "loss": 69.0312, "step": 2522 }, { "epoch": 0.023882772787080772, "grad_norm": 672.3653564453125, "learning_rate": 1.5917981072555204e-06, "loss": 63.9844, "step": 2523 }, { "epoch": 0.02389223880879583, "grad_norm": 305.021240234375, "learning_rate": 1.5924290220820189e-06, "loss": 31.7969, "step": 2524 }, { "epoch": 0.02390170483051088, "grad_norm": 3.0268609523773193, "learning_rate": 1.5930599369085174e-06, "loss": 0.9761, "step": 2525 }, { "epoch": 0.023911170852225935, "grad_norm": 445.1587219238281, "learning_rate": 1.5936908517350156e-06, "loss": 30.5625, "step": 2526 }, { "epoch": 0.023920636873940988, "grad_norm": 817.5070190429688, "learning_rate": 1.5943217665615141e-06, "loss": 69.0938, "step": 2527 }, { "epoch": 0.023930102895656044, "grad_norm": 1163.7994384765625, "learning_rate": 1.5949526813880126e-06, "loss": 61.3984, "step": 2528 }, { "epoch": 0.023939568917371097, "grad_norm": 302.4700622558594, "learning_rate": 1.595583596214511e-06, "loss": 40.375, "step": 2529 }, { "epoch": 0.02394903493908615, "grad_norm": 205.80819702148438, "learning_rate": 1.5962145110410094e-06, "loss": 29.9531, "step": 2530 }, { "epoch": 0.023958500960801203, "grad_norm": 1089.6424560546875, "learning_rate": 1.5968454258675079e-06, "loss": 39.5977, "step": 2531 }, { "epoch": 0.02396796698251626, "grad_norm": 227.6798095703125, "learning_rate": 1.5974763406940062e-06, "loss": 28.1875, "step": 2532 }, { "epoch": 0.023977433004231313, "grad_norm": 631.6924438476562, "learning_rate": 1.5981072555205047e-06, "loss": 73.2188, "step": 2533 }, { "epoch": 0.023986899025946366, "grad_norm": 280.169677734375, "learning_rate": 1.5987381703470032e-06, "loss": 29.6406, "step": 2534 }, { "epoch": 0.02399636504766142, "grad_norm": 531.1072998046875, "learning_rate": 1.5993690851735016e-06, "loss": 41.125, "step": 2535 }, { "epoch": 0.02400583106937647, "grad_norm": 779.2562866210938, "learning_rate": 1.6e-06, "loss": 43.6797, "step": 2536 }, { "epoch": 0.024015297091091528, "grad_norm": 418.2118835449219, "learning_rate": 1.6006309148264982e-06, "loss": 37.9688, "step": 2537 }, { "epoch": 0.02402476311280658, "grad_norm": 493.7008972167969, "learning_rate": 1.6012618296529967e-06, "loss": 36.6719, "step": 2538 }, { "epoch": 0.024034229134521634, "grad_norm": 1895.2301025390625, "learning_rate": 1.6018927444794952e-06, "loss": 80.1562, "step": 2539 }, { "epoch": 0.024043695156236687, "grad_norm": 351.86474609375, "learning_rate": 1.6025236593059937e-06, "loss": 26.5625, "step": 2540 }, { "epoch": 0.024053161177951744, "grad_norm": 223.02389526367188, "learning_rate": 1.6031545741324922e-06, "loss": 28.7969, "step": 2541 }, { "epoch": 0.024062627199666797, "grad_norm": 933.3494262695312, "learning_rate": 1.6037854889589907e-06, "loss": 30.3125, "step": 2542 }, { "epoch": 0.02407209322138185, "grad_norm": 240.0073699951172, "learning_rate": 1.6044164037854887e-06, "loss": 27.6406, "step": 2543 }, { "epoch": 0.024081559243096903, "grad_norm": 371.7530212402344, "learning_rate": 1.6050473186119872e-06, "loss": 29.6875, "step": 2544 }, { "epoch": 0.02409102526481196, "grad_norm": 692.958984375, "learning_rate": 1.6056782334384857e-06, "loss": 48.2734, "step": 2545 }, { "epoch": 0.024100491286527012, "grad_norm": 306.611328125, "learning_rate": 1.6063091482649842e-06, "loss": 27.9531, "step": 2546 }, { "epoch": 0.024109957308242065, "grad_norm": 6738.69384765625, "learning_rate": 1.6069400630914827e-06, "loss": 90.9375, "step": 2547 }, { "epoch": 0.024119423329957118, "grad_norm": 321.13592529296875, "learning_rate": 1.607570977917981e-06, "loss": 30.5625, "step": 2548 }, { "epoch": 0.02412888935167217, "grad_norm": 1500.3009033203125, "learning_rate": 1.6082018927444793e-06, "loss": 80.7812, "step": 2549 }, { "epoch": 0.024138355373387228, "grad_norm": 2.677541971206665, "learning_rate": 1.6088328075709778e-06, "loss": 0.9751, "step": 2550 }, { "epoch": 0.02414782139510228, "grad_norm": 838.2850341796875, "learning_rate": 1.6094637223974763e-06, "loss": 68.3125, "step": 2551 }, { "epoch": 0.024157287416817334, "grad_norm": 458.2590637207031, "learning_rate": 1.6100946372239748e-06, "loss": 32.4219, "step": 2552 }, { "epoch": 0.024166753438532387, "grad_norm": 984.072021484375, "learning_rate": 1.6107255520504732e-06, "loss": 35.9844, "step": 2553 }, { "epoch": 0.024176219460247443, "grad_norm": 716.7880249023438, "learning_rate": 1.6113564668769715e-06, "loss": 62.3594, "step": 2554 }, { "epoch": 0.024185685481962496, "grad_norm": 2.534846067428589, "learning_rate": 1.61198738170347e-06, "loss": 0.8521, "step": 2555 }, { "epoch": 0.02419515150367755, "grad_norm": 792.4749145507812, "learning_rate": 1.6126182965299683e-06, "loss": 53.6719, "step": 2556 }, { "epoch": 0.024204617525392602, "grad_norm": 526.32763671875, "learning_rate": 1.6132492113564668e-06, "loss": 32.125, "step": 2557 }, { "epoch": 0.02421408354710766, "grad_norm": 635.0516357421875, "learning_rate": 1.6138801261829653e-06, "loss": 54.375, "step": 2558 }, { "epoch": 0.02422354956882271, "grad_norm": 782.49072265625, "learning_rate": 1.6145110410094636e-06, "loss": 39.4375, "step": 2559 }, { "epoch": 0.024233015590537765, "grad_norm": 851.5530395507812, "learning_rate": 1.615141955835962e-06, "loss": 75.1406, "step": 2560 }, { "epoch": 0.024242481612252818, "grad_norm": 1468.772216796875, "learning_rate": 1.6157728706624606e-06, "loss": 60.75, "step": 2561 }, { "epoch": 0.02425194763396787, "grad_norm": 291.6654968261719, "learning_rate": 1.616403785488959e-06, "loss": 36.4844, "step": 2562 }, { "epoch": 0.024261413655682927, "grad_norm": 1018.0240478515625, "learning_rate": 1.6170347003154573e-06, "loss": 83.0625, "step": 2563 }, { "epoch": 0.02427087967739798, "grad_norm": 481.2201843261719, "learning_rate": 1.6176656151419556e-06, "loss": 75.4688, "step": 2564 }, { "epoch": 0.024280345699113033, "grad_norm": 418.5592956542969, "learning_rate": 1.618296529968454e-06, "loss": 31.3438, "step": 2565 }, { "epoch": 0.024289811720828086, "grad_norm": 438.0888671875, "learning_rate": 1.6189274447949526e-06, "loss": 67.6562, "step": 2566 }, { "epoch": 0.024299277742543143, "grad_norm": 698.9397583007812, "learning_rate": 1.619558359621451e-06, "loss": 56.8594, "step": 2567 }, { "epoch": 0.024308743764258196, "grad_norm": 279.423583984375, "learning_rate": 1.6201892744479496e-06, "loss": 30.7656, "step": 2568 }, { "epoch": 0.02431820978597325, "grad_norm": 396.282958984375, "learning_rate": 1.620820189274448e-06, "loss": 34.7344, "step": 2569 }, { "epoch": 0.0243276758076883, "grad_norm": 3.4179627895355225, "learning_rate": 1.6214511041009461e-06, "loss": 0.9595, "step": 2570 }, { "epoch": 0.024337141829403358, "grad_norm": 548.4927368164062, "learning_rate": 1.6220820189274446e-06, "loss": 33.7734, "step": 2571 }, { "epoch": 0.02434660785111841, "grad_norm": 422.91705322265625, "learning_rate": 1.6227129337539431e-06, "loss": 57.2344, "step": 2572 }, { "epoch": 0.024356073872833464, "grad_norm": 3.200866222381592, "learning_rate": 1.6233438485804416e-06, "loss": 0.9248, "step": 2573 }, { "epoch": 0.024365539894548517, "grad_norm": 508.5421142578125, "learning_rate": 1.6239747634069401e-06, "loss": 39.1328, "step": 2574 }, { "epoch": 0.024375005916263574, "grad_norm": 699.3915405273438, "learning_rate": 1.6246056782334384e-06, "loss": 78.625, "step": 2575 }, { "epoch": 0.024384471937978627, "grad_norm": 325.57568359375, "learning_rate": 1.6252365930599367e-06, "loss": 30.2188, "step": 2576 }, { "epoch": 0.02439393795969368, "grad_norm": 361.4538269042969, "learning_rate": 1.6258675078864352e-06, "loss": 29.0625, "step": 2577 }, { "epoch": 0.024403403981408733, "grad_norm": 653.7435913085938, "learning_rate": 1.6264984227129337e-06, "loss": 77.5938, "step": 2578 }, { "epoch": 0.024412870003123786, "grad_norm": 1085.3311767578125, "learning_rate": 1.6271293375394322e-06, "loss": 43.375, "step": 2579 }, { "epoch": 0.024422336024838842, "grad_norm": 282.14892578125, "learning_rate": 1.6277602523659306e-06, "loss": 36.2656, "step": 2580 }, { "epoch": 0.024431802046553895, "grad_norm": 612.9072875976562, "learning_rate": 1.628391167192429e-06, "loss": 58.8125, "step": 2581 }, { "epoch": 0.024441268068268948, "grad_norm": 418.5953063964844, "learning_rate": 1.6290220820189274e-06, "loss": 39.5, "step": 2582 }, { "epoch": 0.024450734089984, "grad_norm": 1411.4146728515625, "learning_rate": 1.6296529968454257e-06, "loss": 131.5, "step": 2583 }, { "epoch": 0.024460200111699058, "grad_norm": 369.2118225097656, "learning_rate": 1.6302839116719242e-06, "loss": 35.25, "step": 2584 }, { "epoch": 0.02446966613341411, "grad_norm": 588.639404296875, "learning_rate": 1.6309148264984227e-06, "loss": 39.2969, "step": 2585 }, { "epoch": 0.024479132155129164, "grad_norm": 295.5313415527344, "learning_rate": 1.631545741324921e-06, "loss": 29.8125, "step": 2586 }, { "epoch": 0.024488598176844217, "grad_norm": 284.24444580078125, "learning_rate": 1.6321766561514195e-06, "loss": 31.7969, "step": 2587 }, { "epoch": 0.024498064198559273, "grad_norm": 783.0303955078125, "learning_rate": 1.632807570977918e-06, "loss": 75.9688, "step": 2588 }, { "epoch": 0.024507530220274326, "grad_norm": 3.2457735538482666, "learning_rate": 1.6334384858044164e-06, "loss": 0.835, "step": 2589 }, { "epoch": 0.02451699624198938, "grad_norm": 255.68154907226562, "learning_rate": 1.6340694006309147e-06, "loss": 31.2656, "step": 2590 }, { "epoch": 0.024526462263704432, "grad_norm": 474.560791015625, "learning_rate": 1.6347003154574132e-06, "loss": 35.0781, "step": 2591 }, { "epoch": 0.024535928285419485, "grad_norm": 712.1465454101562, "learning_rate": 1.6353312302839115e-06, "loss": 58.5625, "step": 2592 }, { "epoch": 0.02454539430713454, "grad_norm": 812.2486572265625, "learning_rate": 1.63596214511041e-06, "loss": 90.7812, "step": 2593 }, { "epoch": 0.024554860328849595, "grad_norm": 981.7937622070312, "learning_rate": 1.6365930599369085e-06, "loss": 65.0547, "step": 2594 }, { "epoch": 0.024564326350564648, "grad_norm": 760.3937377929688, "learning_rate": 1.637223974763407e-06, "loss": 80.0781, "step": 2595 }, { "epoch": 0.0245737923722797, "grad_norm": 940.0902709960938, "learning_rate": 1.6378548895899053e-06, "loss": 56.2422, "step": 2596 }, { "epoch": 0.024583258393994757, "grad_norm": 632.6734619140625, "learning_rate": 1.6384858044164035e-06, "loss": 50.7734, "step": 2597 }, { "epoch": 0.02459272441570981, "grad_norm": 459.5621643066406, "learning_rate": 1.639116719242902e-06, "loss": 35.7656, "step": 2598 }, { "epoch": 0.024602190437424863, "grad_norm": 387.0630187988281, "learning_rate": 1.6397476340694005e-06, "loss": 57.5312, "step": 2599 }, { "epoch": 0.024611656459139916, "grad_norm": 2.4796934127807617, "learning_rate": 1.640378548895899e-06, "loss": 0.8801, "step": 2600 }, { "epoch": 0.024621122480854973, "grad_norm": 427.1966247558594, "learning_rate": 1.6410094637223975e-06, "loss": 36.3594, "step": 2601 }, { "epoch": 0.024630588502570026, "grad_norm": 760.2817993164062, "learning_rate": 1.641640378548896e-06, "loss": 78.3438, "step": 2602 }, { "epoch": 0.02464005452428508, "grad_norm": 612.71533203125, "learning_rate": 1.642271293375394e-06, "loss": 33.2969, "step": 2603 }, { "epoch": 0.02464952054600013, "grad_norm": 451.7579650878906, "learning_rate": 1.6429022082018926e-06, "loss": 64.7031, "step": 2604 }, { "epoch": 0.024658986567715185, "grad_norm": 795.85400390625, "learning_rate": 1.643533123028391e-06, "loss": 44.5781, "step": 2605 }, { "epoch": 0.02466845258943024, "grad_norm": 547.9177856445312, "learning_rate": 1.6441640378548896e-06, "loss": 50.6875, "step": 2606 }, { "epoch": 0.024677918611145294, "grad_norm": 285.63006591796875, "learning_rate": 1.644794952681388e-06, "loss": 28.5, "step": 2607 }, { "epoch": 0.024687384632860347, "grad_norm": 387.19427490234375, "learning_rate": 1.6454258675078863e-06, "loss": 51.4531, "step": 2608 }, { "epoch": 0.0246968506545754, "grad_norm": 408.64031982421875, "learning_rate": 1.6460567823343848e-06, "loss": 37.9219, "step": 2609 }, { "epoch": 0.024706316676290457, "grad_norm": 238.26893615722656, "learning_rate": 1.646687697160883e-06, "loss": 24.2188, "step": 2610 }, { "epoch": 0.02471578269800551, "grad_norm": 858.5697631835938, "learning_rate": 1.6473186119873816e-06, "loss": 44.0156, "step": 2611 }, { "epoch": 0.024725248719720563, "grad_norm": 270.4810485839844, "learning_rate": 1.64794952681388e-06, "loss": 31.3438, "step": 2612 }, { "epoch": 0.024734714741435616, "grad_norm": 723.4447631835938, "learning_rate": 1.6485804416403786e-06, "loss": 72.5938, "step": 2613 }, { "epoch": 0.024744180763150672, "grad_norm": 3.218783140182495, "learning_rate": 1.6492113564668769e-06, "loss": 1.0483, "step": 2614 }, { "epoch": 0.024753646784865725, "grad_norm": 266.5059814453125, "learning_rate": 1.6498422712933754e-06, "loss": 26.2031, "step": 2615 }, { "epoch": 0.024763112806580778, "grad_norm": 559.175537109375, "learning_rate": 1.6504731861198736e-06, "loss": 57.8125, "step": 2616 }, { "epoch": 0.02477257882829583, "grad_norm": 254.09620666503906, "learning_rate": 1.6511041009463721e-06, "loss": 29.2031, "step": 2617 }, { "epoch": 0.024782044850010884, "grad_norm": 424.2607421875, "learning_rate": 1.6517350157728706e-06, "loss": 34.7188, "step": 2618 }, { "epoch": 0.02479151087172594, "grad_norm": 910.9334106445312, "learning_rate": 1.652365930599369e-06, "loss": 35.1094, "step": 2619 }, { "epoch": 0.024800976893440994, "grad_norm": 171.67263793945312, "learning_rate": 1.6529968454258674e-06, "loss": 30.0781, "step": 2620 }, { "epoch": 0.024810442915156047, "grad_norm": 530.7036743164062, "learning_rate": 1.6536277602523659e-06, "loss": 44.5625, "step": 2621 }, { "epoch": 0.0248199089368711, "grad_norm": 167.5481414794922, "learning_rate": 1.6542586750788644e-06, "loss": 29.8125, "step": 2622 }, { "epoch": 0.024829374958586156, "grad_norm": 209.80296325683594, "learning_rate": 1.6548895899053627e-06, "loss": 27.3594, "step": 2623 }, { "epoch": 0.02483884098030121, "grad_norm": 470.37811279296875, "learning_rate": 1.6555205047318612e-06, "loss": 29.2266, "step": 2624 }, { "epoch": 0.024848307002016262, "grad_norm": 591.8447875976562, "learning_rate": 1.6561514195583594e-06, "loss": 53.3125, "step": 2625 }, { "epoch": 0.024857773023731315, "grad_norm": 551.34716796875, "learning_rate": 1.656782334384858e-06, "loss": 54.5625, "step": 2626 }, { "epoch": 0.02486723904544637, "grad_norm": 286.8594970703125, "learning_rate": 1.6574132492113564e-06, "loss": 28.5156, "step": 2627 }, { "epoch": 0.024876705067161425, "grad_norm": 392.2592468261719, "learning_rate": 1.658044164037855e-06, "loss": 55.0938, "step": 2628 }, { "epoch": 0.024886171088876478, "grad_norm": 596.5138549804688, "learning_rate": 1.6586750788643534e-06, "loss": 33.5156, "step": 2629 }, { "epoch": 0.02489563711059153, "grad_norm": 198.96395874023438, "learning_rate": 1.6593059936908515e-06, "loss": 30.2031, "step": 2630 }, { "epoch": 0.024905103132306587, "grad_norm": 1028.7899169921875, "learning_rate": 1.65993690851735e-06, "loss": 59.875, "step": 2631 }, { "epoch": 0.02491456915402164, "grad_norm": 404.69757080078125, "learning_rate": 1.6605678233438485e-06, "loss": 29.3438, "step": 2632 }, { "epoch": 0.024924035175736693, "grad_norm": 147.0840301513672, "learning_rate": 1.661198738170347e-06, "loss": 24.3438, "step": 2633 }, { "epoch": 0.024933501197451746, "grad_norm": 326.2648010253906, "learning_rate": 1.6618296529968454e-06, "loss": 37.1406, "step": 2634 }, { "epoch": 0.0249429672191668, "grad_norm": 1136.46142578125, "learning_rate": 1.662460567823344e-06, "loss": 54.6641, "step": 2635 }, { "epoch": 0.024952433240881856, "grad_norm": 810.0060424804688, "learning_rate": 1.663091482649842e-06, "loss": 31.2344, "step": 2636 }, { "epoch": 0.02496189926259691, "grad_norm": 650.9986572265625, "learning_rate": 1.6637223974763405e-06, "loss": 65.375, "step": 2637 }, { "epoch": 0.02497136528431196, "grad_norm": 374.7366638183594, "learning_rate": 1.664353312302839e-06, "loss": 32.0391, "step": 2638 }, { "epoch": 0.024980831306027015, "grad_norm": 695.9627075195312, "learning_rate": 1.6649842271293375e-06, "loss": 28.0938, "step": 2639 }, { "epoch": 0.02499029732774207, "grad_norm": 513.5623779296875, "learning_rate": 1.665615141955836e-06, "loss": 34.4688, "step": 2640 }, { "epoch": 0.024999763349457124, "grad_norm": 314.4490661621094, "learning_rate": 1.6662460567823343e-06, "loss": 36.2656, "step": 2641 }, { "epoch": 0.025009229371172177, "grad_norm": 2.8502538204193115, "learning_rate": 1.6668769716088328e-06, "loss": 0.8604, "step": 2642 }, { "epoch": 0.02501869539288723, "grad_norm": 558.7369384765625, "learning_rate": 1.667507886435331e-06, "loss": 56.5469, "step": 2643 }, { "epoch": 0.025028161414602287, "grad_norm": 180.07810974121094, "learning_rate": 1.6681388012618295e-06, "loss": 25.4844, "step": 2644 }, { "epoch": 0.02503762743631734, "grad_norm": 282.7625427246094, "learning_rate": 1.668769716088328e-06, "loss": 30.4688, "step": 2645 }, { "epoch": 0.025047093458032393, "grad_norm": 403.6738586425781, "learning_rate": 1.6694006309148265e-06, "loss": 40.7812, "step": 2646 }, { "epoch": 0.025056559479747446, "grad_norm": 379.2559814453125, "learning_rate": 1.6700315457413248e-06, "loss": 33.2969, "step": 2647 }, { "epoch": 0.0250660255014625, "grad_norm": 351.2530212402344, "learning_rate": 1.6706624605678233e-06, "loss": 30.9375, "step": 2648 }, { "epoch": 0.025075491523177555, "grad_norm": 322.33331298828125, "learning_rate": 1.6712933753943218e-06, "loss": 43.3281, "step": 2649 }, { "epoch": 0.025084957544892608, "grad_norm": 446.025390625, "learning_rate": 1.67192429022082e-06, "loss": 31.4844, "step": 2650 }, { "epoch": 0.02509442356660766, "grad_norm": 319.3594055175781, "learning_rate": 1.6725552050473186e-06, "loss": 29.1875, "step": 2651 }, { "epoch": 0.025103889588322714, "grad_norm": 3.5256600379943848, "learning_rate": 1.6731861198738168e-06, "loss": 0.8628, "step": 2652 }, { "epoch": 0.02511335561003777, "grad_norm": 280.93890380859375, "learning_rate": 1.6738170347003153e-06, "loss": 30.3906, "step": 2653 }, { "epoch": 0.025122821631752824, "grad_norm": 611.7548828125, "learning_rate": 1.6744479495268138e-06, "loss": 50.5703, "step": 2654 }, { "epoch": 0.025132287653467877, "grad_norm": 819.0433349609375, "learning_rate": 1.6750788643533123e-06, "loss": 36.4375, "step": 2655 }, { "epoch": 0.02514175367518293, "grad_norm": 3.115039587020874, "learning_rate": 1.6757097791798108e-06, "loss": 0.9321, "step": 2656 }, { "epoch": 0.025151219696897986, "grad_norm": 249.7755889892578, "learning_rate": 1.676340694006309e-06, "loss": 27.2969, "step": 2657 }, { "epoch": 0.02516068571861304, "grad_norm": 490.6873474121094, "learning_rate": 1.6769716088328074e-06, "loss": 39.2031, "step": 2658 }, { "epoch": 0.025170151740328092, "grad_norm": 255.0899200439453, "learning_rate": 1.6776025236593059e-06, "loss": 28.1562, "step": 2659 }, { "epoch": 0.025179617762043145, "grad_norm": 499.3311767578125, "learning_rate": 1.6782334384858044e-06, "loss": 54.5469, "step": 2660 }, { "epoch": 0.025189083783758198, "grad_norm": 748.7826538085938, "learning_rate": 1.6788643533123028e-06, "loss": 49.4453, "step": 2661 }, { "epoch": 0.025198549805473255, "grad_norm": 202.21563720703125, "learning_rate": 1.6794952681388013e-06, "loss": 29.4688, "step": 2662 }, { "epoch": 0.025208015827188308, "grad_norm": 527.5892944335938, "learning_rate": 1.6801261829652994e-06, "loss": 70.0, "step": 2663 }, { "epoch": 0.02521748184890336, "grad_norm": 297.2985534667969, "learning_rate": 1.680757097791798e-06, "loss": 29.4844, "step": 2664 }, { "epoch": 0.025226947870618414, "grad_norm": 496.70867919921875, "learning_rate": 1.6813880126182964e-06, "loss": 30.2266, "step": 2665 }, { "epoch": 0.02523641389233347, "grad_norm": 281.6521911621094, "learning_rate": 1.6820189274447949e-06, "loss": 34.2969, "step": 2666 }, { "epoch": 0.025245879914048523, "grad_norm": 355.12335205078125, "learning_rate": 1.6826498422712934e-06, "loss": 46.7266, "step": 2667 }, { "epoch": 0.025255345935763576, "grad_norm": 694.3997192382812, "learning_rate": 1.6832807570977919e-06, "loss": 44.2969, "step": 2668 }, { "epoch": 0.02526481195747863, "grad_norm": 373.8608093261719, "learning_rate": 1.6839116719242902e-06, "loss": 30.2188, "step": 2669 }, { "epoch": 0.025274277979193686, "grad_norm": 608.4727783203125, "learning_rate": 1.6845425867507884e-06, "loss": 56.0625, "step": 2670 }, { "epoch": 0.02528374400090874, "grad_norm": 900.3016967773438, "learning_rate": 1.685173501577287e-06, "loss": 59.4531, "step": 2671 }, { "epoch": 0.02529321002262379, "grad_norm": 227.3461151123047, "learning_rate": 1.6858044164037854e-06, "loss": 27.6875, "step": 2672 }, { "epoch": 0.025302676044338845, "grad_norm": 519.3485107421875, "learning_rate": 1.686435331230284e-06, "loss": 31.3359, "step": 2673 }, { "epoch": 0.0253121420660539, "grad_norm": 358.24005126953125, "learning_rate": 1.6870662460567822e-06, "loss": 34.1953, "step": 2674 }, { "epoch": 0.025321608087768954, "grad_norm": 168.932861328125, "learning_rate": 1.6876971608832807e-06, "loss": 27.8125, "step": 2675 }, { "epoch": 0.025331074109484007, "grad_norm": 546.76904296875, "learning_rate": 1.6883280757097792e-06, "loss": 40.8125, "step": 2676 }, { "epoch": 0.02534054013119906, "grad_norm": 1566.366455078125, "learning_rate": 1.6889589905362775e-06, "loss": 65.125, "step": 2677 }, { "epoch": 0.025350006152914113, "grad_norm": 614.32763671875, "learning_rate": 1.689589905362776e-06, "loss": 69.7969, "step": 2678 }, { "epoch": 0.02535947217462917, "grad_norm": 497.68304443359375, "learning_rate": 1.6902208201892744e-06, "loss": 45.4375, "step": 2679 }, { "epoch": 0.025368938196344223, "grad_norm": 495.5487976074219, "learning_rate": 1.6908517350157727e-06, "loss": 30.375, "step": 2680 }, { "epoch": 0.025378404218059276, "grad_norm": 471.08636474609375, "learning_rate": 1.6914826498422712e-06, "loss": 37.4062, "step": 2681 }, { "epoch": 0.02538787023977433, "grad_norm": 272.4569091796875, "learning_rate": 1.6921135646687697e-06, "loss": 28.8203, "step": 2682 }, { "epoch": 0.025397336261489385, "grad_norm": 279.0712585449219, "learning_rate": 1.6927444794952682e-06, "loss": 27.3438, "step": 2683 }, { "epoch": 0.025406802283204438, "grad_norm": 1166.0223388671875, "learning_rate": 1.6933753943217665e-06, "loss": 76.125, "step": 2684 }, { "epoch": 0.02541626830491949, "grad_norm": 984.75390625, "learning_rate": 1.6940063091482648e-06, "loss": 71.75, "step": 2685 }, { "epoch": 0.025425734326634544, "grad_norm": 239.48341369628906, "learning_rate": 1.6946372239747633e-06, "loss": 36.7656, "step": 2686 }, { "epoch": 0.0254352003483496, "grad_norm": 451.8413391113281, "learning_rate": 1.6952681388012618e-06, "loss": 37.4844, "step": 2687 }, { "epoch": 0.025444666370064654, "grad_norm": 852.718505859375, "learning_rate": 1.6958990536277602e-06, "loss": 70.8281, "step": 2688 }, { "epoch": 0.025454132391779707, "grad_norm": 464.1305847167969, "learning_rate": 1.6965299684542587e-06, "loss": 37.8281, "step": 2689 }, { "epoch": 0.02546359841349476, "grad_norm": 434.5806884765625, "learning_rate": 1.697160883280757e-06, "loss": 33.2031, "step": 2690 }, { "epoch": 0.025473064435209813, "grad_norm": 472.73712158203125, "learning_rate": 1.6977917981072553e-06, "loss": 43.4375, "step": 2691 }, { "epoch": 0.02548253045692487, "grad_norm": 411.58245849609375, "learning_rate": 1.6984227129337538e-06, "loss": 28.8281, "step": 2692 }, { "epoch": 0.025491996478639922, "grad_norm": 641.1148681640625, "learning_rate": 1.6990536277602523e-06, "loss": 64.2812, "step": 2693 }, { "epoch": 0.025501462500354975, "grad_norm": 958.8942260742188, "learning_rate": 1.6996845425867508e-06, "loss": 33.6172, "step": 2694 }, { "epoch": 0.025510928522070028, "grad_norm": 202.77438354492188, "learning_rate": 1.7003154574132493e-06, "loss": 25.4688, "step": 2695 }, { "epoch": 0.025520394543785085, "grad_norm": 598.4524536132812, "learning_rate": 1.7009463722397476e-06, "loss": 58.1875, "step": 2696 }, { "epoch": 0.025529860565500138, "grad_norm": 985.9261474609375, "learning_rate": 1.7015772870662458e-06, "loss": 81.9062, "step": 2697 }, { "epoch": 0.02553932658721519, "grad_norm": 318.88177490234375, "learning_rate": 1.7022082018927443e-06, "loss": 31.4219, "step": 2698 }, { "epoch": 0.025548792608930244, "grad_norm": 710.455322265625, "learning_rate": 1.7028391167192428e-06, "loss": 48.3594, "step": 2699 }, { "epoch": 0.0255582586306453, "grad_norm": 286.2360534667969, "learning_rate": 1.7034700315457413e-06, "loss": 26.4062, "step": 2700 }, { "epoch": 0.025567724652360353, "grad_norm": 1156.4337158203125, "learning_rate": 1.7041009463722398e-06, "loss": 70.1641, "step": 2701 }, { "epoch": 0.025577190674075406, "grad_norm": 323.4486083984375, "learning_rate": 1.704731861198738e-06, "loss": 39.9062, "step": 2702 }, { "epoch": 0.02558665669579046, "grad_norm": 323.2873229980469, "learning_rate": 1.7053627760252366e-06, "loss": 31.8438, "step": 2703 }, { "epoch": 0.025596122717505512, "grad_norm": 682.8297729492188, "learning_rate": 1.7059936908517349e-06, "loss": 68.875, "step": 2704 }, { "epoch": 0.02560558873922057, "grad_norm": 373.12109375, "learning_rate": 1.7066246056782334e-06, "loss": 49.6094, "step": 2705 }, { "epoch": 0.025615054760935622, "grad_norm": 618.4752197265625, "learning_rate": 1.7072555205047318e-06, "loss": 40.4062, "step": 2706 }, { "epoch": 0.025624520782650675, "grad_norm": 475.98345947265625, "learning_rate": 1.7078864353312301e-06, "loss": 39.7422, "step": 2707 }, { "epoch": 0.025633986804365728, "grad_norm": 290.0970153808594, "learning_rate": 1.7085173501577286e-06, "loss": 35.0, "step": 2708 }, { "epoch": 0.025643452826080784, "grad_norm": 844.6924438476562, "learning_rate": 1.7091482649842271e-06, "loss": 33.4922, "step": 2709 }, { "epoch": 0.025652918847795837, "grad_norm": 727.2000122070312, "learning_rate": 1.7097791798107254e-06, "loss": 60.6406, "step": 2710 }, { "epoch": 0.02566238486951089, "grad_norm": 572.1240844726562, "learning_rate": 1.7104100946372239e-06, "loss": 76.6953, "step": 2711 }, { "epoch": 0.025671850891225943, "grad_norm": 626.849853515625, "learning_rate": 1.7110410094637224e-06, "loss": 53.6484, "step": 2712 }, { "epoch": 0.025681316912941, "grad_norm": 400.2272644042969, "learning_rate": 1.7116719242902207e-06, "loss": 45.1406, "step": 2713 }, { "epoch": 0.025690782934656053, "grad_norm": 544.2955932617188, "learning_rate": 1.7123028391167192e-06, "loss": 64.5, "step": 2714 }, { "epoch": 0.025700248956371106, "grad_norm": 542.7023315429688, "learning_rate": 1.7129337539432176e-06, "loss": 34.8125, "step": 2715 }, { "epoch": 0.02570971497808616, "grad_norm": 243.9163360595703, "learning_rate": 1.7135646687697161e-06, "loss": 35.3281, "step": 2716 }, { "epoch": 0.025719180999801215, "grad_norm": 2.527618646621704, "learning_rate": 1.7141955835962144e-06, "loss": 0.9043, "step": 2717 }, { "epoch": 0.025728647021516268, "grad_norm": 2.8647079467773438, "learning_rate": 1.7148264984227127e-06, "loss": 0.7739, "step": 2718 }, { "epoch": 0.02573811304323132, "grad_norm": 1140.50537109375, "learning_rate": 1.7154574132492112e-06, "loss": 53.2031, "step": 2719 }, { "epoch": 0.025747579064946374, "grad_norm": 638.27978515625, "learning_rate": 1.7160883280757097e-06, "loss": 46.9688, "step": 2720 }, { "epoch": 0.025757045086661427, "grad_norm": 378.2288818359375, "learning_rate": 1.7167192429022082e-06, "loss": 35.25, "step": 2721 }, { "epoch": 0.025766511108376484, "grad_norm": 240.7015838623047, "learning_rate": 1.7173501577287067e-06, "loss": 26.5469, "step": 2722 }, { "epoch": 0.025775977130091537, "grad_norm": 355.5525207519531, "learning_rate": 1.7179810725552052e-06, "loss": 56.7344, "step": 2723 }, { "epoch": 0.02578544315180659, "grad_norm": 704.893310546875, "learning_rate": 1.7186119873817032e-06, "loss": 31.9844, "step": 2724 }, { "epoch": 0.025794909173521643, "grad_norm": 1154.3360595703125, "learning_rate": 1.7192429022082017e-06, "loss": 88.2969, "step": 2725 }, { "epoch": 0.0258043751952367, "grad_norm": 477.3113708496094, "learning_rate": 1.7198738170347002e-06, "loss": 45.9375, "step": 2726 }, { "epoch": 0.025813841216951752, "grad_norm": 306.5318603515625, "learning_rate": 1.7205047318611987e-06, "loss": 26.4844, "step": 2727 }, { "epoch": 0.025823307238666805, "grad_norm": 317.53155517578125, "learning_rate": 1.7211356466876972e-06, "loss": 31.1094, "step": 2728 }, { "epoch": 0.02583277326038186, "grad_norm": 650.7823486328125, "learning_rate": 1.7217665615141955e-06, "loss": 65.7812, "step": 2729 }, { "epoch": 0.025842239282096915, "grad_norm": 656.7516479492188, "learning_rate": 1.722397476340694e-06, "loss": 55.4219, "step": 2730 }, { "epoch": 0.025851705303811968, "grad_norm": 872.271240234375, "learning_rate": 1.7230283911671923e-06, "loss": 49.3047, "step": 2731 }, { "epoch": 0.02586117132552702, "grad_norm": 3.0492806434631348, "learning_rate": 1.7236593059936908e-06, "loss": 0.9639, "step": 2732 }, { "epoch": 0.025870637347242074, "grad_norm": 263.12451171875, "learning_rate": 1.7242902208201892e-06, "loss": 28.7656, "step": 2733 }, { "epoch": 0.025880103368957127, "grad_norm": 362.71343994140625, "learning_rate": 1.7249211356466875e-06, "loss": 37.875, "step": 2734 }, { "epoch": 0.025889569390672183, "grad_norm": 185.2198486328125, "learning_rate": 1.725552050473186e-06, "loss": 23.9219, "step": 2735 }, { "epoch": 0.025899035412387236, "grad_norm": 273.0008239746094, "learning_rate": 1.7261829652996845e-06, "loss": 26.2188, "step": 2736 }, { "epoch": 0.02590850143410229, "grad_norm": 525.3977661132812, "learning_rate": 1.7268138801261828e-06, "loss": 40.0312, "step": 2737 }, { "epoch": 0.025917967455817342, "grad_norm": 170.51141357421875, "learning_rate": 1.7274447949526813e-06, "loss": 25.25, "step": 2738 }, { "epoch": 0.0259274334775324, "grad_norm": 3.146193027496338, "learning_rate": 1.7280757097791798e-06, "loss": 0.9331, "step": 2739 }, { "epoch": 0.025936899499247452, "grad_norm": 3.0030317306518555, "learning_rate": 1.728706624605678e-06, "loss": 0.9316, "step": 2740 }, { "epoch": 0.025946365520962505, "grad_norm": 3.310807228088379, "learning_rate": 1.7293375394321766e-06, "loss": 0.73, "step": 2741 }, { "epoch": 0.025955831542677558, "grad_norm": 218.32090759277344, "learning_rate": 1.729968454258675e-06, "loss": 25.4062, "step": 2742 }, { "epoch": 0.025965297564392614, "grad_norm": 1039.82373046875, "learning_rate": 1.7305993690851735e-06, "loss": 43.6719, "step": 2743 }, { "epoch": 0.025974763586107667, "grad_norm": 478.22412109375, "learning_rate": 1.7312302839116718e-06, "loss": 32.4375, "step": 2744 }, { "epoch": 0.02598422960782272, "grad_norm": 550.2298583984375, "learning_rate": 1.73186119873817e-06, "loss": 34.2969, "step": 2745 }, { "epoch": 0.025993695629537773, "grad_norm": 546.6349487304688, "learning_rate": 1.7324921135646686e-06, "loss": 28.6172, "step": 2746 }, { "epoch": 0.026003161651252826, "grad_norm": 406.2030334472656, "learning_rate": 1.733123028391167e-06, "loss": 38.7969, "step": 2747 }, { "epoch": 0.026012627672967883, "grad_norm": 617.5592651367188, "learning_rate": 1.7337539432176656e-06, "loss": 47.3125, "step": 2748 }, { "epoch": 0.026022093694682936, "grad_norm": 945.5282592773438, "learning_rate": 1.734384858044164e-06, "loss": 33.7344, "step": 2749 }, { "epoch": 0.02603155971639799, "grad_norm": 799.9861450195312, "learning_rate": 1.7350157728706626e-06, "loss": 55.6875, "step": 2750 }, { "epoch": 0.026041025738113042, "grad_norm": 746.1524047851562, "learning_rate": 1.7356466876971606e-06, "loss": 55.6406, "step": 2751 }, { "epoch": 0.0260504917598281, "grad_norm": 529.6326904296875, "learning_rate": 1.7362776025236591e-06, "loss": 24.7656, "step": 2752 }, { "epoch": 0.02605995778154315, "grad_norm": 217.0042266845703, "learning_rate": 1.7369085173501576e-06, "loss": 25.7969, "step": 2753 }, { "epoch": 0.026069423803258204, "grad_norm": 585.0179443359375, "learning_rate": 1.7375394321766561e-06, "loss": 38.5, "step": 2754 }, { "epoch": 0.026078889824973257, "grad_norm": 3.275090217590332, "learning_rate": 1.7381703470031546e-06, "loss": 0.96, "step": 2755 }, { "epoch": 0.026088355846688314, "grad_norm": 979.0383911132812, "learning_rate": 1.7388012618296529e-06, "loss": 56.3359, "step": 2756 }, { "epoch": 0.026097821868403367, "grad_norm": 1065.514892578125, "learning_rate": 1.7394321766561512e-06, "loss": 40.7031, "step": 2757 }, { "epoch": 0.02610728789011842, "grad_norm": 482.52789306640625, "learning_rate": 1.7400630914826497e-06, "loss": 39.8594, "step": 2758 }, { "epoch": 0.026116753911833473, "grad_norm": 397.1623229980469, "learning_rate": 1.7406940063091482e-06, "loss": 27.0625, "step": 2759 }, { "epoch": 0.02612621993354853, "grad_norm": 3.2249345779418945, "learning_rate": 1.7413249211356467e-06, "loss": 1.0259, "step": 2760 }, { "epoch": 0.026135685955263582, "grad_norm": 203.3842315673828, "learning_rate": 1.7419558359621451e-06, "loss": 29.1406, "step": 2761 }, { "epoch": 0.026145151976978635, "grad_norm": 324.55291748046875, "learning_rate": 1.7425867507886434e-06, "loss": 36.7344, "step": 2762 }, { "epoch": 0.02615461799869369, "grad_norm": 901.9595947265625, "learning_rate": 1.743217665615142e-06, "loss": 40.1719, "step": 2763 }, { "epoch": 0.02616408402040874, "grad_norm": 409.9023742675781, "learning_rate": 1.7438485804416402e-06, "loss": 25.5156, "step": 2764 }, { "epoch": 0.026173550042123798, "grad_norm": 983.4058837890625, "learning_rate": 1.7444794952681387e-06, "loss": 60.75, "step": 2765 }, { "epoch": 0.02618301606383885, "grad_norm": 186.9498291015625, "learning_rate": 1.7451104100946372e-06, "loss": 30.5781, "step": 2766 }, { "epoch": 0.026192482085553904, "grad_norm": 645.4475708007812, "learning_rate": 1.7457413249211355e-06, "loss": 38.9844, "step": 2767 }, { "epoch": 0.026201948107268957, "grad_norm": 461.9501037597656, "learning_rate": 1.746372239747634e-06, "loss": 28.625, "step": 2768 }, { "epoch": 0.026211414128984013, "grad_norm": 591.8121948242188, "learning_rate": 1.7470031545741325e-06, "loss": 35.2031, "step": 2769 }, { "epoch": 0.026220880150699066, "grad_norm": 3.4063401222229004, "learning_rate": 1.747634069400631e-06, "loss": 1.0864, "step": 2770 }, { "epoch": 0.02623034617241412, "grad_norm": 227.680908203125, "learning_rate": 1.7482649842271292e-06, "loss": 34.2969, "step": 2771 }, { "epoch": 0.026239812194129172, "grad_norm": 223.0672149658203, "learning_rate": 1.7488958990536277e-06, "loss": 26.875, "step": 2772 }, { "epoch": 0.02624927821584423, "grad_norm": 270.0115661621094, "learning_rate": 1.749526813880126e-06, "loss": 28.9375, "step": 2773 }, { "epoch": 0.026258744237559282, "grad_norm": 242.89503479003906, "learning_rate": 1.7501577287066245e-06, "loss": 26.2969, "step": 2774 }, { "epoch": 0.026268210259274335, "grad_norm": 579.7394409179688, "learning_rate": 1.750788643533123e-06, "loss": 63.2812, "step": 2775 }, { "epoch": 0.026277676280989388, "grad_norm": 591.1427612304688, "learning_rate": 1.7514195583596215e-06, "loss": 48.9531, "step": 2776 }, { "epoch": 0.02628714230270444, "grad_norm": 1042.229736328125, "learning_rate": 1.75205047318612e-06, "loss": 59.0469, "step": 2777 }, { "epoch": 0.026296608324419497, "grad_norm": 181.74331665039062, "learning_rate": 1.752681388012618e-06, "loss": 27.75, "step": 2778 }, { "epoch": 0.02630607434613455, "grad_norm": 471.4330749511719, "learning_rate": 1.7533123028391165e-06, "loss": 28.3438, "step": 2779 }, { "epoch": 0.026315540367849603, "grad_norm": 238.8199462890625, "learning_rate": 1.753943217665615e-06, "loss": 26.8438, "step": 2780 }, { "epoch": 0.026325006389564656, "grad_norm": 2.891493797302246, "learning_rate": 1.7545741324921135e-06, "loss": 0.8516, "step": 2781 }, { "epoch": 0.026334472411279713, "grad_norm": 228.8212432861328, "learning_rate": 1.755205047318612e-06, "loss": 30.5312, "step": 2782 }, { "epoch": 0.026343938432994766, "grad_norm": 924.3455200195312, "learning_rate": 1.7558359621451105e-06, "loss": 61.6094, "step": 2783 }, { "epoch": 0.02635340445470982, "grad_norm": 387.9447937011719, "learning_rate": 1.7564668769716086e-06, "loss": 57.75, "step": 2784 }, { "epoch": 0.026362870476424872, "grad_norm": 416.6798400878906, "learning_rate": 1.757097791798107e-06, "loss": 37.8125, "step": 2785 }, { "epoch": 0.02637233649813993, "grad_norm": 223.91384887695312, "learning_rate": 1.7577287066246056e-06, "loss": 32.5, "step": 2786 }, { "epoch": 0.02638180251985498, "grad_norm": 437.2425537109375, "learning_rate": 1.758359621451104e-06, "loss": 38.8125, "step": 2787 }, { "epoch": 0.026391268541570034, "grad_norm": 522.0120239257812, "learning_rate": 1.7589905362776025e-06, "loss": 57.4844, "step": 2788 }, { "epoch": 0.026400734563285087, "grad_norm": 622.3343505859375, "learning_rate": 1.7596214511041008e-06, "loss": 54.5469, "step": 2789 }, { "epoch": 0.02641020058500014, "grad_norm": 217.47483825683594, "learning_rate": 1.7602523659305993e-06, "loss": 27.7344, "step": 2790 }, { "epoch": 0.026419666606715197, "grad_norm": 2.903043270111084, "learning_rate": 1.7608832807570976e-06, "loss": 1.0098, "step": 2791 }, { "epoch": 0.02642913262843025, "grad_norm": 434.9029541015625, "learning_rate": 1.761514195583596e-06, "loss": 31.8047, "step": 2792 }, { "epoch": 0.026438598650145303, "grad_norm": 446.3778381347656, "learning_rate": 1.7621451104100946e-06, "loss": 71.75, "step": 2793 }, { "epoch": 0.026448064671860356, "grad_norm": 379.9262390136719, "learning_rate": 1.762776025236593e-06, "loss": 36.0156, "step": 2794 }, { "epoch": 0.026457530693575412, "grad_norm": 1284.861083984375, "learning_rate": 1.7634069400630914e-06, "loss": 75.7578, "step": 2795 }, { "epoch": 0.026466996715290465, "grad_norm": 207.57882690429688, "learning_rate": 1.7640378548895899e-06, "loss": 37.9688, "step": 2796 }, { "epoch": 0.02647646273700552, "grad_norm": 564.1875610351562, "learning_rate": 1.7646687697160883e-06, "loss": 61.9062, "step": 2797 }, { "epoch": 0.02648592875872057, "grad_norm": 264.1395568847656, "learning_rate": 1.7652996845425866e-06, "loss": 34.5469, "step": 2798 }, { "epoch": 0.026495394780435628, "grad_norm": 771.4548950195312, "learning_rate": 1.7659305993690851e-06, "loss": 63.8125, "step": 2799 }, { "epoch": 0.02650486080215068, "grad_norm": 178.29049682617188, "learning_rate": 1.7665615141955834e-06, "loss": 26.7812, "step": 2800 }, { "epoch": 0.026514326823865734, "grad_norm": 749.8849487304688, "learning_rate": 1.7671924290220819e-06, "loss": 68.5312, "step": 2801 }, { "epoch": 0.026523792845580787, "grad_norm": 580.8681030273438, "learning_rate": 1.7678233438485804e-06, "loss": 53.0625, "step": 2802 }, { "epoch": 0.02653325886729584, "grad_norm": 288.089599609375, "learning_rate": 1.7684542586750789e-06, "loss": 36.3281, "step": 2803 }, { "epoch": 0.026542724889010896, "grad_norm": 316.9490661621094, "learning_rate": 1.7690851735015774e-06, "loss": 31.8906, "step": 2804 }, { "epoch": 0.02655219091072595, "grad_norm": 369.63287353515625, "learning_rate": 1.7697160883280757e-06, "loss": 25.1562, "step": 2805 }, { "epoch": 0.026561656932441002, "grad_norm": 258.46282958984375, "learning_rate": 1.770347003154574e-06, "loss": 25.8906, "step": 2806 }, { "epoch": 0.026571122954156055, "grad_norm": 515.8242797851562, "learning_rate": 1.7709779179810724e-06, "loss": 44.2031, "step": 2807 }, { "epoch": 0.026580588975871112, "grad_norm": 405.8348693847656, "learning_rate": 1.771608832807571e-06, "loss": 26.6094, "step": 2808 }, { "epoch": 0.026590054997586165, "grad_norm": 3.2274625301361084, "learning_rate": 1.7722397476340694e-06, "loss": 0.8809, "step": 2809 }, { "epoch": 0.026599521019301218, "grad_norm": 3.190586805343628, "learning_rate": 1.772870662460568e-06, "loss": 0.9146, "step": 2810 }, { "epoch": 0.02660898704101627, "grad_norm": 341.8644104003906, "learning_rate": 1.773501577287066e-06, "loss": 29.2344, "step": 2811 }, { "epoch": 0.026618453062731327, "grad_norm": 2.847303628921509, "learning_rate": 1.7741324921135645e-06, "loss": 0.7664, "step": 2812 }, { "epoch": 0.02662791908444638, "grad_norm": 531.3494262695312, "learning_rate": 1.774763406940063e-06, "loss": 33.2969, "step": 2813 }, { "epoch": 0.026637385106161433, "grad_norm": 467.3076477050781, "learning_rate": 1.7753943217665615e-06, "loss": 37.7812, "step": 2814 }, { "epoch": 0.026646851127876486, "grad_norm": 277.7179870605469, "learning_rate": 1.77602523659306e-06, "loss": 27.9375, "step": 2815 }, { "epoch": 0.026656317149591543, "grad_norm": 1118.260986328125, "learning_rate": 1.7766561514195584e-06, "loss": 96.125, "step": 2816 }, { "epoch": 0.026665783171306596, "grad_norm": 484.56427001953125, "learning_rate": 1.7772870662460567e-06, "loss": 31.6094, "step": 2817 }, { "epoch": 0.02667524919302165, "grad_norm": 735.9550170898438, "learning_rate": 1.777917981072555e-06, "loss": 30.6875, "step": 2818 }, { "epoch": 0.026684715214736702, "grad_norm": 340.3723449707031, "learning_rate": 1.7785488958990535e-06, "loss": 29.1406, "step": 2819 }, { "epoch": 0.026694181236451755, "grad_norm": 1080.3291015625, "learning_rate": 1.779179810725552e-06, "loss": 79.25, "step": 2820 }, { "epoch": 0.02670364725816681, "grad_norm": 230.18215942382812, "learning_rate": 1.7798107255520505e-06, "loss": 27.8438, "step": 2821 }, { "epoch": 0.026713113279881864, "grad_norm": 297.8983154296875, "learning_rate": 1.7804416403785488e-06, "loss": 16.8125, "step": 2822 }, { "epoch": 0.026722579301596917, "grad_norm": 212.73069763183594, "learning_rate": 1.7810725552050473e-06, "loss": 30.5938, "step": 2823 }, { "epoch": 0.02673204532331197, "grad_norm": 864.5977783203125, "learning_rate": 1.7817034700315457e-06, "loss": 41.9297, "step": 2824 }, { "epoch": 0.026741511345027027, "grad_norm": 411.2685546875, "learning_rate": 1.782334384858044e-06, "loss": 33.4375, "step": 2825 }, { "epoch": 0.02675097736674208, "grad_norm": 367.34368896484375, "learning_rate": 1.7829652996845425e-06, "loss": 35.3281, "step": 2826 }, { "epoch": 0.026760443388457133, "grad_norm": 304.8624572753906, "learning_rate": 1.783596214511041e-06, "loss": 30.0156, "step": 2827 }, { "epoch": 0.026769909410172186, "grad_norm": 510.8151550292969, "learning_rate": 1.7842271293375393e-06, "loss": 31.7188, "step": 2828 }, { "epoch": 0.026779375431887242, "grad_norm": 1009.8579711914062, "learning_rate": 1.7848580441640378e-06, "loss": 67.125, "step": 2829 }, { "epoch": 0.026788841453602295, "grad_norm": 995.3522338867188, "learning_rate": 1.7854889589905363e-06, "loss": 51.8281, "step": 2830 }, { "epoch": 0.02679830747531735, "grad_norm": 483.04833984375, "learning_rate": 1.7861198738170346e-06, "loss": 28.3125, "step": 2831 }, { "epoch": 0.0268077734970324, "grad_norm": 2.9699525833129883, "learning_rate": 1.786750788643533e-06, "loss": 0.7661, "step": 2832 }, { "epoch": 0.026817239518747454, "grad_norm": 333.9750061035156, "learning_rate": 1.7873817034700313e-06, "loss": 41.0938, "step": 2833 }, { "epoch": 0.02682670554046251, "grad_norm": 781.246826171875, "learning_rate": 1.7880126182965298e-06, "loss": 24.7969, "step": 2834 }, { "epoch": 0.026836171562177564, "grad_norm": 395.54901123046875, "learning_rate": 1.7886435331230283e-06, "loss": 34.0938, "step": 2835 }, { "epoch": 0.026845637583892617, "grad_norm": 434.2903747558594, "learning_rate": 1.7892744479495268e-06, "loss": 33.8672, "step": 2836 }, { "epoch": 0.02685510360560767, "grad_norm": 797.2318725585938, "learning_rate": 1.7899053627760253e-06, "loss": 76.6562, "step": 2837 }, { "epoch": 0.026864569627322726, "grad_norm": 235.50624084472656, "learning_rate": 1.7905362776025236e-06, "loss": 33.125, "step": 2838 }, { "epoch": 0.02687403564903778, "grad_norm": 841.7157592773438, "learning_rate": 1.7911671924290219e-06, "loss": 60.5938, "step": 2839 }, { "epoch": 0.026883501670752832, "grad_norm": 304.28167724609375, "learning_rate": 1.7917981072555204e-06, "loss": 29.9375, "step": 2840 }, { "epoch": 0.026892967692467885, "grad_norm": 1488.8602294921875, "learning_rate": 1.7924290220820189e-06, "loss": 92.9531, "step": 2841 }, { "epoch": 0.026902433714182942, "grad_norm": 407.4042053222656, "learning_rate": 1.7930599369085173e-06, "loss": 20.5938, "step": 2842 }, { "epoch": 0.026911899735897995, "grad_norm": 265.29425048828125, "learning_rate": 1.7936908517350158e-06, "loss": 33.2656, "step": 2843 }, { "epoch": 0.026921365757613048, "grad_norm": 1419.1766357421875, "learning_rate": 1.7943217665615141e-06, "loss": 94.4922, "step": 2844 }, { "epoch": 0.0269308317793281, "grad_norm": 432.6026306152344, "learning_rate": 1.7949526813880124e-06, "loss": 39.3125, "step": 2845 }, { "epoch": 0.026940297801043154, "grad_norm": 261.84100341796875, "learning_rate": 1.7955835962145109e-06, "loss": 29.9844, "step": 2846 }, { "epoch": 0.02694976382275821, "grad_norm": 2.5843558311462402, "learning_rate": 1.7962145110410094e-06, "loss": 0.8862, "step": 2847 }, { "epoch": 0.026959229844473263, "grad_norm": 426.1330261230469, "learning_rate": 1.7968454258675079e-06, "loss": 54.3438, "step": 2848 }, { "epoch": 0.026968695866188316, "grad_norm": 406.1197204589844, "learning_rate": 1.7974763406940064e-06, "loss": 31.6562, "step": 2849 }, { "epoch": 0.02697816188790337, "grad_norm": 1153.66845703125, "learning_rate": 1.7981072555205047e-06, "loss": 78.4531, "step": 2850 }, { "epoch": 0.026987627909618426, "grad_norm": 445.91510009765625, "learning_rate": 1.798738170347003e-06, "loss": 53.9375, "step": 2851 }, { "epoch": 0.02699709393133348, "grad_norm": 2.7544503211975098, "learning_rate": 1.7993690851735014e-06, "loss": 0.9282, "step": 2852 }, { "epoch": 0.027006559953048532, "grad_norm": 3.807955741882324, "learning_rate": 1.8e-06, "loss": 0.9839, "step": 2853 }, { "epoch": 0.027016025974763585, "grad_norm": 195.02525329589844, "learning_rate": 1.8006309148264984e-06, "loss": 28.1875, "step": 2854 }, { "epoch": 0.02702549199647864, "grad_norm": 699.3561401367188, "learning_rate": 1.8012618296529967e-06, "loss": 37.75, "step": 2855 }, { "epoch": 0.027034958018193694, "grad_norm": 536.2155151367188, "learning_rate": 1.8018927444794952e-06, "loss": 62.4688, "step": 2856 }, { "epoch": 0.027044424039908747, "grad_norm": 237.69003295898438, "learning_rate": 1.8025236593059937e-06, "loss": 24.875, "step": 2857 }, { "epoch": 0.0270538900616238, "grad_norm": 215.80499267578125, "learning_rate": 1.803154574132492e-06, "loss": 27.9531, "step": 2858 }, { "epoch": 0.027063356083338857, "grad_norm": 216.193359375, "learning_rate": 1.8037854889589905e-06, "loss": 24.3594, "step": 2859 }, { "epoch": 0.02707282210505391, "grad_norm": 429.9735412597656, "learning_rate": 1.804416403785489e-06, "loss": 28.4844, "step": 2860 }, { "epoch": 0.027082288126768963, "grad_norm": 1073.587158203125, "learning_rate": 1.8050473186119872e-06, "loss": 38.4531, "step": 2861 }, { "epoch": 0.027091754148484016, "grad_norm": 748.8652954101562, "learning_rate": 1.8056782334384857e-06, "loss": 36.4219, "step": 2862 }, { "epoch": 0.02710122017019907, "grad_norm": 675.3941650390625, "learning_rate": 1.8063091482649842e-06, "loss": 65.9062, "step": 2863 }, { "epoch": 0.027110686191914125, "grad_norm": 281.9307861328125, "learning_rate": 1.8069400630914827e-06, "loss": 31.625, "step": 2864 }, { "epoch": 0.02712015221362918, "grad_norm": 628.9131469726562, "learning_rate": 1.807570977917981e-06, "loss": 66.4688, "step": 2865 }, { "epoch": 0.02712961823534423, "grad_norm": 244.9151153564453, "learning_rate": 1.8082018927444793e-06, "loss": 29.9062, "step": 2866 }, { "epoch": 0.027139084257059284, "grad_norm": 245.7979736328125, "learning_rate": 1.8088328075709778e-06, "loss": 29.9219, "step": 2867 }, { "epoch": 0.02714855027877434, "grad_norm": 516.9104614257812, "learning_rate": 1.8094637223974763e-06, "loss": 47.8125, "step": 2868 }, { "epoch": 0.027158016300489394, "grad_norm": 777.917724609375, "learning_rate": 1.8100946372239747e-06, "loss": 49.5859, "step": 2869 }, { "epoch": 0.027167482322204447, "grad_norm": 752.7832641601562, "learning_rate": 1.8107255520504732e-06, "loss": 29.5547, "step": 2870 }, { "epoch": 0.0271769483439195, "grad_norm": 332.5315856933594, "learning_rate": 1.8113564668769717e-06, "loss": 33.1406, "step": 2871 }, { "epoch": 0.027186414365634556, "grad_norm": 2.7734451293945312, "learning_rate": 1.8119873817034698e-06, "loss": 0.9702, "step": 2872 }, { "epoch": 0.02719588038734961, "grad_norm": 913.514892578125, "learning_rate": 1.8126182965299683e-06, "loss": 70.6875, "step": 2873 }, { "epoch": 0.027205346409064662, "grad_norm": 236.07952880859375, "learning_rate": 1.8132492113564668e-06, "loss": 27.2031, "step": 2874 }, { "epoch": 0.027214812430779715, "grad_norm": 219.46685791015625, "learning_rate": 1.8138801261829653e-06, "loss": 24.0781, "step": 2875 }, { "epoch": 0.02722427845249477, "grad_norm": 544.4822387695312, "learning_rate": 1.8145110410094638e-06, "loss": 64.6875, "step": 2876 }, { "epoch": 0.027233744474209825, "grad_norm": 251.7190704345703, "learning_rate": 1.815141955835962e-06, "loss": 29.0156, "step": 2877 }, { "epoch": 0.027243210495924878, "grad_norm": 635.8859252929688, "learning_rate": 1.8157728706624603e-06, "loss": 69.0, "step": 2878 }, { "epoch": 0.02725267651763993, "grad_norm": 478.3950500488281, "learning_rate": 1.8164037854889588e-06, "loss": 61.9375, "step": 2879 }, { "epoch": 0.027262142539354984, "grad_norm": 153.2584991455078, "learning_rate": 1.8170347003154573e-06, "loss": 27.5156, "step": 2880 }, { "epoch": 0.02727160856107004, "grad_norm": 236.82737731933594, "learning_rate": 1.8176656151419558e-06, "loss": 26.5156, "step": 2881 }, { "epoch": 0.027281074582785093, "grad_norm": 978.193115234375, "learning_rate": 1.8182965299684543e-06, "loss": 55.875, "step": 2882 }, { "epoch": 0.027290540604500146, "grad_norm": 337.3212585449219, "learning_rate": 1.8189274447949526e-06, "loss": 41.5781, "step": 2883 }, { "epoch": 0.0273000066262152, "grad_norm": 253.72573852539062, "learning_rate": 1.819558359621451e-06, "loss": 31.9531, "step": 2884 }, { "epoch": 0.027309472647930256, "grad_norm": 707.2276000976562, "learning_rate": 1.8201892744479494e-06, "loss": 63.4531, "step": 2885 }, { "epoch": 0.02731893866964531, "grad_norm": 371.9125671386719, "learning_rate": 1.8208201892744479e-06, "loss": 28.9219, "step": 2886 }, { "epoch": 0.027328404691360362, "grad_norm": 273.8075256347656, "learning_rate": 1.8214511041009463e-06, "loss": 29.5625, "step": 2887 }, { "epoch": 0.027337870713075415, "grad_norm": 226.48712158203125, "learning_rate": 1.8220820189274446e-06, "loss": 26.7188, "step": 2888 }, { "epoch": 0.027347336734790468, "grad_norm": 619.0675659179688, "learning_rate": 1.8227129337539431e-06, "loss": 80.25, "step": 2889 }, { "epoch": 0.027356802756505524, "grad_norm": 349.6076965332031, "learning_rate": 1.8233438485804416e-06, "loss": 37.0625, "step": 2890 }, { "epoch": 0.027366268778220577, "grad_norm": 414.3895263671875, "learning_rate": 1.82397476340694e-06, "loss": 30.1719, "step": 2891 }, { "epoch": 0.02737573479993563, "grad_norm": 559.587890625, "learning_rate": 1.8246056782334384e-06, "loss": 35.0469, "step": 2892 }, { "epoch": 0.027385200821650683, "grad_norm": 226.46749877929688, "learning_rate": 1.8252365930599369e-06, "loss": 29.7812, "step": 2893 }, { "epoch": 0.02739466684336574, "grad_norm": 327.94012451171875, "learning_rate": 1.8258675078864352e-06, "loss": 30.4531, "step": 2894 }, { "epoch": 0.027404132865080793, "grad_norm": 335.7247009277344, "learning_rate": 1.8264984227129337e-06, "loss": 34.5469, "step": 2895 }, { "epoch": 0.027413598886795846, "grad_norm": 3.0588088035583496, "learning_rate": 1.8271293375394321e-06, "loss": 0.8994, "step": 2896 }, { "epoch": 0.0274230649085109, "grad_norm": 555.658447265625, "learning_rate": 1.8277602523659306e-06, "loss": 70.4375, "step": 2897 }, { "epoch": 0.027432530930225955, "grad_norm": 351.83941650390625, "learning_rate": 1.8283911671924291e-06, "loss": 28.9844, "step": 2898 }, { "epoch": 0.02744199695194101, "grad_norm": 296.3174743652344, "learning_rate": 1.8290220820189272e-06, "loss": 33.5781, "step": 2899 }, { "epoch": 0.02745146297365606, "grad_norm": 3.9454493522644043, "learning_rate": 1.8296529968454257e-06, "loss": 1.0605, "step": 2900 }, { "epoch": 0.027460928995371114, "grad_norm": 584.4071655273438, "learning_rate": 1.8302839116719242e-06, "loss": 54.2812, "step": 2901 }, { "epoch": 0.02747039501708617, "grad_norm": 263.21392822265625, "learning_rate": 1.8309148264984227e-06, "loss": 26.3203, "step": 2902 }, { "epoch": 0.027479861038801224, "grad_norm": 226.60963439941406, "learning_rate": 1.8315457413249212e-06, "loss": 28.9219, "step": 2903 }, { "epoch": 0.027489327060516277, "grad_norm": 266.7934875488281, "learning_rate": 1.8321766561514197e-06, "loss": 28.0938, "step": 2904 }, { "epoch": 0.02749879308223133, "grad_norm": 206.91920471191406, "learning_rate": 1.8328075709779177e-06, "loss": 27.125, "step": 2905 }, { "epoch": 0.027508259103946383, "grad_norm": 355.7350769042969, "learning_rate": 1.8334384858044162e-06, "loss": 32.4844, "step": 2906 }, { "epoch": 0.02751772512566144, "grad_norm": 579.0957641601562, "learning_rate": 1.8340694006309147e-06, "loss": 87.2188, "step": 2907 }, { "epoch": 0.027527191147376492, "grad_norm": 436.8077087402344, "learning_rate": 1.8347003154574132e-06, "loss": 54.5312, "step": 2908 }, { "epoch": 0.027536657169091545, "grad_norm": 3.432182550430298, "learning_rate": 1.8353312302839117e-06, "loss": 0.8867, "step": 2909 }, { "epoch": 0.0275461231908066, "grad_norm": 271.53558349609375, "learning_rate": 1.83596214511041e-06, "loss": 26.8281, "step": 2910 }, { "epoch": 0.027555589212521655, "grad_norm": 619.4029541015625, "learning_rate": 1.8365930599369085e-06, "loss": 35.9062, "step": 2911 }, { "epoch": 0.027565055234236708, "grad_norm": 3.172870397567749, "learning_rate": 1.8372239747634068e-06, "loss": 0.9033, "step": 2912 }, { "epoch": 0.02757452125595176, "grad_norm": 1512.4412841796875, "learning_rate": 1.8378548895899053e-06, "loss": 91.7812, "step": 2913 }, { "epoch": 0.027583987277666814, "grad_norm": 600.175048828125, "learning_rate": 1.8384858044164037e-06, "loss": 36.6602, "step": 2914 }, { "epoch": 0.02759345329938187, "grad_norm": 2.70405650138855, "learning_rate": 1.839116719242902e-06, "loss": 0.9741, "step": 2915 }, { "epoch": 0.027602919321096923, "grad_norm": 429.63360595703125, "learning_rate": 1.8397476340694005e-06, "loss": 45.7812, "step": 2916 }, { "epoch": 0.027612385342811976, "grad_norm": 312.9840393066406, "learning_rate": 1.840378548895899e-06, "loss": 31.0625, "step": 2917 }, { "epoch": 0.02762185136452703, "grad_norm": 343.0050964355469, "learning_rate": 1.8410094637223975e-06, "loss": 31.1406, "step": 2918 }, { "epoch": 0.027631317386242082, "grad_norm": 746.9326171875, "learning_rate": 1.8416403785488958e-06, "loss": 50.0938, "step": 2919 }, { "epoch": 0.02764078340795714, "grad_norm": 587.8279418945312, "learning_rate": 1.8422712933753943e-06, "loss": 38.9375, "step": 2920 }, { "epoch": 0.027650249429672192, "grad_norm": 818.1333618164062, "learning_rate": 1.8429022082018926e-06, "loss": 33.8828, "step": 2921 }, { "epoch": 0.027659715451387245, "grad_norm": 643.8295288085938, "learning_rate": 1.843533123028391e-06, "loss": 74.4375, "step": 2922 }, { "epoch": 0.027669181473102298, "grad_norm": 520.696533203125, "learning_rate": 1.8441640378548895e-06, "loss": 46.625, "step": 2923 }, { "epoch": 0.027678647494817354, "grad_norm": 3.6650547981262207, "learning_rate": 1.844794952681388e-06, "loss": 1.0493, "step": 2924 }, { "epoch": 0.027688113516532407, "grad_norm": 1017.4833984375, "learning_rate": 1.8454258675078863e-06, "loss": 56.0, "step": 2925 }, { "epoch": 0.02769757953824746, "grad_norm": 361.9676513671875, "learning_rate": 1.8460567823343846e-06, "loss": 33.625, "step": 2926 }, { "epoch": 0.027707045559962513, "grad_norm": 262.0277404785156, "learning_rate": 1.846687697160883e-06, "loss": 26.3906, "step": 2927 }, { "epoch": 0.02771651158167757, "grad_norm": 3.108132839202881, "learning_rate": 1.8473186119873816e-06, "loss": 0.8926, "step": 2928 }, { "epoch": 0.027725977603392623, "grad_norm": 957.115234375, "learning_rate": 1.84794952681388e-06, "loss": 60.3672, "step": 2929 }, { "epoch": 0.027735443625107676, "grad_norm": 491.28790283203125, "learning_rate": 1.8485804416403786e-06, "loss": 59.625, "step": 2930 }, { "epoch": 0.02774490964682273, "grad_norm": 3.061229705810547, "learning_rate": 1.849211356466877e-06, "loss": 0.875, "step": 2931 }, { "epoch": 0.027754375668537782, "grad_norm": 449.1922302246094, "learning_rate": 1.8498422712933751e-06, "loss": 45.25, "step": 2932 }, { "epoch": 0.02776384169025284, "grad_norm": 405.2156066894531, "learning_rate": 1.8504731861198736e-06, "loss": 27.0625, "step": 2933 }, { "epoch": 0.02777330771196789, "grad_norm": 254.56509399414062, "learning_rate": 1.8511041009463721e-06, "loss": 27.7109, "step": 2934 }, { "epoch": 0.027782773733682944, "grad_norm": 475.0409851074219, "learning_rate": 1.8517350157728706e-06, "loss": 49.2969, "step": 2935 }, { "epoch": 0.027792239755397997, "grad_norm": 2.91096568107605, "learning_rate": 1.852365930599369e-06, "loss": 0.8521, "step": 2936 }, { "epoch": 0.027801705777113054, "grad_norm": 625.6689453125, "learning_rate": 1.8529968454258674e-06, "loss": 26.6562, "step": 2937 }, { "epoch": 0.027811171798828107, "grad_norm": 593.9344482421875, "learning_rate": 1.8536277602523659e-06, "loss": 25.9375, "step": 2938 }, { "epoch": 0.02782063782054316, "grad_norm": 863.5092163085938, "learning_rate": 1.8542586750788642e-06, "loss": 54.0312, "step": 2939 }, { "epoch": 0.027830103842258213, "grad_norm": 3.1169614791870117, "learning_rate": 1.8548895899053627e-06, "loss": 0.8408, "step": 2940 }, { "epoch": 0.02783956986397327, "grad_norm": 1422.12939453125, "learning_rate": 1.8555205047318611e-06, "loss": 47.7578, "step": 2941 }, { "epoch": 0.027849035885688322, "grad_norm": 273.90093994140625, "learning_rate": 1.8561514195583596e-06, "loss": 25.8438, "step": 2942 }, { "epoch": 0.027858501907403375, "grad_norm": 215.62661743164062, "learning_rate": 1.856782334384858e-06, "loss": 26.4375, "step": 2943 }, { "epoch": 0.02786796792911843, "grad_norm": 3.064091444015503, "learning_rate": 1.8574132492113564e-06, "loss": 0.8662, "step": 2944 }, { "epoch": 0.027877433950833485, "grad_norm": 383.3791809082031, "learning_rate": 1.858044164037855e-06, "loss": 44.2266, "step": 2945 }, { "epoch": 0.027886899972548538, "grad_norm": 477.2187194824219, "learning_rate": 1.8586750788643532e-06, "loss": 31.1016, "step": 2946 }, { "epoch": 0.02789636599426359, "grad_norm": 1146.1290283203125, "learning_rate": 1.8593059936908517e-06, "loss": 90.8125, "step": 2947 }, { "epoch": 0.027905832015978644, "grad_norm": 615.4558715820312, "learning_rate": 1.85993690851735e-06, "loss": 31.4375, "step": 2948 }, { "epoch": 0.027915298037693697, "grad_norm": 201.60423278808594, "learning_rate": 1.8605678233438485e-06, "loss": 27.0781, "step": 2949 }, { "epoch": 0.027924764059408753, "grad_norm": 420.35284423828125, "learning_rate": 1.861198738170347e-06, "loss": 27.875, "step": 2950 }, { "epoch": 0.027934230081123806, "grad_norm": 371.0447998046875, "learning_rate": 1.8618296529968454e-06, "loss": 43.7344, "step": 2951 }, { "epoch": 0.02794369610283886, "grad_norm": 358.84716796875, "learning_rate": 1.8624605678233437e-06, "loss": 36.3281, "step": 2952 }, { "epoch": 0.027953162124553912, "grad_norm": 189.9296875, "learning_rate": 1.8630914826498422e-06, "loss": 24.7656, "step": 2953 }, { "epoch": 0.02796262814626897, "grad_norm": 755.2401733398438, "learning_rate": 1.8637223974763405e-06, "loss": 31.0781, "step": 2954 }, { "epoch": 0.027972094167984022, "grad_norm": 470.6728515625, "learning_rate": 1.864353312302839e-06, "loss": 47.8594, "step": 2955 }, { "epoch": 0.027981560189699075, "grad_norm": 568.0224609375, "learning_rate": 1.8649842271293375e-06, "loss": 30.3711, "step": 2956 }, { "epoch": 0.027991026211414128, "grad_norm": 242.3195343017578, "learning_rate": 1.865615141955836e-06, "loss": 28.1719, "step": 2957 }, { "epoch": 0.028000492233129184, "grad_norm": 3.082076072692871, "learning_rate": 1.8662460567823345e-06, "loss": 0.9028, "step": 2958 }, { "epoch": 0.028009958254844237, "grad_norm": 433.72271728515625, "learning_rate": 1.8668769716088325e-06, "loss": 31.8984, "step": 2959 }, { "epoch": 0.02801942427655929, "grad_norm": 2.7240304946899414, "learning_rate": 1.867507886435331e-06, "loss": 0.7896, "step": 2960 }, { "epoch": 0.028028890298274343, "grad_norm": 3.179124116897583, "learning_rate": 1.8681388012618295e-06, "loss": 1.0146, "step": 2961 }, { "epoch": 0.028038356319989396, "grad_norm": 462.6819152832031, "learning_rate": 1.868769716088328e-06, "loss": 26.4531, "step": 2962 }, { "epoch": 0.028047822341704453, "grad_norm": 3.368075370788574, "learning_rate": 1.8694006309148265e-06, "loss": 0.9277, "step": 2963 }, { "epoch": 0.028057288363419506, "grad_norm": 726.3485717773438, "learning_rate": 1.870031545741325e-06, "loss": 63.5469, "step": 2964 }, { "epoch": 0.02806675438513456, "grad_norm": 661.0130615234375, "learning_rate": 1.8706624605678233e-06, "loss": 67.2188, "step": 2965 }, { "epoch": 0.028076220406849612, "grad_norm": 492.2293395996094, "learning_rate": 1.8712933753943216e-06, "loss": 32.0078, "step": 2966 }, { "epoch": 0.02808568642856467, "grad_norm": 549.1591796875, "learning_rate": 1.87192429022082e-06, "loss": 49.4688, "step": 2967 }, { "epoch": 0.02809515245027972, "grad_norm": 166.63673400878906, "learning_rate": 1.8725552050473185e-06, "loss": 25.8281, "step": 2968 }, { "epoch": 0.028104618471994774, "grad_norm": 802.514404296875, "learning_rate": 1.873186119873817e-06, "loss": 26.6172, "step": 2969 }, { "epoch": 0.028114084493709827, "grad_norm": 189.3831024169922, "learning_rate": 1.8738170347003153e-06, "loss": 30.75, "step": 2970 }, { "epoch": 0.028123550515424884, "grad_norm": 181.5418701171875, "learning_rate": 1.8744479495268138e-06, "loss": 30.3594, "step": 2971 }, { "epoch": 0.028133016537139937, "grad_norm": 380.3318786621094, "learning_rate": 1.875078864353312e-06, "loss": 35.2812, "step": 2972 }, { "epoch": 0.02814248255885499, "grad_norm": 1136.01806640625, "learning_rate": 1.8757097791798106e-06, "loss": 67.2344, "step": 2973 }, { "epoch": 0.028151948580570043, "grad_norm": 606.4057006835938, "learning_rate": 1.876340694006309e-06, "loss": 32.7031, "step": 2974 }, { "epoch": 0.028161414602285096, "grad_norm": 292.56011962890625, "learning_rate": 1.8769716088328076e-06, "loss": 39.0625, "step": 2975 }, { "epoch": 0.028170880624000152, "grad_norm": 304.9939270019531, "learning_rate": 1.8776025236593059e-06, "loss": 33.6562, "step": 2976 }, { "epoch": 0.028180346645715205, "grad_norm": 245.83876037597656, "learning_rate": 1.8782334384858043e-06, "loss": 26.875, "step": 2977 }, { "epoch": 0.02818981266743026, "grad_norm": 437.8835144042969, "learning_rate": 1.8788643533123028e-06, "loss": 35.1875, "step": 2978 }, { "epoch": 0.02819927868914531, "grad_norm": 394.51348876953125, "learning_rate": 1.8794952681388011e-06, "loss": 30.1875, "step": 2979 }, { "epoch": 0.028208744710860368, "grad_norm": 377.5068054199219, "learning_rate": 1.8801261829652996e-06, "loss": 29.7812, "step": 2980 }, { "epoch": 0.02821821073257542, "grad_norm": 235.54354858398438, "learning_rate": 1.880757097791798e-06, "loss": 26.125, "step": 2981 }, { "epoch": 0.028227676754290474, "grad_norm": 595.423828125, "learning_rate": 1.8813880126182964e-06, "loss": 40.6484, "step": 2982 }, { "epoch": 0.028237142776005527, "grad_norm": 830.7849731445312, "learning_rate": 1.8820189274447949e-06, "loss": 41.2891, "step": 2983 }, { "epoch": 0.028246608797720583, "grad_norm": 216.68862915039062, "learning_rate": 1.8826498422712934e-06, "loss": 30.125, "step": 2984 }, { "epoch": 0.028256074819435636, "grad_norm": 201.319091796875, "learning_rate": 1.8832807570977919e-06, "loss": 25.5625, "step": 2985 }, { "epoch": 0.02826554084115069, "grad_norm": 896.2003784179688, "learning_rate": 1.8839116719242901e-06, "loss": 78.9531, "step": 2986 }, { "epoch": 0.028275006862865742, "grad_norm": 213.08804321289062, "learning_rate": 1.8845425867507884e-06, "loss": 28.1562, "step": 2987 }, { "epoch": 0.028284472884580796, "grad_norm": 618.325439453125, "learning_rate": 1.885173501577287e-06, "loss": 32.7344, "step": 2988 }, { "epoch": 0.028293938906295852, "grad_norm": 1431.76611328125, "learning_rate": 1.8858044164037854e-06, "loss": 62.0781, "step": 2989 }, { "epoch": 0.028303404928010905, "grad_norm": 1140.7257080078125, "learning_rate": 1.886435331230284e-06, "loss": 59.375, "step": 2990 }, { "epoch": 0.028312870949725958, "grad_norm": 856.966796875, "learning_rate": 1.8870662460567824e-06, "loss": 66.3906, "step": 2991 }, { "epoch": 0.02832233697144101, "grad_norm": 1277.63037109375, "learning_rate": 1.8876971608832805e-06, "loss": 64.2344, "step": 2992 }, { "epoch": 0.028331802993156067, "grad_norm": 384.1257019042969, "learning_rate": 1.888328075709779e-06, "loss": 31.9375, "step": 2993 }, { "epoch": 0.02834126901487112, "grad_norm": 1344.2340087890625, "learning_rate": 1.8889589905362775e-06, "loss": 59.7188, "step": 2994 }, { "epoch": 0.028350735036586173, "grad_norm": 3.85440731048584, "learning_rate": 1.889589905362776e-06, "loss": 0.9976, "step": 2995 }, { "epoch": 0.028360201058301227, "grad_norm": 471.2390441894531, "learning_rate": 1.8902208201892744e-06, "loss": 24.3828, "step": 2996 }, { "epoch": 0.028369667080016283, "grad_norm": 173.5531768798828, "learning_rate": 1.890851735015773e-06, "loss": 28.6875, "step": 2997 }, { "epoch": 0.028379133101731336, "grad_norm": 583.5234375, "learning_rate": 1.8914826498422712e-06, "loss": 82.5, "step": 2998 }, { "epoch": 0.02838859912344639, "grad_norm": 195.54908752441406, "learning_rate": 1.8921135646687695e-06, "loss": 26.8594, "step": 2999 }, { "epoch": 0.028398065145161442, "grad_norm": 577.6681518554688, "learning_rate": 1.892744479495268e-06, "loss": 58.7109, "step": 3000 }, { "epoch": 0.0284075311668765, "grad_norm": 733.3356323242188, "learning_rate": 1.8933753943217665e-06, "loss": 52.4609, "step": 3001 }, { "epoch": 0.02841699718859155, "grad_norm": 240.54437255859375, "learning_rate": 1.894006309148265e-06, "loss": 28.5, "step": 3002 }, { "epoch": 0.028426463210306605, "grad_norm": 422.3265380859375, "learning_rate": 1.8946372239747633e-06, "loss": 28.8281, "step": 3003 }, { "epoch": 0.028435929232021658, "grad_norm": 377.0937805175781, "learning_rate": 1.8952681388012617e-06, "loss": 23.2812, "step": 3004 }, { "epoch": 0.02844539525373671, "grad_norm": 814.57763671875, "learning_rate": 1.8958990536277602e-06, "loss": 63.2812, "step": 3005 }, { "epoch": 0.028454861275451767, "grad_norm": 254.05279541015625, "learning_rate": 1.8965299684542585e-06, "loss": 28.7344, "step": 3006 }, { "epoch": 0.02846432729716682, "grad_norm": 603.8756103515625, "learning_rate": 1.897160883280757e-06, "loss": 58.0469, "step": 3007 }, { "epoch": 0.028473793318881873, "grad_norm": 396.6353759765625, "learning_rate": 1.8977917981072555e-06, "loss": 61.0625, "step": 3008 }, { "epoch": 0.028483259340596926, "grad_norm": 161.9495391845703, "learning_rate": 1.8984227129337538e-06, "loss": 33.1406, "step": 3009 }, { "epoch": 0.028492725362311982, "grad_norm": 301.6107482910156, "learning_rate": 1.8990536277602523e-06, "loss": 29.7344, "step": 3010 }, { "epoch": 0.028502191384027036, "grad_norm": 715.6193237304688, "learning_rate": 1.8996845425867508e-06, "loss": 58.8125, "step": 3011 }, { "epoch": 0.02851165740574209, "grad_norm": 612.27978515625, "learning_rate": 1.9003154574132493e-06, "loss": 32.6875, "step": 3012 }, { "epoch": 0.02852112342745714, "grad_norm": 316.9184875488281, "learning_rate": 1.9009463722397475e-06, "loss": 31.1875, "step": 3013 }, { "epoch": 0.028530589449172198, "grad_norm": 301.16070556640625, "learning_rate": 1.9015772870662458e-06, "loss": 26.8438, "step": 3014 }, { "epoch": 0.02854005547088725, "grad_norm": 415.5513916015625, "learning_rate": 1.9022082018927443e-06, "loss": 48.7344, "step": 3015 }, { "epoch": 0.028549521492602304, "grad_norm": 495.078857421875, "learning_rate": 1.9028391167192428e-06, "loss": 38.0625, "step": 3016 }, { "epoch": 0.028558987514317357, "grad_norm": 529.0376586914062, "learning_rate": 1.9034700315457413e-06, "loss": 64.3438, "step": 3017 }, { "epoch": 0.02856845353603241, "grad_norm": 764.2074584960938, "learning_rate": 1.9041009463722398e-06, "loss": 31.5, "step": 3018 }, { "epoch": 0.028577919557747467, "grad_norm": 186.86245727539062, "learning_rate": 1.9047318611987383e-06, "loss": 34.25, "step": 3019 }, { "epoch": 0.02858738557946252, "grad_norm": 170.32614135742188, "learning_rate": 1.9053627760252364e-06, "loss": 26.0625, "step": 3020 }, { "epoch": 0.028596851601177573, "grad_norm": 176.12733459472656, "learning_rate": 1.9059936908517349e-06, "loss": 25.8438, "step": 3021 }, { "epoch": 0.028606317622892626, "grad_norm": 267.1184997558594, "learning_rate": 1.9066246056782333e-06, "loss": 26.0938, "step": 3022 }, { "epoch": 0.028615783644607682, "grad_norm": 1719.5550537109375, "learning_rate": 1.9072555205047318e-06, "loss": 59.2188, "step": 3023 }, { "epoch": 0.028625249666322735, "grad_norm": 551.0123291015625, "learning_rate": 1.90788643533123e-06, "loss": 31.5938, "step": 3024 }, { "epoch": 0.028634715688037788, "grad_norm": 411.9028625488281, "learning_rate": 1.9085173501577286e-06, "loss": 60.875, "step": 3025 }, { "epoch": 0.02864418170975284, "grad_norm": 307.43927001953125, "learning_rate": 1.909148264984227e-06, "loss": 30.4062, "step": 3026 }, { "epoch": 0.028653647731467898, "grad_norm": 3.6146726608276367, "learning_rate": 1.9097791798107256e-06, "loss": 1.0459, "step": 3027 }, { "epoch": 0.02866311375318295, "grad_norm": 768.6837158203125, "learning_rate": 1.910410094637224e-06, "loss": 58.2031, "step": 3028 }, { "epoch": 0.028672579774898004, "grad_norm": 376.70452880859375, "learning_rate": 1.9110410094637226e-06, "loss": 34.5781, "step": 3029 }, { "epoch": 0.028682045796613057, "grad_norm": 289.410400390625, "learning_rate": 1.9116719242902207e-06, "loss": 29.3281, "step": 3030 }, { "epoch": 0.02869151181832811, "grad_norm": 405.9792175292969, "learning_rate": 1.912302839116719e-06, "loss": 29.6562, "step": 3031 }, { "epoch": 0.028700977840043166, "grad_norm": 963.8055419921875, "learning_rate": 1.9129337539432176e-06, "loss": 71.1172, "step": 3032 }, { "epoch": 0.02871044386175822, "grad_norm": 954.6480102539062, "learning_rate": 1.913564668769716e-06, "loss": 62.3281, "step": 3033 }, { "epoch": 0.028719909883473272, "grad_norm": 313.32989501953125, "learning_rate": 1.9141955835962146e-06, "loss": 30.9688, "step": 3034 }, { "epoch": 0.028729375905188325, "grad_norm": 692.7454833984375, "learning_rate": 1.914826498422713e-06, "loss": 49.0703, "step": 3035 }, { "epoch": 0.02873884192690338, "grad_norm": 206.32167053222656, "learning_rate": 1.915457413249211e-06, "loss": 26.8438, "step": 3036 }, { "epoch": 0.028748307948618435, "grad_norm": 345.513671875, "learning_rate": 1.9160883280757097e-06, "loss": 32.4844, "step": 3037 }, { "epoch": 0.028757773970333488, "grad_norm": 354.3600158691406, "learning_rate": 1.916719242902208e-06, "loss": 31.8438, "step": 3038 }, { "epoch": 0.02876723999204854, "grad_norm": 1350.3837890625, "learning_rate": 1.9173501577287067e-06, "loss": 38.8438, "step": 3039 }, { "epoch": 0.028776706013763597, "grad_norm": 516.7263793945312, "learning_rate": 1.917981072555205e-06, "loss": 33.5625, "step": 3040 }, { "epoch": 0.02878617203547865, "grad_norm": 313.1994934082031, "learning_rate": 1.9186119873817037e-06, "loss": 22.9062, "step": 3041 }, { "epoch": 0.028795638057193703, "grad_norm": 358.66943359375, "learning_rate": 1.9192429022082017e-06, "loss": 47.5938, "step": 3042 }, { "epoch": 0.028805104078908756, "grad_norm": 423.15789794921875, "learning_rate": 1.9198738170347002e-06, "loss": 56.2969, "step": 3043 }, { "epoch": 0.028814570100623813, "grad_norm": 3.065624237060547, "learning_rate": 1.9205047318611987e-06, "loss": 0.7681, "step": 3044 }, { "epoch": 0.028824036122338866, "grad_norm": 242.20101928710938, "learning_rate": 1.921135646687697e-06, "loss": 29.1094, "step": 3045 }, { "epoch": 0.02883350214405392, "grad_norm": 625.7954711914062, "learning_rate": 1.9217665615141957e-06, "loss": 34.3281, "step": 3046 }, { "epoch": 0.02884296816576897, "grad_norm": 371.7037658691406, "learning_rate": 1.9223974763406938e-06, "loss": 35.6719, "step": 3047 }, { "epoch": 0.028852434187484025, "grad_norm": 3.605592966079712, "learning_rate": 1.9230283911671923e-06, "loss": 1.0073, "step": 3048 }, { "epoch": 0.02886190020919908, "grad_norm": 340.3345947265625, "learning_rate": 1.9236593059936907e-06, "loss": 38.25, "step": 3049 }, { "epoch": 0.028871366230914134, "grad_norm": 430.219970703125, "learning_rate": 1.9242902208201892e-06, "loss": 59.1719, "step": 3050 }, { "epoch": 0.028880832252629187, "grad_norm": 212.24609375, "learning_rate": 1.9249211356466877e-06, "loss": 29.25, "step": 3051 }, { "epoch": 0.02889029827434424, "grad_norm": 523.6144409179688, "learning_rate": 1.9255520504731862e-06, "loss": 44.7969, "step": 3052 }, { "epoch": 0.028899764296059297, "grad_norm": 462.2934875488281, "learning_rate": 1.9261829652996843e-06, "loss": 52.25, "step": 3053 }, { "epoch": 0.02890923031777435, "grad_norm": 510.462646484375, "learning_rate": 1.926813880126183e-06, "loss": 27.5312, "step": 3054 }, { "epoch": 0.028918696339489403, "grad_norm": 534.5834350585938, "learning_rate": 1.9274447949526813e-06, "loss": 62.4688, "step": 3055 }, { "epoch": 0.028928162361204456, "grad_norm": 594.3785400390625, "learning_rate": 1.9280757097791798e-06, "loss": 33.0, "step": 3056 }, { "epoch": 0.028937628382919512, "grad_norm": 465.9027099609375, "learning_rate": 1.9287066246056783e-06, "loss": 40.4219, "step": 3057 }, { "epoch": 0.028947094404634565, "grad_norm": 263.90911865234375, "learning_rate": 1.9293375394321763e-06, "loss": 25.4062, "step": 3058 }, { "epoch": 0.028956560426349618, "grad_norm": 859.252197265625, "learning_rate": 1.929968454258675e-06, "loss": 56.3203, "step": 3059 }, { "epoch": 0.02896602644806467, "grad_norm": 355.9149475097656, "learning_rate": 1.9305993690851733e-06, "loss": 27.9531, "step": 3060 }, { "epoch": 0.028975492469779724, "grad_norm": 724.5003051757812, "learning_rate": 1.931230283911672e-06, "loss": 57.6875, "step": 3061 }, { "epoch": 0.02898495849149478, "grad_norm": 632.14208984375, "learning_rate": 1.9318611987381703e-06, "loss": 35.2969, "step": 3062 }, { "epoch": 0.028994424513209834, "grad_norm": 907.6917724609375, "learning_rate": 1.932492113564669e-06, "loss": 50.9766, "step": 3063 }, { "epoch": 0.029003890534924887, "grad_norm": 574.6113891601562, "learning_rate": 1.933123028391167e-06, "loss": 21.7891, "step": 3064 }, { "epoch": 0.02901335655663994, "grad_norm": 482.6669616699219, "learning_rate": 1.9337539432176654e-06, "loss": 35.1406, "step": 3065 }, { "epoch": 0.029022822578354996, "grad_norm": 373.1529541015625, "learning_rate": 1.934384858044164e-06, "loss": 36.1719, "step": 3066 }, { "epoch": 0.02903228860007005, "grad_norm": 532.2530517578125, "learning_rate": 1.9350157728706623e-06, "loss": 67.75, "step": 3067 }, { "epoch": 0.029041754621785102, "grad_norm": 552.7474365234375, "learning_rate": 1.935646687697161e-06, "loss": 31.1719, "step": 3068 }, { "epoch": 0.029051220643500155, "grad_norm": 644.087890625, "learning_rate": 1.9362776025236593e-06, "loss": 53.1797, "step": 3069 }, { "epoch": 0.02906068666521521, "grad_norm": 596.1608276367188, "learning_rate": 1.9369085173501574e-06, "loss": 77.2812, "step": 3070 }, { "epoch": 0.029070152686930265, "grad_norm": 194.372314453125, "learning_rate": 1.937539432176656e-06, "loss": 25.4062, "step": 3071 }, { "epoch": 0.029079618708645318, "grad_norm": 842.5868530273438, "learning_rate": 1.9381703470031544e-06, "loss": 63.3828, "step": 3072 }, { "epoch": 0.02908908473036037, "grad_norm": 592.0133056640625, "learning_rate": 1.938801261829653e-06, "loss": 46.6562, "step": 3073 }, { "epoch": 0.029098550752075424, "grad_norm": 770.9590454101562, "learning_rate": 1.9394321766561514e-06, "loss": 30.0312, "step": 3074 }, { "epoch": 0.02910801677379048, "grad_norm": 249.2825927734375, "learning_rate": 1.94006309148265e-06, "loss": 29.125, "step": 3075 }, { "epoch": 0.029117482795505533, "grad_norm": 620.82373046875, "learning_rate": 1.940694006309148e-06, "loss": 67.9531, "step": 3076 }, { "epoch": 0.029126948817220586, "grad_norm": 2.656998634338379, "learning_rate": 1.9413249211356464e-06, "loss": 0.7539, "step": 3077 }, { "epoch": 0.02913641483893564, "grad_norm": 529.2372436523438, "learning_rate": 1.941955835962145e-06, "loss": 58.3594, "step": 3078 }, { "epoch": 0.029145880860650696, "grad_norm": 785.6359252929688, "learning_rate": 1.9425867507886434e-06, "loss": 49.0547, "step": 3079 }, { "epoch": 0.02915534688236575, "grad_norm": 211.0615997314453, "learning_rate": 1.943217665615142e-06, "loss": 28.0781, "step": 3080 }, { "epoch": 0.0291648129040808, "grad_norm": 555.6659545898438, "learning_rate": 1.9438485804416404e-06, "loss": 30.0156, "step": 3081 }, { "epoch": 0.029174278925795855, "grad_norm": 342.466796875, "learning_rate": 1.944479495268139e-06, "loss": 44.625, "step": 3082 }, { "epoch": 0.02918374494751091, "grad_norm": 298.42620849609375, "learning_rate": 1.945110410094637e-06, "loss": 26.7812, "step": 3083 }, { "epoch": 0.029193210969225964, "grad_norm": 678.5958251953125, "learning_rate": 1.9457413249211355e-06, "loss": 74.6094, "step": 3084 }, { "epoch": 0.029202676990941017, "grad_norm": 535.227783203125, "learning_rate": 1.946372239747634e-06, "loss": 56.6562, "step": 3085 }, { "epoch": 0.02921214301265607, "grad_norm": 723.6743774414062, "learning_rate": 1.9470031545741324e-06, "loss": 70.9375, "step": 3086 }, { "epoch": 0.029221609034371127, "grad_norm": 280.4714660644531, "learning_rate": 1.947634069400631e-06, "loss": 27.6875, "step": 3087 }, { "epoch": 0.02923107505608618, "grad_norm": 439.0805969238281, "learning_rate": 1.9482649842271294e-06, "loss": 59.1562, "step": 3088 }, { "epoch": 0.029240541077801233, "grad_norm": 978.1036987304688, "learning_rate": 1.948895899053628e-06, "loss": 70.375, "step": 3089 }, { "epoch": 0.029250007099516286, "grad_norm": 640.5734252929688, "learning_rate": 1.949526813880126e-06, "loss": 64.9922, "step": 3090 }, { "epoch": 0.02925947312123134, "grad_norm": 164.64476013183594, "learning_rate": 1.9501577287066245e-06, "loss": 30.625, "step": 3091 }, { "epoch": 0.029268939142946395, "grad_norm": 532.6482543945312, "learning_rate": 1.950788643533123e-06, "loss": 26.0391, "step": 3092 }, { "epoch": 0.029278405164661448, "grad_norm": 310.9605712890625, "learning_rate": 1.9514195583596215e-06, "loss": 28.0938, "step": 3093 }, { "epoch": 0.0292878711863765, "grad_norm": 580.2094116210938, "learning_rate": 1.95205047318612e-06, "loss": 33.4219, "step": 3094 }, { "epoch": 0.029297337208091554, "grad_norm": 336.94915771484375, "learning_rate": 1.9526813880126185e-06, "loss": 41.0625, "step": 3095 }, { "epoch": 0.02930680322980661, "grad_norm": 398.5977478027344, "learning_rate": 1.9533123028391165e-06, "loss": 51.2188, "step": 3096 }, { "epoch": 0.029316269251521664, "grad_norm": 601.50634765625, "learning_rate": 1.953943217665615e-06, "loss": 59.875, "step": 3097 }, { "epoch": 0.029325735273236717, "grad_norm": 600.732666015625, "learning_rate": 1.9545741324921135e-06, "loss": 38.8516, "step": 3098 }, { "epoch": 0.02933520129495177, "grad_norm": 371.4056396484375, "learning_rate": 1.955205047318612e-06, "loss": 33.1562, "step": 3099 }, { "epoch": 0.029344667316666826, "grad_norm": 214.05520629882812, "learning_rate": 1.9558359621451105e-06, "loss": 21.8828, "step": 3100 }, { "epoch": 0.02935413333838188, "grad_norm": 279.1535339355469, "learning_rate": 1.956466876971609e-06, "loss": 29.5156, "step": 3101 }, { "epoch": 0.029363599360096932, "grad_norm": 578.9552612304688, "learning_rate": 1.957097791798107e-06, "loss": 65.875, "step": 3102 }, { "epoch": 0.029373065381811985, "grad_norm": 788.7144165039062, "learning_rate": 1.9577287066246055e-06, "loss": 31.9609, "step": 3103 }, { "epoch": 0.029382531403527038, "grad_norm": 401.9947204589844, "learning_rate": 1.958359621451104e-06, "loss": 29.25, "step": 3104 }, { "epoch": 0.029391997425242095, "grad_norm": 2.8628127574920654, "learning_rate": 1.9589905362776025e-06, "loss": 0.8906, "step": 3105 }, { "epoch": 0.029401463446957148, "grad_norm": 599.5540161132812, "learning_rate": 1.959621451104101e-06, "loss": 29.2734, "step": 3106 }, { "epoch": 0.0294109294686722, "grad_norm": 581.6146240234375, "learning_rate": 1.960252365930599e-06, "loss": 43.6406, "step": 3107 }, { "epoch": 0.029420395490387254, "grad_norm": 577.175537109375, "learning_rate": 1.9608832807570976e-06, "loss": 45.8281, "step": 3108 }, { "epoch": 0.02942986151210231, "grad_norm": 510.3935852050781, "learning_rate": 1.961514195583596e-06, "loss": 35.6562, "step": 3109 }, { "epoch": 0.029439327533817363, "grad_norm": 477.02215576171875, "learning_rate": 1.9621451104100946e-06, "loss": 27.6094, "step": 3110 }, { "epoch": 0.029448793555532416, "grad_norm": 3.247584342956543, "learning_rate": 1.962776025236593e-06, "loss": 0.8472, "step": 3111 }, { "epoch": 0.02945825957724747, "grad_norm": 354.92413330078125, "learning_rate": 1.9634069400630916e-06, "loss": 30.7891, "step": 3112 }, { "epoch": 0.029467725598962526, "grad_norm": 1059.577392578125, "learning_rate": 1.9640378548895896e-06, "loss": 66.8906, "step": 3113 }, { "epoch": 0.02947719162067758, "grad_norm": 556.3794555664062, "learning_rate": 1.964668769716088e-06, "loss": 25.6562, "step": 3114 }, { "epoch": 0.02948665764239263, "grad_norm": 199.82968139648438, "learning_rate": 1.9652996845425866e-06, "loss": 28.6406, "step": 3115 }, { "epoch": 0.029496123664107685, "grad_norm": 712.4205932617188, "learning_rate": 1.965930599369085e-06, "loss": 42.6094, "step": 3116 }, { "epoch": 0.029505589685822738, "grad_norm": 472.3949279785156, "learning_rate": 1.9665615141955836e-06, "loss": 39.2031, "step": 3117 }, { "epoch": 0.029515055707537794, "grad_norm": 273.7834167480469, "learning_rate": 1.9671924290220817e-06, "loss": 31.2969, "step": 3118 }, { "epoch": 0.029524521729252847, "grad_norm": 2.4284160137176514, "learning_rate": 1.96782334384858e-06, "loss": 0.7749, "step": 3119 }, { "epoch": 0.0295339877509679, "grad_norm": 490.3192138671875, "learning_rate": 1.9684542586750787e-06, "loss": 37.4688, "step": 3120 }, { "epoch": 0.029543453772682953, "grad_norm": 901.8837890625, "learning_rate": 1.969085173501577e-06, "loss": 54.75, "step": 3121 }, { "epoch": 0.02955291979439801, "grad_norm": 598.9456176757812, "learning_rate": 1.9697160883280756e-06, "loss": 53.375, "step": 3122 }, { "epoch": 0.029562385816113063, "grad_norm": 237.89077758789062, "learning_rate": 1.970347003154574e-06, "loss": 29.6406, "step": 3123 }, { "epoch": 0.029571851837828116, "grad_norm": 1173.3260498046875, "learning_rate": 1.970977917981072e-06, "loss": 65.2891, "step": 3124 }, { "epoch": 0.02958131785954317, "grad_norm": 488.6764221191406, "learning_rate": 1.9716088328075707e-06, "loss": 37.7031, "step": 3125 }, { "epoch": 0.029590783881258225, "grad_norm": 2.9530367851257324, "learning_rate": 1.972239747634069e-06, "loss": 0.9192, "step": 3126 }, { "epoch": 0.029600249902973278, "grad_norm": 543.4263305664062, "learning_rate": 1.9728706624605677e-06, "loss": 43.1719, "step": 3127 }, { "epoch": 0.02960971592468833, "grad_norm": 899.5927734375, "learning_rate": 1.973501577287066e-06, "loss": 41.7031, "step": 3128 }, { "epoch": 0.029619181946403384, "grad_norm": 284.1377868652344, "learning_rate": 1.9741324921135647e-06, "loss": 28.625, "step": 3129 }, { "epoch": 0.02962864796811844, "grad_norm": 577.85205078125, "learning_rate": 1.9747634069400627e-06, "loss": 44.4531, "step": 3130 }, { "epoch": 0.029638113989833494, "grad_norm": 891.6448974609375, "learning_rate": 1.9753943217665612e-06, "loss": 38.6719, "step": 3131 }, { "epoch": 0.029647580011548547, "grad_norm": 616.3634643554688, "learning_rate": 1.9760252365930597e-06, "loss": 36.3906, "step": 3132 }, { "epoch": 0.0296570460332636, "grad_norm": 500.46337890625, "learning_rate": 1.9766561514195582e-06, "loss": 26.4844, "step": 3133 }, { "epoch": 0.029666512054978653, "grad_norm": 853.7125244140625, "learning_rate": 1.9772870662460567e-06, "loss": 51.3281, "step": 3134 }, { "epoch": 0.02967597807669371, "grad_norm": 1225.314697265625, "learning_rate": 1.977917981072555e-06, "loss": 58.2422, "step": 3135 }, { "epoch": 0.029685444098408762, "grad_norm": 1324.1092529296875, "learning_rate": 1.9785488958990537e-06, "loss": 47.8906, "step": 3136 }, { "epoch": 0.029694910120123815, "grad_norm": 546.4848022460938, "learning_rate": 1.9791798107255518e-06, "loss": 37.3594, "step": 3137 }, { "epoch": 0.029704376141838868, "grad_norm": 501.7084655761719, "learning_rate": 1.9798107255520503e-06, "loss": 41.125, "step": 3138 }, { "epoch": 0.029713842163553925, "grad_norm": 308.98101806640625, "learning_rate": 1.9804416403785487e-06, "loss": 26.2344, "step": 3139 }, { "epoch": 0.029723308185268978, "grad_norm": 244.4647979736328, "learning_rate": 1.9810725552050472e-06, "loss": 26.9688, "step": 3140 }, { "epoch": 0.02973277420698403, "grad_norm": 405.2528991699219, "learning_rate": 1.9817034700315457e-06, "loss": 52.8906, "step": 3141 }, { "epoch": 0.029742240228699084, "grad_norm": 698.5236206054688, "learning_rate": 1.9823343848580442e-06, "loss": 58.75, "step": 3142 }, { "epoch": 0.02975170625041414, "grad_norm": 480.1764831542969, "learning_rate": 1.9829652996845427e-06, "loss": 57.3281, "step": 3143 }, { "epoch": 0.029761172272129193, "grad_norm": 632.9220581054688, "learning_rate": 1.983596214511041e-06, "loss": 26.5, "step": 3144 }, { "epoch": 0.029770638293844246, "grad_norm": 488.35662841796875, "learning_rate": 1.9842271293375393e-06, "loss": 48.6875, "step": 3145 }, { "epoch": 0.0297801043155593, "grad_norm": 399.93798828125, "learning_rate": 1.9848580441640378e-06, "loss": 34.8594, "step": 3146 }, { "epoch": 0.029789570337274352, "grad_norm": 244.40890502929688, "learning_rate": 1.9854889589905363e-06, "loss": 26.4062, "step": 3147 }, { "epoch": 0.02979903635898941, "grad_norm": 454.66461181640625, "learning_rate": 1.9861198738170348e-06, "loss": 35.5156, "step": 3148 }, { "epoch": 0.02980850238070446, "grad_norm": 1342.02099609375, "learning_rate": 1.9867507886435333e-06, "loss": 51.4062, "step": 3149 }, { "epoch": 0.029817968402419515, "grad_norm": 2.7180888652801514, "learning_rate": 1.9873817034700313e-06, "loss": 0.7175, "step": 3150 }, { "epoch": 0.029827434424134568, "grad_norm": 261.70416259765625, "learning_rate": 1.98801261829653e-06, "loss": 28.8281, "step": 3151 }, { "epoch": 0.029836900445849624, "grad_norm": 1050.95947265625, "learning_rate": 1.9886435331230283e-06, "loss": 54.2812, "step": 3152 }, { "epoch": 0.029846366467564677, "grad_norm": 641.332763671875, "learning_rate": 1.989274447949527e-06, "loss": 44.4844, "step": 3153 }, { "epoch": 0.02985583248927973, "grad_norm": 632.338623046875, "learning_rate": 1.9899053627760253e-06, "loss": 59.6719, "step": 3154 }, { "epoch": 0.029865298510994783, "grad_norm": 956.9154663085938, "learning_rate": 1.9905362776025238e-06, "loss": 110.4531, "step": 3155 }, { "epoch": 0.02987476453270984, "grad_norm": 241.5674591064453, "learning_rate": 1.9911671924290223e-06, "loss": 30.6562, "step": 3156 }, { "epoch": 0.029884230554424893, "grad_norm": 313.2276611328125, "learning_rate": 1.9917981072555203e-06, "loss": 27.8438, "step": 3157 }, { "epoch": 0.029893696576139946, "grad_norm": 250.06887817382812, "learning_rate": 1.992429022082019e-06, "loss": 27.5156, "step": 3158 }, { "epoch": 0.029903162597855, "grad_norm": 281.59246826171875, "learning_rate": 1.9930599369085173e-06, "loss": 31.9531, "step": 3159 }, { "epoch": 0.02991262861957005, "grad_norm": 412.3233947753906, "learning_rate": 1.993690851735016e-06, "loss": 29.1406, "step": 3160 }, { "epoch": 0.029922094641285108, "grad_norm": 650.00830078125, "learning_rate": 1.9943217665615143e-06, "loss": 34.5781, "step": 3161 }, { "epoch": 0.02993156066300016, "grad_norm": 458.2017517089844, "learning_rate": 1.9949526813880124e-06, "loss": 27.875, "step": 3162 }, { "epoch": 0.029941026684715214, "grad_norm": 172.95606994628906, "learning_rate": 1.995583596214511e-06, "loss": 26.3438, "step": 3163 }, { "epoch": 0.029950492706430267, "grad_norm": 293.0791015625, "learning_rate": 1.9962145110410094e-06, "loss": 39.7188, "step": 3164 }, { "epoch": 0.029959958728145324, "grad_norm": 502.2494812011719, "learning_rate": 1.996845425867508e-06, "loss": 32.7344, "step": 3165 }, { "epoch": 0.029969424749860377, "grad_norm": 223.95590209960938, "learning_rate": 1.9974763406940064e-06, "loss": 31.3906, "step": 3166 }, { "epoch": 0.02997889077157543, "grad_norm": 786.6055908203125, "learning_rate": 1.998107255520505e-06, "loss": 40.7266, "step": 3167 }, { "epoch": 0.029988356793290483, "grad_norm": 645.2817993164062, "learning_rate": 1.998738170347003e-06, "loss": 35.9688, "step": 3168 }, { "epoch": 0.02999782281500554, "grad_norm": 788.1417236328125, "learning_rate": 1.9993690851735014e-06, "loss": 57.3438, "step": 3169 }, { "epoch": 0.030007288836720592, "grad_norm": 392.1400146484375, "learning_rate": 2e-06, "loss": 32.0156, "step": 3170 }, { "epoch": 0.030016754858435645, "grad_norm": 486.1142883300781, "learning_rate": 1.999999999530032e-06, "loss": 39.6719, "step": 3171 }, { "epoch": 0.030026220880150698, "grad_norm": 746.9313354492188, "learning_rate": 1.9999999981201297e-06, "loss": 58.25, "step": 3172 }, { "epoch": 0.03003568690186575, "grad_norm": 569.4801635742188, "learning_rate": 1.9999999957702926e-06, "loss": 37.5312, "step": 3173 }, { "epoch": 0.030045152923580808, "grad_norm": 235.57142639160156, "learning_rate": 1.9999999924805204e-06, "loss": 27.0312, "step": 3174 }, { "epoch": 0.03005461894529586, "grad_norm": 279.6739196777344, "learning_rate": 1.999999988250813e-06, "loss": 32.0781, "step": 3175 }, { "epoch": 0.030064084967010914, "grad_norm": 442.7533874511719, "learning_rate": 1.9999999830811707e-06, "loss": 51.0938, "step": 3176 }, { "epoch": 0.030073550988725967, "grad_norm": 421.93572998046875, "learning_rate": 1.999999976971594e-06, "loss": 28.2031, "step": 3177 }, { "epoch": 0.030083017010441023, "grad_norm": 541.4947509765625, "learning_rate": 1.999999969922082e-06, "loss": 54.1719, "step": 3178 }, { "epoch": 0.030092483032156076, "grad_norm": 448.1448974609375, "learning_rate": 1.999999961932635e-06, "loss": 29.2188, "step": 3179 }, { "epoch": 0.03010194905387113, "grad_norm": 789.3575439453125, "learning_rate": 1.999999953003253e-06, "loss": 30.3125, "step": 3180 }, { "epoch": 0.030111415075586182, "grad_norm": 502.7801818847656, "learning_rate": 1.9999999431339365e-06, "loss": 35.125, "step": 3181 }, { "epoch": 0.03012088109730124, "grad_norm": 696.0413208007812, "learning_rate": 1.9999999323246848e-06, "loss": 63.7422, "step": 3182 }, { "epoch": 0.03013034711901629, "grad_norm": 446.79144287109375, "learning_rate": 1.999999920575498e-06, "loss": 50.5781, "step": 3183 }, { "epoch": 0.030139813140731345, "grad_norm": 3369.864501953125, "learning_rate": 1.9999999078863765e-06, "loss": 31.4688, "step": 3184 }, { "epoch": 0.030149279162446398, "grad_norm": 694.4679565429688, "learning_rate": 1.9999998942573203e-06, "loss": 55.3672, "step": 3185 }, { "epoch": 0.030158745184161454, "grad_norm": 998.7286376953125, "learning_rate": 1.9999998796883295e-06, "loss": 32.9453, "step": 3186 }, { "epoch": 0.030168211205876507, "grad_norm": 182.1781005859375, "learning_rate": 1.9999998641794035e-06, "loss": 27.1719, "step": 3187 }, { "epoch": 0.03017767722759156, "grad_norm": 513.7490844726562, "learning_rate": 1.9999998477305425e-06, "loss": 31.9688, "step": 3188 }, { "epoch": 0.030187143249306613, "grad_norm": 455.4222412109375, "learning_rate": 1.9999998303417472e-06, "loss": 45.0, "step": 3189 }, { "epoch": 0.030196609271021666, "grad_norm": 908.7887573242188, "learning_rate": 1.9999998120130164e-06, "loss": 66.8281, "step": 3190 }, { "epoch": 0.030206075292736723, "grad_norm": 875.125732421875, "learning_rate": 1.9999997927443514e-06, "loss": 37.9531, "step": 3191 }, { "epoch": 0.030215541314451776, "grad_norm": 269.120361328125, "learning_rate": 1.9999997725357517e-06, "loss": 24.4062, "step": 3192 }, { "epoch": 0.03022500733616683, "grad_norm": 331.8780517578125, "learning_rate": 1.9999997513872173e-06, "loss": 31.7188, "step": 3193 }, { "epoch": 0.03023447335788188, "grad_norm": 209.45777893066406, "learning_rate": 1.9999997292987478e-06, "loss": 27.7969, "step": 3194 }, { "epoch": 0.030243939379596938, "grad_norm": 405.7959899902344, "learning_rate": 1.9999997062703436e-06, "loss": 54.5938, "step": 3195 }, { "epoch": 0.03025340540131199, "grad_norm": 655.9832763671875, "learning_rate": 1.999999682302005e-06, "loss": 45.75, "step": 3196 }, { "epoch": 0.030262871423027044, "grad_norm": 683.6698608398438, "learning_rate": 1.9999996573937317e-06, "loss": 35.8438, "step": 3197 }, { "epoch": 0.030272337444742097, "grad_norm": 312.967041015625, "learning_rate": 1.9999996315455235e-06, "loss": 26.875, "step": 3198 }, { "epoch": 0.030281803466457154, "grad_norm": 204.2545166015625, "learning_rate": 1.9999996047573815e-06, "loss": 27.625, "step": 3199 }, { "epoch": 0.030291269488172207, "grad_norm": 3.7489678859710693, "learning_rate": 1.999999577029304e-06, "loss": 0.9409, "step": 3200 }, { "epoch": 0.03030073550988726, "grad_norm": 642.7227172851562, "learning_rate": 1.9999995483612926e-06, "loss": 38.2969, "step": 3201 }, { "epoch": 0.030310201531602313, "grad_norm": 323.2586364746094, "learning_rate": 1.999999518753346e-06, "loss": 31.9531, "step": 3202 }, { "epoch": 0.030319667553317366, "grad_norm": 257.43353271484375, "learning_rate": 1.999999488205466e-06, "loss": 24.5312, "step": 3203 }, { "epoch": 0.030329133575032422, "grad_norm": 332.5235290527344, "learning_rate": 1.9999994567176504e-06, "loss": 28.2656, "step": 3204 }, { "epoch": 0.030338599596747475, "grad_norm": 525.5113525390625, "learning_rate": 1.9999994242899008e-06, "loss": 46.125, "step": 3205 }, { "epoch": 0.030348065618462528, "grad_norm": 487.46136474609375, "learning_rate": 1.999999390922217e-06, "loss": 69.0625, "step": 3206 }, { "epoch": 0.03035753164017758, "grad_norm": 1184.462158203125, "learning_rate": 1.9999993566145983e-06, "loss": 44.875, "step": 3207 }, { "epoch": 0.030366997661892638, "grad_norm": 803.6336669921875, "learning_rate": 1.999999321367046e-06, "loss": 37.25, "step": 3208 }, { "epoch": 0.03037646368360769, "grad_norm": 1426.6248779296875, "learning_rate": 1.999999285179559e-06, "loss": 36.4219, "step": 3209 }, { "epoch": 0.030385929705322744, "grad_norm": 252.62213134765625, "learning_rate": 1.9999992480521376e-06, "loss": 39.6875, "step": 3210 }, { "epoch": 0.030395395727037797, "grad_norm": 607.0608520507812, "learning_rate": 1.999999209984782e-06, "loss": 63.75, "step": 3211 }, { "epoch": 0.030404861748752853, "grad_norm": 349.3918151855469, "learning_rate": 1.999999170977492e-06, "loss": 29.6875, "step": 3212 }, { "epoch": 0.030414327770467906, "grad_norm": 739.0469360351562, "learning_rate": 1.9999991310302686e-06, "loss": 57.25, "step": 3213 }, { "epoch": 0.03042379379218296, "grad_norm": 3.6623353958129883, "learning_rate": 1.9999990901431107e-06, "loss": 0.9355, "step": 3214 }, { "epoch": 0.030433259813898012, "grad_norm": 340.71173095703125, "learning_rate": 1.999999048316018e-06, "loss": 42.8125, "step": 3215 }, { "epoch": 0.030442725835613065, "grad_norm": 1003.0576782226562, "learning_rate": 1.999999005548992e-06, "loss": 67.3828, "step": 3216 }, { "epoch": 0.03045219185732812, "grad_norm": 229.98934936523438, "learning_rate": 1.999998961842032e-06, "loss": 31.4688, "step": 3217 }, { "epoch": 0.030461657879043175, "grad_norm": 450.3116149902344, "learning_rate": 1.999998917195138e-06, "loss": 47.75, "step": 3218 }, { "epoch": 0.030471123900758228, "grad_norm": 529.7185668945312, "learning_rate": 1.9999988716083096e-06, "loss": 31.0, "step": 3219 }, { "epoch": 0.03048058992247328, "grad_norm": 523.8282470703125, "learning_rate": 1.999998825081548e-06, "loss": 60.5781, "step": 3220 }, { "epoch": 0.030490055944188337, "grad_norm": 380.2268981933594, "learning_rate": 1.9999987776148523e-06, "loss": 26.7656, "step": 3221 }, { "epoch": 0.03049952196590339, "grad_norm": 647.36767578125, "learning_rate": 1.999998729208222e-06, "loss": 38.8594, "step": 3222 }, { "epoch": 0.030508987987618443, "grad_norm": 432.58770751953125, "learning_rate": 1.999998679861659e-06, "loss": 30.0312, "step": 3223 }, { "epoch": 0.030518454009333496, "grad_norm": 485.7008361816406, "learning_rate": 1.999998629575162e-06, "loss": 26.9219, "step": 3224 }, { "epoch": 0.030527920031048553, "grad_norm": 537.4630737304688, "learning_rate": 1.999998578348731e-06, "loss": 42.0312, "step": 3225 }, { "epoch": 0.030537386052763606, "grad_norm": 813.52978515625, "learning_rate": 1.999998526182367e-06, "loss": 61.5938, "step": 3226 }, { "epoch": 0.03054685207447866, "grad_norm": 3.3232784271240234, "learning_rate": 1.9999984730760693e-06, "loss": 0.8687, "step": 3227 }, { "epoch": 0.03055631809619371, "grad_norm": 307.9361877441406, "learning_rate": 1.9999984190298374e-06, "loss": 25.6406, "step": 3228 }, { "epoch": 0.030565784117908768, "grad_norm": 431.9600524902344, "learning_rate": 1.999998364043673e-06, "loss": 36.0312, "step": 3229 }, { "epoch": 0.03057525013962382, "grad_norm": 214.1068878173828, "learning_rate": 1.9999983081175747e-06, "loss": 28.7188, "step": 3230 }, { "epoch": 0.030584716161338874, "grad_norm": 478.6322326660156, "learning_rate": 1.999998251251543e-06, "loss": 31.9531, "step": 3231 }, { "epoch": 0.030594182183053927, "grad_norm": 517.6741333007812, "learning_rate": 1.999998193445578e-06, "loss": 54.8125, "step": 3232 }, { "epoch": 0.03060364820476898, "grad_norm": 840.2664794921875, "learning_rate": 1.99999813469968e-06, "loss": 63.5625, "step": 3233 }, { "epoch": 0.030613114226484037, "grad_norm": 407.71148681640625, "learning_rate": 1.9999980750138484e-06, "loss": 28.1875, "step": 3234 }, { "epoch": 0.03062258024819909, "grad_norm": 1142.6806640625, "learning_rate": 1.999998014388084e-06, "loss": 72.9453, "step": 3235 }, { "epoch": 0.030632046269914143, "grad_norm": 384.70233154296875, "learning_rate": 1.9999979528223865e-06, "loss": 52.0781, "step": 3236 }, { "epoch": 0.030641512291629196, "grad_norm": 435.7012634277344, "learning_rate": 1.9999978903167557e-06, "loss": 24.6641, "step": 3237 }, { "epoch": 0.030650978313344252, "grad_norm": 437.2834167480469, "learning_rate": 1.9999978268711923e-06, "loss": 33.4531, "step": 3238 }, { "epoch": 0.030660444335059305, "grad_norm": 639.8961791992188, "learning_rate": 1.999997762485696e-06, "loss": 30.1562, "step": 3239 }, { "epoch": 0.030669910356774358, "grad_norm": 449.9952087402344, "learning_rate": 1.9999976971602666e-06, "loss": 48.8281, "step": 3240 }, { "epoch": 0.03067937637848941, "grad_norm": 205.69735717773438, "learning_rate": 1.9999976308949047e-06, "loss": 24.4844, "step": 3241 }, { "epoch": 0.030688842400204468, "grad_norm": 281.23614501953125, "learning_rate": 1.99999756368961e-06, "loss": 35.9844, "step": 3242 }, { "epoch": 0.03069830842191952, "grad_norm": 245.3309783935547, "learning_rate": 1.9999974955443824e-06, "loss": 28.4062, "step": 3243 }, { "epoch": 0.030707774443634574, "grad_norm": 453.17840576171875, "learning_rate": 1.9999974264592224e-06, "loss": 52.5625, "step": 3244 }, { "epoch": 0.030717240465349627, "grad_norm": 248.95689392089844, "learning_rate": 1.99999735643413e-06, "loss": 23.6094, "step": 3245 }, { "epoch": 0.03072670648706468, "grad_norm": 190.487060546875, "learning_rate": 1.9999972854691048e-06, "loss": 27.7812, "step": 3246 }, { "epoch": 0.030736172508779736, "grad_norm": 3.1487772464752197, "learning_rate": 1.999997213564147e-06, "loss": 0.9062, "step": 3247 }, { "epoch": 0.03074563853049479, "grad_norm": 359.7401123046875, "learning_rate": 1.9999971407192573e-06, "loss": 24.375, "step": 3248 }, { "epoch": 0.030755104552209842, "grad_norm": 169.2506866455078, "learning_rate": 1.9999970669344354e-06, "loss": 27.2031, "step": 3249 }, { "epoch": 0.030764570573924895, "grad_norm": 623.527099609375, "learning_rate": 1.9999969922096813e-06, "loss": 60.375, "step": 3250 }, { "epoch": 0.03077403659563995, "grad_norm": 1009.948486328125, "learning_rate": 1.9999969165449947e-06, "loss": 83.3125, "step": 3251 }, { "epoch": 0.030783502617355005, "grad_norm": 623.0900268554688, "learning_rate": 1.999996839940377e-06, "loss": 51.1562, "step": 3252 }, { "epoch": 0.030792968639070058, "grad_norm": 2.6727352142333984, "learning_rate": 1.9999967623958264e-06, "loss": 0.9238, "step": 3253 }, { "epoch": 0.03080243466078511, "grad_norm": 721.450439453125, "learning_rate": 1.999996683911344e-06, "loss": 40.4688, "step": 3254 }, { "epoch": 0.030811900682500167, "grad_norm": 2.913428783416748, "learning_rate": 1.99999660448693e-06, "loss": 0.854, "step": 3255 }, { "epoch": 0.03082136670421522, "grad_norm": 1240.0350341796875, "learning_rate": 1.9999965241225843e-06, "loss": 46.7812, "step": 3256 }, { "epoch": 0.030830832725930273, "grad_norm": 896.8504638671875, "learning_rate": 1.9999964428183066e-06, "loss": 59.8125, "step": 3257 }, { "epoch": 0.030840298747645326, "grad_norm": 219.51719665527344, "learning_rate": 1.9999963605740976e-06, "loss": 29.2656, "step": 3258 }, { "epoch": 0.03084976476936038, "grad_norm": 500.43133544921875, "learning_rate": 1.999996277389957e-06, "loss": 28.9922, "step": 3259 }, { "epoch": 0.030859230791075436, "grad_norm": 537.9011840820312, "learning_rate": 1.999996193265885e-06, "loss": 54.4531, "step": 3260 }, { "epoch": 0.03086869681279049, "grad_norm": 184.24314880371094, "learning_rate": 1.9999961082018816e-06, "loss": 26.4531, "step": 3261 }, { "epoch": 0.03087816283450554, "grad_norm": 273.1463317871094, "learning_rate": 1.9999960221979466e-06, "loss": 33.4531, "step": 3262 }, { "epoch": 0.030887628856220595, "grad_norm": 652.9874877929688, "learning_rate": 1.9999959352540808e-06, "loss": 74.25, "step": 3263 }, { "epoch": 0.03089709487793565, "grad_norm": 987.5709228515625, "learning_rate": 1.9999958473702836e-06, "loss": 42.7344, "step": 3264 }, { "epoch": 0.030906560899650704, "grad_norm": 492.3360900878906, "learning_rate": 1.9999957585465556e-06, "loss": 54.1562, "step": 3265 }, { "epoch": 0.030916026921365757, "grad_norm": 1432.8507080078125, "learning_rate": 1.9999956687828963e-06, "loss": 27.5469, "step": 3266 }, { "epoch": 0.03092549294308081, "grad_norm": 1779.567138671875, "learning_rate": 1.9999955780793066e-06, "loss": 36.25, "step": 3267 }, { "epoch": 0.030934958964795867, "grad_norm": 905.7508544921875, "learning_rate": 1.9999954864357856e-06, "loss": 26.3125, "step": 3268 }, { "epoch": 0.03094442498651092, "grad_norm": 330.67169189453125, "learning_rate": 1.9999953938523346e-06, "loss": 41.1406, "step": 3269 }, { "epoch": 0.030953891008225973, "grad_norm": 620.3714599609375, "learning_rate": 1.9999953003289522e-06, "loss": 70.5781, "step": 3270 }, { "epoch": 0.030963357029941026, "grad_norm": 459.8768615722656, "learning_rate": 1.99999520586564e-06, "loss": 28.7578, "step": 3271 }, { "epoch": 0.030972823051656082, "grad_norm": 312.7322998046875, "learning_rate": 1.9999951104623967e-06, "loss": 28.1094, "step": 3272 }, { "epoch": 0.030982289073371135, "grad_norm": 251.69932556152344, "learning_rate": 1.999995014119223e-06, "loss": 27.7031, "step": 3273 }, { "epoch": 0.030991755095086188, "grad_norm": 670.822509765625, "learning_rate": 1.9999949168361195e-06, "loss": 52.4219, "step": 3274 }, { "epoch": 0.03100122111680124, "grad_norm": 512.6666259765625, "learning_rate": 1.999994818613086e-06, "loss": 45.1562, "step": 3275 }, { "epoch": 0.031010687138516294, "grad_norm": 291.78076171875, "learning_rate": 1.999994719450122e-06, "loss": 23.9375, "step": 3276 }, { "epoch": 0.03102015316023135, "grad_norm": 406.6192321777344, "learning_rate": 1.999994619347228e-06, "loss": 39.5781, "step": 3277 }, { "epoch": 0.031029619181946404, "grad_norm": 327.3577880859375, "learning_rate": 1.9999945183044043e-06, "loss": 26.7812, "step": 3278 }, { "epoch": 0.031039085203661457, "grad_norm": 256.3595886230469, "learning_rate": 1.999994416321651e-06, "loss": 27.4062, "step": 3279 }, { "epoch": 0.03104855122537651, "grad_norm": 556.26220703125, "learning_rate": 1.9999943133989677e-06, "loss": 38.9844, "step": 3280 }, { "epoch": 0.031058017247091566, "grad_norm": 293.7099304199219, "learning_rate": 1.9999942095363548e-06, "loss": 28.6719, "step": 3281 }, { "epoch": 0.03106748326880662, "grad_norm": 356.8780822753906, "learning_rate": 1.9999941047338123e-06, "loss": 36.7344, "step": 3282 }, { "epoch": 0.031076949290521672, "grad_norm": 186.85328674316406, "learning_rate": 1.9999939989913406e-06, "loss": 28.9688, "step": 3283 }, { "epoch": 0.031086415312236725, "grad_norm": 768.3455200195312, "learning_rate": 1.99999389230894e-06, "loss": 51.9922, "step": 3284 }, { "epoch": 0.03109588133395178, "grad_norm": 2.9011898040771484, "learning_rate": 1.9999937846866094e-06, "loss": 0.8628, "step": 3285 }, { "epoch": 0.031105347355666835, "grad_norm": 928.253662109375, "learning_rate": 1.99999367612435e-06, "loss": 40.8672, "step": 3286 }, { "epoch": 0.031114813377381888, "grad_norm": 709.6005249023438, "learning_rate": 1.999993566622162e-06, "loss": 49.4844, "step": 3287 }, { "epoch": 0.03112427939909694, "grad_norm": 417.0143737792969, "learning_rate": 1.9999934561800444e-06, "loss": 51.5469, "step": 3288 }, { "epoch": 0.031133745420811994, "grad_norm": 385.7736511230469, "learning_rate": 1.9999933447979985e-06, "loss": 25.4141, "step": 3289 }, { "epoch": 0.03114321144252705, "grad_norm": 233.8036651611328, "learning_rate": 1.9999932324760235e-06, "loss": 28.125, "step": 3290 }, { "epoch": 0.031152677464242103, "grad_norm": 524.3990478515625, "learning_rate": 1.9999931192141202e-06, "loss": 23.707, "step": 3291 }, { "epoch": 0.031162143485957156, "grad_norm": 314.5956115722656, "learning_rate": 1.9999930050122886e-06, "loss": 35.3516, "step": 3292 }, { "epoch": 0.03117160950767221, "grad_norm": 1412.33251953125, "learning_rate": 1.9999928898705286e-06, "loss": 53.4531, "step": 3293 }, { "epoch": 0.031181075529387266, "grad_norm": 363.5382080078125, "learning_rate": 1.99999277378884e-06, "loss": 25.4375, "step": 3294 }, { "epoch": 0.03119054155110232, "grad_norm": 517.1614379882812, "learning_rate": 1.9999926567672233e-06, "loss": 83.625, "step": 3295 }, { "epoch": 0.031200007572817372, "grad_norm": 676.931396484375, "learning_rate": 1.999992538805679e-06, "loss": 41.2422, "step": 3296 }, { "epoch": 0.031209473594532425, "grad_norm": 289.19940185546875, "learning_rate": 1.999992419904206e-06, "loss": 25.5547, "step": 3297 }, { "epoch": 0.03121893961624748, "grad_norm": 716.4273071289062, "learning_rate": 1.9999923000628057e-06, "loss": 49.5156, "step": 3298 }, { "epoch": 0.031228405637962534, "grad_norm": 251.3394012451172, "learning_rate": 1.999992179281478e-06, "loss": 32.2031, "step": 3299 }, { "epoch": 0.031237871659677587, "grad_norm": 612.5974731445312, "learning_rate": 1.999992057560222e-06, "loss": 39.5625, "step": 3300 }, { "epoch": 0.03124733768139264, "grad_norm": 194.70318603515625, "learning_rate": 1.9999919348990387e-06, "loss": 28.5156, "step": 3301 }, { "epoch": 0.0312568037031077, "grad_norm": 314.84722900390625, "learning_rate": 1.9999918112979285e-06, "loss": 39.9375, "step": 3302 }, { "epoch": 0.03126626972482275, "grad_norm": 277.33990478515625, "learning_rate": 1.9999916867568905e-06, "loss": 26.1328, "step": 3303 }, { "epoch": 0.0312757357465378, "grad_norm": 389.2458801269531, "learning_rate": 1.9999915612759257e-06, "loss": 25.7266, "step": 3304 }, { "epoch": 0.031285201768252856, "grad_norm": 604.673828125, "learning_rate": 1.9999914348550336e-06, "loss": 32.6484, "step": 3305 }, { "epoch": 0.03129466778996791, "grad_norm": 457.7138366699219, "learning_rate": 1.999991307494215e-06, "loss": 34.4453, "step": 3306 }, { "epoch": 0.03130413381168296, "grad_norm": 426.0896911621094, "learning_rate": 1.9999911791934693e-06, "loss": 66.5781, "step": 3307 }, { "epoch": 0.031313599833398015, "grad_norm": 365.23931884765625, "learning_rate": 1.999991049952797e-06, "loss": 33.7969, "step": 3308 }, { "epoch": 0.031323065855113075, "grad_norm": 600.200439453125, "learning_rate": 1.999990919772198e-06, "loss": 46.5625, "step": 3309 }, { "epoch": 0.03133253187682813, "grad_norm": 202.21177673339844, "learning_rate": 1.9999907886516732e-06, "loss": 29.8125, "step": 3310 }, { "epoch": 0.03134199789854318, "grad_norm": 303.09619140625, "learning_rate": 1.9999906565912217e-06, "loss": 29.0312, "step": 3311 }, { "epoch": 0.031351463920258234, "grad_norm": 895.576904296875, "learning_rate": 1.999990523590844e-06, "loss": 60.7344, "step": 3312 }, { "epoch": 0.03136092994197329, "grad_norm": 513.9495239257812, "learning_rate": 1.99999038965054e-06, "loss": 30.375, "step": 3313 }, { "epoch": 0.03137039596368834, "grad_norm": 1219.282958984375, "learning_rate": 1.9999902547703103e-06, "loss": 51.7344, "step": 3314 }, { "epoch": 0.03137986198540339, "grad_norm": 292.9039611816406, "learning_rate": 1.9999901189501552e-06, "loss": 28.7344, "step": 3315 }, { "epoch": 0.031389328007118446, "grad_norm": 706.609130859375, "learning_rate": 1.9999899821900744e-06, "loss": 71.3438, "step": 3316 }, { "epoch": 0.0313987940288335, "grad_norm": 510.2052917480469, "learning_rate": 1.9999898444900678e-06, "loss": 57.6406, "step": 3317 }, { "epoch": 0.03140826005054856, "grad_norm": 673.063720703125, "learning_rate": 1.9999897058501358e-06, "loss": 51.3438, "step": 3318 }, { "epoch": 0.03141772607226361, "grad_norm": 303.8896789550781, "learning_rate": 1.9999895662702784e-06, "loss": 27.1094, "step": 3319 }, { "epoch": 0.031427192093978665, "grad_norm": 435.636474609375, "learning_rate": 1.9999894257504957e-06, "loss": 61.75, "step": 3320 }, { "epoch": 0.03143665811569372, "grad_norm": 607.21435546875, "learning_rate": 1.9999892842907885e-06, "loss": 59.7344, "step": 3321 }, { "epoch": 0.03144612413740877, "grad_norm": 178.88523864746094, "learning_rate": 1.9999891418911564e-06, "loss": 26.7031, "step": 3322 }, { "epoch": 0.031455590159123824, "grad_norm": 1023.7301025390625, "learning_rate": 1.9999889985515993e-06, "loss": 46.4453, "step": 3323 }, { "epoch": 0.03146505618083888, "grad_norm": 848.8565673828125, "learning_rate": 1.9999888542721177e-06, "loss": 52.5, "step": 3324 }, { "epoch": 0.03147452220255393, "grad_norm": 799.8582153320312, "learning_rate": 1.9999887090527116e-06, "loss": 72.1875, "step": 3325 }, { "epoch": 0.03148398822426899, "grad_norm": 364.5196228027344, "learning_rate": 1.9999885628933815e-06, "loss": 32.2031, "step": 3326 }, { "epoch": 0.03149345424598404, "grad_norm": 245.7971649169922, "learning_rate": 1.9999884157941268e-06, "loss": 25.6172, "step": 3327 }, { "epoch": 0.031502920267699096, "grad_norm": 429.31134033203125, "learning_rate": 1.999988267754948e-06, "loss": 44.75, "step": 3328 }, { "epoch": 0.03151238628941415, "grad_norm": 1012.7095336914062, "learning_rate": 1.9999881187758456e-06, "loss": 58.2344, "step": 3329 }, { "epoch": 0.0315218523111292, "grad_norm": 474.3479309082031, "learning_rate": 1.999987968856819e-06, "loss": 38.8047, "step": 3330 }, { "epoch": 0.031531318332844255, "grad_norm": 456.90093994140625, "learning_rate": 1.9999878179978693e-06, "loss": 32.1875, "step": 3331 }, { "epoch": 0.03154078435455931, "grad_norm": 299.2099304199219, "learning_rate": 1.9999876661989954e-06, "loss": 30.7188, "step": 3332 }, { "epoch": 0.03155025037627436, "grad_norm": 306.9521789550781, "learning_rate": 1.9999875134601988e-06, "loss": 28.4219, "step": 3333 }, { "epoch": 0.031559716397989414, "grad_norm": 603.1840209960938, "learning_rate": 1.999987359781479e-06, "loss": 59.4375, "step": 3334 }, { "epoch": 0.031569182419704474, "grad_norm": 387.1522521972656, "learning_rate": 1.9999872051628353e-06, "loss": 41.7031, "step": 3335 }, { "epoch": 0.03157864844141953, "grad_norm": 705.6858520507812, "learning_rate": 1.9999870496042693e-06, "loss": 44.8125, "step": 3336 }, { "epoch": 0.03158811446313458, "grad_norm": 1911.96484375, "learning_rate": 1.9999868931057806e-06, "loss": 44.0625, "step": 3337 }, { "epoch": 0.03159758048484963, "grad_norm": 664.0446166992188, "learning_rate": 1.9999867356673694e-06, "loss": 33.2812, "step": 3338 }, { "epoch": 0.031607046506564686, "grad_norm": 408.93328857421875, "learning_rate": 1.9999865772890354e-06, "loss": 32.625, "step": 3339 }, { "epoch": 0.03161651252827974, "grad_norm": 276.5694885253906, "learning_rate": 1.999986417970779e-06, "loss": 25.7344, "step": 3340 }, { "epoch": 0.03162597854999479, "grad_norm": 439.4228820800781, "learning_rate": 1.9999862577126007e-06, "loss": 48.4531, "step": 3341 }, { "epoch": 0.031635444571709845, "grad_norm": 647.5966796875, "learning_rate": 1.9999860965145e-06, "loss": 75.5469, "step": 3342 }, { "epoch": 0.031644910593424905, "grad_norm": 363.9642639160156, "learning_rate": 1.999985934376478e-06, "loss": 28.4219, "step": 3343 }, { "epoch": 0.03165437661513996, "grad_norm": 1019.212646484375, "learning_rate": 1.999985771298534e-06, "loss": 67.875, "step": 3344 }, { "epoch": 0.03166384263685501, "grad_norm": 348.827392578125, "learning_rate": 1.999985607280668e-06, "loss": 29.5625, "step": 3345 }, { "epoch": 0.031673308658570064, "grad_norm": 332.645263671875, "learning_rate": 1.999985442322881e-06, "loss": 33.9062, "step": 3346 }, { "epoch": 0.03168277468028512, "grad_norm": 260.5066223144531, "learning_rate": 1.999985276425173e-06, "loss": 24.375, "step": 3347 }, { "epoch": 0.03169224070200017, "grad_norm": 191.41416931152344, "learning_rate": 1.9999851095875436e-06, "loss": 31.0312, "step": 3348 }, { "epoch": 0.03170170672371522, "grad_norm": 658.6119995117188, "learning_rate": 1.999984941809993e-06, "loss": 33.2266, "step": 3349 }, { "epoch": 0.031711172745430276, "grad_norm": 205.93592834472656, "learning_rate": 1.999984773092522e-06, "loss": 23.2812, "step": 3350 }, { "epoch": 0.03172063876714533, "grad_norm": 240.97244262695312, "learning_rate": 1.99998460343513e-06, "loss": 29.5781, "step": 3351 }, { "epoch": 0.03173010478886039, "grad_norm": 1157.7183837890625, "learning_rate": 1.999984432837818e-06, "loss": 42.75, "step": 3352 }, { "epoch": 0.03173957081057544, "grad_norm": 526.8160400390625, "learning_rate": 1.999984261300585e-06, "loss": 30.0078, "step": 3353 }, { "epoch": 0.031749036832290495, "grad_norm": 276.1238098144531, "learning_rate": 1.9999840888234327e-06, "loss": 30.2344, "step": 3354 }, { "epoch": 0.03175850285400555, "grad_norm": 204.40716552734375, "learning_rate": 1.99998391540636e-06, "loss": 29.9219, "step": 3355 }, { "epoch": 0.0317679688757206, "grad_norm": 208.9193878173828, "learning_rate": 1.9999837410493673e-06, "loss": 27.6094, "step": 3356 }, { "epoch": 0.031777434897435654, "grad_norm": 904.1305541992188, "learning_rate": 1.9999835657524553e-06, "loss": 59.8906, "step": 3357 }, { "epoch": 0.03178690091915071, "grad_norm": 334.5029602050781, "learning_rate": 1.9999833895156234e-06, "loss": 24.2891, "step": 3358 }, { "epoch": 0.03179636694086576, "grad_norm": 221.40673828125, "learning_rate": 1.9999832123388725e-06, "loss": 24.25, "step": 3359 }, { "epoch": 0.03180583296258081, "grad_norm": 585.0147705078125, "learning_rate": 1.999983034222202e-06, "loss": 58.3672, "step": 3360 }, { "epoch": 0.03181529898429587, "grad_norm": 562.0262451171875, "learning_rate": 1.9999828551656132e-06, "loss": 61.0312, "step": 3361 }, { "epoch": 0.031824765006010926, "grad_norm": 1074.15283203125, "learning_rate": 1.999982675169105e-06, "loss": 76.8516, "step": 3362 }, { "epoch": 0.03183423102772598, "grad_norm": 467.15045166015625, "learning_rate": 1.999982494232678e-06, "loss": 55.9375, "step": 3363 }, { "epoch": 0.03184369704944103, "grad_norm": 341.1269836425781, "learning_rate": 1.999982312356333e-06, "loss": 28.2031, "step": 3364 }, { "epoch": 0.031853163071156085, "grad_norm": 390.0650329589844, "learning_rate": 1.9999821295400693e-06, "loss": 33.1016, "step": 3365 }, { "epoch": 0.03186262909287114, "grad_norm": 556.1158447265625, "learning_rate": 1.999981945783888e-06, "loss": 50.9062, "step": 3366 }, { "epoch": 0.03187209511458619, "grad_norm": 653.0689697265625, "learning_rate": 1.9999817610877883e-06, "loss": 53.6406, "step": 3367 }, { "epoch": 0.031881561136301244, "grad_norm": 3.1060891151428223, "learning_rate": 1.9999815754517706e-06, "loss": 0.8911, "step": 3368 }, { "epoch": 0.031891027158016304, "grad_norm": 477.9071044921875, "learning_rate": 1.999981388875836e-06, "loss": 53.4375, "step": 3369 }, { "epoch": 0.03190049317973136, "grad_norm": 336.6730041503906, "learning_rate": 1.9999812013599833e-06, "loss": 29.9844, "step": 3370 }, { "epoch": 0.03190995920144641, "grad_norm": 681.0098876953125, "learning_rate": 1.9999810129042132e-06, "loss": 44.7812, "step": 3371 }, { "epoch": 0.03191942522316146, "grad_norm": 694.3992919921875, "learning_rate": 1.9999808235085263e-06, "loss": 42.2969, "step": 3372 }, { "epoch": 0.031928891244876516, "grad_norm": 1285.9647216796875, "learning_rate": 1.999980633172923e-06, "loss": 77.5312, "step": 3373 }, { "epoch": 0.03193835726659157, "grad_norm": 3.225764036178589, "learning_rate": 1.999980441897402e-06, "loss": 1.0474, "step": 3374 }, { "epoch": 0.03194782328830662, "grad_norm": 406.3236389160156, "learning_rate": 1.999980249681965e-06, "loss": 28.2812, "step": 3375 }, { "epoch": 0.031957289310021675, "grad_norm": 3.220310926437378, "learning_rate": 1.999980056526612e-06, "loss": 0.8965, "step": 3376 }, { "epoch": 0.03196675533173673, "grad_norm": 592.7985229492188, "learning_rate": 1.999979862431342e-06, "loss": 48.5625, "step": 3377 }, { "epoch": 0.03197622135345179, "grad_norm": 870.5581665039062, "learning_rate": 1.9999796673961564e-06, "loss": 57.1719, "step": 3378 }, { "epoch": 0.03198568737516684, "grad_norm": 225.9832000732422, "learning_rate": 1.9999794714210547e-06, "loss": 28.9531, "step": 3379 }, { "epoch": 0.031995153396881894, "grad_norm": 194.03749084472656, "learning_rate": 1.9999792745060377e-06, "loss": 21.8906, "step": 3380 }, { "epoch": 0.03200461941859695, "grad_norm": 402.4746398925781, "learning_rate": 1.9999790766511047e-06, "loss": 33.9531, "step": 3381 }, { "epoch": 0.032014085440312, "grad_norm": 397.0271301269531, "learning_rate": 1.9999788778562565e-06, "loss": 35.8906, "step": 3382 }, { "epoch": 0.03202355146202705, "grad_norm": 746.3742065429688, "learning_rate": 1.9999786781214936e-06, "loss": 62.7969, "step": 3383 }, { "epoch": 0.032033017483742106, "grad_norm": 539.602783203125, "learning_rate": 1.999978477446816e-06, "loss": 40.2031, "step": 3384 }, { "epoch": 0.03204248350545716, "grad_norm": 737.9983520507812, "learning_rate": 1.9999782758322234e-06, "loss": 48.4375, "step": 3385 }, { "epoch": 0.03205194952717222, "grad_norm": 917.9995727539062, "learning_rate": 1.9999780732777157e-06, "loss": 73.0078, "step": 3386 }, { "epoch": 0.03206141554888727, "grad_norm": 855.3569946289062, "learning_rate": 1.9999778697832946e-06, "loss": 44.3828, "step": 3387 }, { "epoch": 0.032070881570602325, "grad_norm": 794.7740478515625, "learning_rate": 1.999977665348959e-06, "loss": 63.125, "step": 3388 }, { "epoch": 0.03208034759231738, "grad_norm": 218.09326171875, "learning_rate": 1.999977459974709e-06, "loss": 29.2969, "step": 3389 }, { "epoch": 0.03208981361403243, "grad_norm": 424.3377380371094, "learning_rate": 1.999977253660546e-06, "loss": 57.7031, "step": 3390 }, { "epoch": 0.032099279635747484, "grad_norm": 1058.726806640625, "learning_rate": 1.9999770464064685e-06, "loss": 55.0938, "step": 3391 }, { "epoch": 0.03210874565746254, "grad_norm": 3.0147533416748047, "learning_rate": 1.9999768382124786e-06, "loss": 0.8356, "step": 3392 }, { "epoch": 0.03211821167917759, "grad_norm": 3.0228869915008545, "learning_rate": 1.999976629078575e-06, "loss": 0.917, "step": 3393 }, { "epoch": 0.03212767770089264, "grad_norm": 379.7904357910156, "learning_rate": 1.9999764190047587e-06, "loss": 54.0938, "step": 3394 }, { "epoch": 0.0321371437226077, "grad_norm": 3.2490735054016113, "learning_rate": 1.9999762079910295e-06, "loss": 0.9688, "step": 3395 }, { "epoch": 0.032146609744322756, "grad_norm": 403.87677001953125, "learning_rate": 1.9999759960373877e-06, "loss": 32.9375, "step": 3396 }, { "epoch": 0.03215607576603781, "grad_norm": 313.8369140625, "learning_rate": 1.9999757831438332e-06, "loss": 30.1719, "step": 3397 }, { "epoch": 0.03216554178775286, "grad_norm": 539.1091918945312, "learning_rate": 1.999975569310367e-06, "loss": 54.7734, "step": 3398 }, { "epoch": 0.032175007809467915, "grad_norm": 536.8291015625, "learning_rate": 1.9999753545369885e-06, "loss": 33.6719, "step": 3399 }, { "epoch": 0.03218447383118297, "grad_norm": 281.17779541015625, "learning_rate": 1.999975138823698e-06, "loss": 26.6406, "step": 3400 }, { "epoch": 0.03219393985289802, "grad_norm": 411.66943359375, "learning_rate": 1.9999749221704965e-06, "loss": 36.7578, "step": 3401 }, { "epoch": 0.032203405874613074, "grad_norm": 869.3652954101562, "learning_rate": 1.9999747045773834e-06, "loss": 48.1875, "step": 3402 }, { "epoch": 0.03221287189632813, "grad_norm": 392.89324951171875, "learning_rate": 1.9999744860443594e-06, "loss": 33.1172, "step": 3403 }, { "epoch": 0.03222233791804319, "grad_norm": 3.7795960903167725, "learning_rate": 1.999974266571424e-06, "loss": 0.9243, "step": 3404 }, { "epoch": 0.03223180393975824, "grad_norm": 2.7467689514160156, "learning_rate": 1.999974046158578e-06, "loss": 0.7791, "step": 3405 }, { "epoch": 0.03224126996147329, "grad_norm": 3.111161947250366, "learning_rate": 1.9999738248058217e-06, "loss": 1.0098, "step": 3406 }, { "epoch": 0.032250735983188346, "grad_norm": 441.5636291503906, "learning_rate": 1.9999736025131547e-06, "loss": 38.625, "step": 3407 }, { "epoch": 0.0322602020049034, "grad_norm": 559.6317138671875, "learning_rate": 1.9999733792805776e-06, "loss": 69.4609, "step": 3408 }, { "epoch": 0.03226966802661845, "grad_norm": 3.6285147666931152, "learning_rate": 1.9999731551080903e-06, "loss": 1.0176, "step": 3409 }, { "epoch": 0.032279134048333505, "grad_norm": 458.4759826660156, "learning_rate": 1.999972929995694e-06, "loss": 27.0625, "step": 3410 }, { "epoch": 0.03228860007004856, "grad_norm": 3.017141580581665, "learning_rate": 1.9999727039433877e-06, "loss": 0.9155, "step": 3411 }, { "epoch": 0.03229806609176362, "grad_norm": 264.9696350097656, "learning_rate": 1.9999724769511723e-06, "loss": 28.1406, "step": 3412 }, { "epoch": 0.03230753211347867, "grad_norm": 522.0541381835938, "learning_rate": 1.9999722490190476e-06, "loss": 44.0781, "step": 3413 }, { "epoch": 0.032316998135193724, "grad_norm": 745.7404174804688, "learning_rate": 1.9999720201470145e-06, "loss": 35.9062, "step": 3414 }, { "epoch": 0.03232646415690878, "grad_norm": 2.5280985832214355, "learning_rate": 1.9999717903350725e-06, "loss": 0.8511, "step": 3415 }, { "epoch": 0.03233593017862383, "grad_norm": 213.4480743408203, "learning_rate": 1.9999715595832218e-06, "loss": 30.9688, "step": 3416 }, { "epoch": 0.03234539620033888, "grad_norm": 566.2525634765625, "learning_rate": 1.999971327891463e-06, "loss": 46.4062, "step": 3417 }, { "epoch": 0.032354862222053936, "grad_norm": 724.3072509765625, "learning_rate": 1.999971095259796e-06, "loss": 62.0, "step": 3418 }, { "epoch": 0.03236432824376899, "grad_norm": 336.5927734375, "learning_rate": 1.999970861688222e-06, "loss": 27.375, "step": 3419 }, { "epoch": 0.03237379426548404, "grad_norm": 463.91888427734375, "learning_rate": 1.99997062717674e-06, "loss": 32.6562, "step": 3420 }, { "epoch": 0.0323832602871991, "grad_norm": 254.52938842773438, "learning_rate": 1.9999703917253505e-06, "loss": 31.2812, "step": 3421 }, { "epoch": 0.032392726308914155, "grad_norm": 293.4938659667969, "learning_rate": 1.9999701553340543e-06, "loss": 30.0156, "step": 3422 }, { "epoch": 0.03240219233062921, "grad_norm": 1044.8099365234375, "learning_rate": 1.9999699180028505e-06, "loss": 38.3906, "step": 3423 }, { "epoch": 0.03241165835234426, "grad_norm": 280.5499572753906, "learning_rate": 1.999969679731741e-06, "loss": 28.0938, "step": 3424 }, { "epoch": 0.032421124374059314, "grad_norm": 1304.0301513671875, "learning_rate": 1.9999694405207245e-06, "loss": 87.6094, "step": 3425 }, { "epoch": 0.03243059039577437, "grad_norm": 1077.2945556640625, "learning_rate": 1.9999692003698014e-06, "loss": 58.5859, "step": 3426 }, { "epoch": 0.03244005641748942, "grad_norm": 952.1422729492188, "learning_rate": 1.999968959278973e-06, "loss": 30.5781, "step": 3427 }, { "epoch": 0.03244952243920447, "grad_norm": 397.8121337890625, "learning_rate": 1.9999687172482385e-06, "loss": 26.7812, "step": 3428 }, { "epoch": 0.032458988460919526, "grad_norm": 269.05853271484375, "learning_rate": 1.9999684742775982e-06, "loss": 29.4219, "step": 3429 }, { "epoch": 0.032468454482634586, "grad_norm": 742.05859375, "learning_rate": 1.999968230367053e-06, "loss": 31.5469, "step": 3430 }, { "epoch": 0.03247792050434964, "grad_norm": 462.35284423828125, "learning_rate": 1.9999679855166026e-06, "loss": 64.0, "step": 3431 }, { "epoch": 0.03248738652606469, "grad_norm": 265.41192626953125, "learning_rate": 1.999967739726247e-06, "loss": 26.1719, "step": 3432 }, { "epoch": 0.032496852547779745, "grad_norm": 3.029388904571533, "learning_rate": 1.9999674929959872e-06, "loss": 0.8696, "step": 3433 }, { "epoch": 0.0325063185694948, "grad_norm": 974.12841796875, "learning_rate": 1.999967245325823e-06, "loss": 34.2188, "step": 3434 }, { "epoch": 0.03251578459120985, "grad_norm": 274.8855285644531, "learning_rate": 1.9999669967157547e-06, "loss": 30.2344, "step": 3435 }, { "epoch": 0.032525250612924904, "grad_norm": 213.17384338378906, "learning_rate": 1.9999667471657822e-06, "loss": 20.3047, "step": 3436 }, { "epoch": 0.03253471663463996, "grad_norm": 855.0426025390625, "learning_rate": 1.9999664966759064e-06, "loss": 82.8828, "step": 3437 }, { "epoch": 0.03254418265635502, "grad_norm": 175.62197875976562, "learning_rate": 1.999966245246127e-06, "loss": 26.2031, "step": 3438 }, { "epoch": 0.03255364867807007, "grad_norm": 303.38800048828125, "learning_rate": 1.9999659928764443e-06, "loss": 34.3125, "step": 3439 }, { "epoch": 0.03256311469978512, "grad_norm": 271.7328796386719, "learning_rate": 1.9999657395668584e-06, "loss": 30.7969, "step": 3440 }, { "epoch": 0.032572580721500176, "grad_norm": 357.66839599609375, "learning_rate": 1.99996548531737e-06, "loss": 27.9375, "step": 3441 }, { "epoch": 0.03258204674321523, "grad_norm": 297.5777587890625, "learning_rate": 1.999965230127979e-06, "loss": 28.375, "step": 3442 }, { "epoch": 0.03259151276493028, "grad_norm": 777.15283203125, "learning_rate": 1.999964973998686e-06, "loss": 57.1562, "step": 3443 }, { "epoch": 0.032600978786645335, "grad_norm": 1041.4718017578125, "learning_rate": 1.9999647169294906e-06, "loss": 89.0625, "step": 3444 }, { "epoch": 0.03261044480836039, "grad_norm": 288.6856384277344, "learning_rate": 1.9999644589203936e-06, "loss": 32.4062, "step": 3445 }, { "epoch": 0.03261991083007544, "grad_norm": 824.8536987304688, "learning_rate": 1.999964199971395e-06, "loss": 67.25, "step": 3446 }, { "epoch": 0.0326293768517905, "grad_norm": 458.56988525390625, "learning_rate": 1.999963940082495e-06, "loss": 45.1719, "step": 3447 }, { "epoch": 0.032638842873505554, "grad_norm": 294.4161682128906, "learning_rate": 1.9999636792536944e-06, "loss": 32.5469, "step": 3448 }, { "epoch": 0.03264830889522061, "grad_norm": 833.5234985351562, "learning_rate": 1.9999634174849925e-06, "loss": 72.9219, "step": 3449 }, { "epoch": 0.03265777491693566, "grad_norm": 3.1974027156829834, "learning_rate": 1.9999631547763903e-06, "loss": 0.9175, "step": 3450 }, { "epoch": 0.03266724093865071, "grad_norm": 268.6354064941406, "learning_rate": 1.9999628911278877e-06, "loss": 35.7812, "step": 3451 }, { "epoch": 0.032676706960365766, "grad_norm": 394.9869384765625, "learning_rate": 1.999962626539485e-06, "loss": 22.4219, "step": 3452 }, { "epoch": 0.03268617298208082, "grad_norm": 245.78648376464844, "learning_rate": 1.9999623610111826e-06, "loss": 24.5547, "step": 3453 }, { "epoch": 0.03269563900379587, "grad_norm": 700.4307861328125, "learning_rate": 1.9999620945429806e-06, "loss": 50.5938, "step": 3454 }, { "epoch": 0.03270510502551093, "grad_norm": 505.1499328613281, "learning_rate": 1.999961827134879e-06, "loss": 69.3438, "step": 3455 }, { "epoch": 0.032714571047225985, "grad_norm": 620.43603515625, "learning_rate": 1.999961558786879e-06, "loss": 60.1562, "step": 3456 }, { "epoch": 0.03272403706894104, "grad_norm": 400.2000427246094, "learning_rate": 1.9999612894989795e-06, "loss": 31.7812, "step": 3457 }, { "epoch": 0.03273350309065609, "grad_norm": 182.91452026367188, "learning_rate": 1.999961019271182e-06, "loss": 27.2969, "step": 3458 }, { "epoch": 0.032742969112371144, "grad_norm": 467.5648498535156, "learning_rate": 1.9999607481034857e-06, "loss": 27.25, "step": 3459 }, { "epoch": 0.0327524351340862, "grad_norm": 315.907470703125, "learning_rate": 1.999960475995891e-06, "loss": 24.8203, "step": 3460 }, { "epoch": 0.03276190115580125, "grad_norm": 3.5324161052703857, "learning_rate": 1.9999602029483997e-06, "loss": 0.8428, "step": 3461 }, { "epoch": 0.0327713671775163, "grad_norm": 307.3232421875, "learning_rate": 1.99995992896101e-06, "loss": 26.7656, "step": 3462 }, { "epoch": 0.032780833199231356, "grad_norm": 226.28004455566406, "learning_rate": 1.9999596540337232e-06, "loss": 30.7812, "step": 3463 }, { "epoch": 0.032790299220946416, "grad_norm": 464.5082092285156, "learning_rate": 1.9999593781665395e-06, "loss": 47.5625, "step": 3464 }, { "epoch": 0.03279976524266147, "grad_norm": 504.5574035644531, "learning_rate": 1.999959101359459e-06, "loss": 37.9219, "step": 3465 }, { "epoch": 0.03280923126437652, "grad_norm": 764.0435180664062, "learning_rate": 1.9999588236124816e-06, "loss": 58.6953, "step": 3466 }, { "epoch": 0.032818697286091575, "grad_norm": 934.8278198242188, "learning_rate": 1.9999585449256083e-06, "loss": 33.3438, "step": 3467 }, { "epoch": 0.03282816330780663, "grad_norm": 1059.745849609375, "learning_rate": 1.999958265298839e-06, "loss": 33.1172, "step": 3468 }, { "epoch": 0.03283762932952168, "grad_norm": 483.8174743652344, "learning_rate": 1.999957984732174e-06, "loss": 39.2734, "step": 3469 }, { "epoch": 0.032847095351236734, "grad_norm": 668.516845703125, "learning_rate": 1.999957703225614e-06, "loss": 39.2969, "step": 3470 }, { "epoch": 0.03285656137295179, "grad_norm": 388.02362060546875, "learning_rate": 1.999957420779158e-06, "loss": 31.4375, "step": 3471 }, { "epoch": 0.03286602739466684, "grad_norm": 746.9232177734375, "learning_rate": 1.9999571373928075e-06, "loss": 35.6953, "step": 3472 }, { "epoch": 0.0328754934163819, "grad_norm": 685.4097900390625, "learning_rate": 1.9999568530665624e-06, "loss": 47.4062, "step": 3473 }, { "epoch": 0.03288495943809695, "grad_norm": 353.22528076171875, "learning_rate": 1.999956567800423e-06, "loss": 39.9219, "step": 3474 }, { "epoch": 0.032894425459812006, "grad_norm": 406.44207763671875, "learning_rate": 1.999956281594389e-06, "loss": 28.0312, "step": 3475 }, { "epoch": 0.03290389148152706, "grad_norm": 860.3763427734375, "learning_rate": 1.9999559944484613e-06, "loss": 55.1484, "step": 3476 }, { "epoch": 0.03291335750324211, "grad_norm": 391.5418701171875, "learning_rate": 1.99995570636264e-06, "loss": 30.1094, "step": 3477 }, { "epoch": 0.032922823524957165, "grad_norm": 210.795166015625, "learning_rate": 1.999955417336926e-06, "loss": 25.2188, "step": 3478 }, { "epoch": 0.03293228954667222, "grad_norm": 240.2946319580078, "learning_rate": 1.9999551273713184e-06, "loss": 30.2031, "step": 3479 }, { "epoch": 0.03294175556838727, "grad_norm": 456.6177062988281, "learning_rate": 1.9999548364658177e-06, "loss": 22.9844, "step": 3480 }, { "epoch": 0.03295122159010233, "grad_norm": 1050.611328125, "learning_rate": 1.999954544620425e-06, "loss": 58.3828, "step": 3481 }, { "epoch": 0.032960687611817384, "grad_norm": 271.2879638671875, "learning_rate": 1.99995425183514e-06, "loss": 27.4688, "step": 3482 }, { "epoch": 0.03297015363353244, "grad_norm": 528.7586669921875, "learning_rate": 1.999953958109963e-06, "loss": 45.8047, "step": 3483 }, { "epoch": 0.03297961965524749, "grad_norm": 228.1358184814453, "learning_rate": 1.9999536634448944e-06, "loss": 20.75, "step": 3484 }, { "epoch": 0.03298908567696254, "grad_norm": 369.1788635253906, "learning_rate": 1.9999533678399346e-06, "loss": 28.7188, "step": 3485 }, { "epoch": 0.032998551698677596, "grad_norm": 419.4502258300781, "learning_rate": 1.999953071295083e-06, "loss": 50.9062, "step": 3486 }, { "epoch": 0.03300801772039265, "grad_norm": 310.33221435546875, "learning_rate": 1.9999527738103416e-06, "loss": 28.2812, "step": 3487 }, { "epoch": 0.0330174837421077, "grad_norm": 762.44189453125, "learning_rate": 1.9999524753857087e-06, "loss": 51.0391, "step": 3488 }, { "epoch": 0.033026949763822755, "grad_norm": 329.18975830078125, "learning_rate": 1.999952176021186e-06, "loss": 41.875, "step": 3489 }, { "epoch": 0.033036415785537815, "grad_norm": 234.03919982910156, "learning_rate": 1.999951875716773e-06, "loss": 25.2188, "step": 3490 }, { "epoch": 0.03304588180725287, "grad_norm": 3.6411685943603516, "learning_rate": 1.9999515744724705e-06, "loss": 0.8638, "step": 3491 }, { "epoch": 0.03305534782896792, "grad_norm": 1404.9874267578125, "learning_rate": 1.999951272288279e-06, "loss": 70.0586, "step": 3492 }, { "epoch": 0.033064813850682974, "grad_norm": 673.1476440429688, "learning_rate": 1.999950969164198e-06, "loss": 54.2031, "step": 3493 }, { "epoch": 0.03307427987239803, "grad_norm": 1212.3482666015625, "learning_rate": 1.9999506651002277e-06, "loss": 64.0469, "step": 3494 }, { "epoch": 0.03308374589411308, "grad_norm": 562.3330688476562, "learning_rate": 1.9999503600963693e-06, "loss": 60.0781, "step": 3495 }, { "epoch": 0.03309321191582813, "grad_norm": 2.981330394744873, "learning_rate": 1.9999500541526224e-06, "loss": 0.918, "step": 3496 }, { "epoch": 0.033102677937543186, "grad_norm": 310.5069885253906, "learning_rate": 1.999949747268988e-06, "loss": 31.3281, "step": 3497 }, { "epoch": 0.033112143959258246, "grad_norm": 254.0532989501953, "learning_rate": 1.999949439445465e-06, "loss": 27.7031, "step": 3498 }, { "epoch": 0.0331216099809733, "grad_norm": 228.4735107421875, "learning_rate": 1.9999491306820553e-06, "loss": 27.3594, "step": 3499 }, { "epoch": 0.03313107600268835, "grad_norm": 524.8206787109375, "learning_rate": 1.999948820978758e-06, "loss": 31.1719, "step": 3500 }, { "epoch": 0.033140542024403405, "grad_norm": 295.1111145019531, "learning_rate": 1.9999485103355742e-06, "loss": 31.625, "step": 3501 }, { "epoch": 0.03315000804611846, "grad_norm": 2.845839023590088, "learning_rate": 1.999948198752504e-06, "loss": 0.9683, "step": 3502 }, { "epoch": 0.03315947406783351, "grad_norm": 398.02520751953125, "learning_rate": 1.999947886229547e-06, "loss": 38.1875, "step": 3503 }, { "epoch": 0.033168940089548564, "grad_norm": 2246.5, "learning_rate": 1.9999475727667045e-06, "loss": 70.875, "step": 3504 }, { "epoch": 0.03317840611126362, "grad_norm": 2.946030616760254, "learning_rate": 1.9999472583639762e-06, "loss": 0.8477, "step": 3505 }, { "epoch": 0.03318787213297867, "grad_norm": 815.18994140625, "learning_rate": 1.9999469430213624e-06, "loss": 44.5938, "step": 3506 }, { "epoch": 0.03319733815469373, "grad_norm": 349.56695556640625, "learning_rate": 1.9999466267388635e-06, "loss": 52.9062, "step": 3507 }, { "epoch": 0.03320680417640878, "grad_norm": 348.4049072265625, "learning_rate": 1.99994630951648e-06, "loss": 37.2344, "step": 3508 }, { "epoch": 0.033216270198123836, "grad_norm": 366.2416687011719, "learning_rate": 1.999945991354212e-06, "loss": 33.9531, "step": 3509 }, { "epoch": 0.03322573621983889, "grad_norm": 1768.76904296875, "learning_rate": 1.9999456722520594e-06, "loss": 76.8438, "step": 3510 }, { "epoch": 0.03323520224155394, "grad_norm": 843.784423828125, "learning_rate": 1.9999453522100234e-06, "loss": 58.6719, "step": 3511 }, { "epoch": 0.033244668263268995, "grad_norm": 426.2342224121094, "learning_rate": 1.999945031228104e-06, "loss": 34.9688, "step": 3512 }, { "epoch": 0.03325413428498405, "grad_norm": 3.3563289642333984, "learning_rate": 1.999944709306301e-06, "loss": 0.8264, "step": 3513 }, { "epoch": 0.0332636003066991, "grad_norm": 1592.547119140625, "learning_rate": 1.999944386444615e-06, "loss": 62.3906, "step": 3514 }, { "epoch": 0.033273066328414154, "grad_norm": 373.6781005859375, "learning_rate": 1.9999440626430463e-06, "loss": 37.6406, "step": 3515 }, { "epoch": 0.033282532350129214, "grad_norm": 608.7158203125, "learning_rate": 1.9999437379015953e-06, "loss": 43.8906, "step": 3516 }, { "epoch": 0.03329199837184427, "grad_norm": 249.36276245117188, "learning_rate": 1.9999434122202625e-06, "loss": 25.5, "step": 3517 }, { "epoch": 0.03330146439355932, "grad_norm": 1094.357177734375, "learning_rate": 1.999943085599047e-06, "loss": 40.5312, "step": 3518 }, { "epoch": 0.03331093041527437, "grad_norm": 920.9158325195312, "learning_rate": 1.9999427580379513e-06, "loss": 30.1484, "step": 3519 }, { "epoch": 0.033320396436989426, "grad_norm": 249.7315216064453, "learning_rate": 1.9999424295369738e-06, "loss": 32.1094, "step": 3520 }, { "epoch": 0.03332986245870448, "grad_norm": 347.0660095214844, "learning_rate": 1.9999421000961157e-06, "loss": 27.1094, "step": 3521 }, { "epoch": 0.03333932848041953, "grad_norm": 621.0184936523438, "learning_rate": 1.999941769715377e-06, "loss": 58.8984, "step": 3522 }, { "epoch": 0.033348794502134585, "grad_norm": 603.5217895507812, "learning_rate": 1.999941438394758e-06, "loss": 61.1406, "step": 3523 }, { "epoch": 0.033358260523849645, "grad_norm": 276.02044677734375, "learning_rate": 1.999941106134259e-06, "loss": 34.6875, "step": 3524 }, { "epoch": 0.0333677265455647, "grad_norm": 299.7690124511719, "learning_rate": 1.9999407729338803e-06, "loss": 31.7344, "step": 3525 }, { "epoch": 0.03337719256727975, "grad_norm": 502.2052307128906, "learning_rate": 1.999940438793623e-06, "loss": 34.6016, "step": 3526 }, { "epoch": 0.033386658588994804, "grad_norm": 178.7254180908203, "learning_rate": 1.9999401037134863e-06, "loss": 25.2031, "step": 3527 }, { "epoch": 0.03339612461070986, "grad_norm": 336.6799621582031, "learning_rate": 1.999939767693471e-06, "loss": 27.6016, "step": 3528 }, { "epoch": 0.03340559063242491, "grad_norm": 864.380615234375, "learning_rate": 1.999939430733577e-06, "loss": 21.6016, "step": 3529 }, { "epoch": 0.03341505665413996, "grad_norm": 242.75253295898438, "learning_rate": 1.9999390928338053e-06, "loss": 30.4062, "step": 3530 }, { "epoch": 0.033424522675855016, "grad_norm": 493.6935729980469, "learning_rate": 1.999938753994156e-06, "loss": 38.0938, "step": 3531 }, { "epoch": 0.03343398869757007, "grad_norm": 1510.447998046875, "learning_rate": 1.9999384142146296e-06, "loss": 64.6641, "step": 3532 }, { "epoch": 0.03344345471928513, "grad_norm": 359.5534362792969, "learning_rate": 1.9999380734952255e-06, "loss": 32.375, "step": 3533 }, { "epoch": 0.03345292074100018, "grad_norm": 212.59149169921875, "learning_rate": 1.9999377318359447e-06, "loss": 26.7188, "step": 3534 }, { "epoch": 0.033462386762715235, "grad_norm": 561.9942626953125, "learning_rate": 1.9999373892367877e-06, "loss": 74.375, "step": 3535 }, { "epoch": 0.03347185278443029, "grad_norm": 693.9342651367188, "learning_rate": 1.999937045697755e-06, "loss": 53.3906, "step": 3536 }, { "epoch": 0.03348131880614534, "grad_norm": 250.42579650878906, "learning_rate": 1.999936701218846e-06, "loss": 31.4062, "step": 3537 }, { "epoch": 0.033490784827860394, "grad_norm": 396.32916259765625, "learning_rate": 1.9999363558000615e-06, "loss": 48.1875, "step": 3538 }, { "epoch": 0.03350025084957545, "grad_norm": 687.023681640625, "learning_rate": 1.999936009441402e-06, "loss": 42.5859, "step": 3539 }, { "epoch": 0.0335097168712905, "grad_norm": 180.9481658935547, "learning_rate": 1.999935662142868e-06, "loss": 29.2656, "step": 3540 }, { "epoch": 0.03351918289300556, "grad_norm": 344.0596008300781, "learning_rate": 1.9999353139044594e-06, "loss": 58.4375, "step": 3541 }, { "epoch": 0.03352864891472061, "grad_norm": 3.174522876739502, "learning_rate": 1.9999349647261765e-06, "loss": 0.8989, "step": 3542 }, { "epoch": 0.033538114936435666, "grad_norm": 430.52044677734375, "learning_rate": 1.99993461460802e-06, "loss": 38.6953, "step": 3543 }, { "epoch": 0.03354758095815072, "grad_norm": 424.6161193847656, "learning_rate": 1.99993426354999e-06, "loss": 30.1406, "step": 3544 }, { "epoch": 0.03355704697986577, "grad_norm": 559.4381713867188, "learning_rate": 1.9999339115520863e-06, "loss": 30.4844, "step": 3545 }, { "epoch": 0.033566513001580825, "grad_norm": 816.8515625, "learning_rate": 1.9999335586143103e-06, "loss": 65.6562, "step": 3546 }, { "epoch": 0.03357597902329588, "grad_norm": 550.7627563476562, "learning_rate": 1.999933204736662e-06, "loss": 51.6406, "step": 3547 }, { "epoch": 0.03358544504501093, "grad_norm": 308.96734619140625, "learning_rate": 1.9999328499191406e-06, "loss": 27.4219, "step": 3548 }, { "epoch": 0.033594911066725984, "grad_norm": 442.1771545410156, "learning_rate": 1.999932494161748e-06, "loss": 45.4219, "step": 3549 }, { "epoch": 0.033604377088441044, "grad_norm": 770.6197509765625, "learning_rate": 1.9999321374644838e-06, "loss": 50.0469, "step": 3550 }, { "epoch": 0.0336138431101561, "grad_norm": 786.3685302734375, "learning_rate": 1.999931779827349e-06, "loss": 63.2812, "step": 3551 }, { "epoch": 0.03362330913187115, "grad_norm": 658.7329711914062, "learning_rate": 1.9999314212503428e-06, "loss": 55.7812, "step": 3552 }, { "epoch": 0.0336327751535862, "grad_norm": 496.18408203125, "learning_rate": 1.999931061733466e-06, "loss": 26.5312, "step": 3553 }, { "epoch": 0.033642241175301256, "grad_norm": 537.0125122070312, "learning_rate": 1.9999307012767193e-06, "loss": 30.9922, "step": 3554 }, { "epoch": 0.03365170719701631, "grad_norm": 636.0557250976562, "learning_rate": 1.999930339880103e-06, "loss": 50.0156, "step": 3555 }, { "epoch": 0.03366117321873136, "grad_norm": 958.938720703125, "learning_rate": 1.9999299775436168e-06, "loss": 34.8906, "step": 3556 }, { "epoch": 0.033670639240446415, "grad_norm": 335.8818054199219, "learning_rate": 1.9999296142672615e-06, "loss": 35.8281, "step": 3557 }, { "epoch": 0.03368010526216147, "grad_norm": 304.2779846191406, "learning_rate": 1.9999292500510377e-06, "loss": 29.5625, "step": 3558 }, { "epoch": 0.03368957128387653, "grad_norm": 832.9119873046875, "learning_rate": 1.9999288848949452e-06, "loss": 65.3906, "step": 3559 }, { "epoch": 0.03369903730559158, "grad_norm": 777.8338623046875, "learning_rate": 1.999928518798985e-06, "loss": 36.3281, "step": 3560 }, { "epoch": 0.033708503327306634, "grad_norm": 511.39398193359375, "learning_rate": 1.9999281517631566e-06, "loss": 31.8906, "step": 3561 }, { "epoch": 0.03371796934902169, "grad_norm": 563.2837524414062, "learning_rate": 1.999927783787461e-06, "loss": 49.1719, "step": 3562 }, { "epoch": 0.03372743537073674, "grad_norm": 360.48883056640625, "learning_rate": 1.9999274148718987e-06, "loss": 30.25, "step": 3563 }, { "epoch": 0.03373690139245179, "grad_norm": 436.39898681640625, "learning_rate": 1.999927045016469e-06, "loss": 44.375, "step": 3564 }, { "epoch": 0.033746367414166846, "grad_norm": 332.16668701171875, "learning_rate": 1.9999266742211735e-06, "loss": 24.082, "step": 3565 }, { "epoch": 0.0337558334358819, "grad_norm": 674.393310546875, "learning_rate": 1.9999263024860114e-06, "loss": 53.0312, "step": 3566 }, { "epoch": 0.03376529945759696, "grad_norm": 3.50730299949646, "learning_rate": 1.9999259298109837e-06, "loss": 0.8396, "step": 3567 }, { "epoch": 0.03377476547931201, "grad_norm": 349.655517578125, "learning_rate": 1.999925556196091e-06, "loss": 26.7266, "step": 3568 }, { "epoch": 0.033784231501027065, "grad_norm": 683.425537109375, "learning_rate": 1.999925181641333e-06, "loss": 44.0781, "step": 3569 }, { "epoch": 0.03379369752274212, "grad_norm": 562.1268310546875, "learning_rate": 1.999924806146711e-06, "loss": 32.0859, "step": 3570 }, { "epoch": 0.03380316354445717, "grad_norm": 279.72198486328125, "learning_rate": 1.9999244297122244e-06, "loss": 31.8906, "step": 3571 }, { "epoch": 0.033812629566172224, "grad_norm": 457.8304443359375, "learning_rate": 1.9999240523378735e-06, "loss": 61.75, "step": 3572 }, { "epoch": 0.03382209558788728, "grad_norm": 266.1946716308594, "learning_rate": 1.9999236740236596e-06, "loss": 30.4375, "step": 3573 }, { "epoch": 0.03383156160960233, "grad_norm": 204.20361328125, "learning_rate": 1.999923294769582e-06, "loss": 30.4375, "step": 3574 }, { "epoch": 0.03384102763131738, "grad_norm": 1044.079345703125, "learning_rate": 1.999922914575642e-06, "loss": 34.4219, "step": 3575 }, { "epoch": 0.03385049365303244, "grad_norm": 538.369873046875, "learning_rate": 1.999922533441839e-06, "loss": 49.0625, "step": 3576 }, { "epoch": 0.033859959674747496, "grad_norm": 397.9170227050781, "learning_rate": 1.9999221513681744e-06, "loss": 44.0938, "step": 3577 }, { "epoch": 0.03386942569646255, "grad_norm": 341.1197509765625, "learning_rate": 1.9999217683546475e-06, "loss": 31.1406, "step": 3578 }, { "epoch": 0.0338788917181776, "grad_norm": 1179.20263671875, "learning_rate": 1.9999213844012596e-06, "loss": 54.5859, "step": 3579 }, { "epoch": 0.033888357739892655, "grad_norm": 329.9576416015625, "learning_rate": 1.9999209995080107e-06, "loss": 30.8281, "step": 3580 }, { "epoch": 0.03389782376160771, "grad_norm": 227.19229125976562, "learning_rate": 1.999920613674901e-06, "loss": 26.8906, "step": 3581 }, { "epoch": 0.03390728978332276, "grad_norm": 1775.5140380859375, "learning_rate": 1.999920226901931e-06, "loss": 36.0938, "step": 3582 }, { "epoch": 0.033916755805037814, "grad_norm": 588.108154296875, "learning_rate": 1.9999198391891007e-06, "loss": 56.5469, "step": 3583 }, { "epoch": 0.033926221826752874, "grad_norm": 402.7490234375, "learning_rate": 1.999919450536411e-06, "loss": 28.4844, "step": 3584 }, { "epoch": 0.03393568784846793, "grad_norm": 189.6348876953125, "learning_rate": 1.999919060943862e-06, "loss": 22.9688, "step": 3585 }, { "epoch": 0.03394515387018298, "grad_norm": 446.5243835449219, "learning_rate": 1.999918670411454e-06, "loss": 46.0781, "step": 3586 }, { "epoch": 0.03395461989189803, "grad_norm": 434.6544494628906, "learning_rate": 1.999918278939188e-06, "loss": 23.1016, "step": 3587 }, { "epoch": 0.033964085913613086, "grad_norm": 518.2042846679688, "learning_rate": 1.9999178865270637e-06, "loss": 40.4219, "step": 3588 }, { "epoch": 0.03397355193532814, "grad_norm": 760.6729736328125, "learning_rate": 1.9999174931750816e-06, "loss": 50.7969, "step": 3589 }, { "epoch": 0.03398301795704319, "grad_norm": 787.82666015625, "learning_rate": 1.999917098883242e-06, "loss": 51.7969, "step": 3590 }, { "epoch": 0.033992483978758245, "grad_norm": 3.397653102874756, "learning_rate": 1.9999167036515456e-06, "loss": 0.9446, "step": 3591 }, { "epoch": 0.0340019500004733, "grad_norm": 392.83538818359375, "learning_rate": 1.9999163074799925e-06, "loss": 44.7812, "step": 3592 }, { "epoch": 0.03401141602218836, "grad_norm": 455.41912841796875, "learning_rate": 1.9999159103685826e-06, "loss": 40.0781, "step": 3593 }, { "epoch": 0.03402088204390341, "grad_norm": 1039.4443359375, "learning_rate": 1.9999155123173172e-06, "loss": 77.9375, "step": 3594 }, { "epoch": 0.034030348065618464, "grad_norm": 246.38002014160156, "learning_rate": 1.9999151133261964e-06, "loss": 27.9062, "step": 3595 }, { "epoch": 0.03403981408733352, "grad_norm": 631.3529052734375, "learning_rate": 1.99991471339522e-06, "loss": 27.2109, "step": 3596 }, { "epoch": 0.03404928010904857, "grad_norm": 656.7232666015625, "learning_rate": 1.9999143125243896e-06, "loss": 68.3125, "step": 3597 }, { "epoch": 0.03405874613076362, "grad_norm": 3.0064713954925537, "learning_rate": 1.999913910713704e-06, "loss": 0.9326, "step": 3598 }, { "epoch": 0.034068212152478676, "grad_norm": 780.62939453125, "learning_rate": 1.9999135079631646e-06, "loss": 25.8594, "step": 3599 }, { "epoch": 0.03407767817419373, "grad_norm": 397.7160949707031, "learning_rate": 1.9999131042727715e-06, "loss": 42.125, "step": 3600 }, { "epoch": 0.03408714419590878, "grad_norm": 765.478271484375, "learning_rate": 1.9999126996425254e-06, "loss": 59.0, "step": 3601 }, { "epoch": 0.03409661021762384, "grad_norm": 411.4880676269531, "learning_rate": 1.9999122940724264e-06, "loss": 57.875, "step": 3602 }, { "epoch": 0.034106076239338895, "grad_norm": 268.0469970703125, "learning_rate": 1.999911887562475e-06, "loss": 28.5625, "step": 3603 }, { "epoch": 0.03411554226105395, "grad_norm": 848.6107177734375, "learning_rate": 1.999911480112671e-06, "loss": 27.2031, "step": 3604 }, { "epoch": 0.034125008282769, "grad_norm": 766.35693359375, "learning_rate": 1.9999110717230156e-06, "loss": 31.8125, "step": 3605 }, { "epoch": 0.034134474304484054, "grad_norm": 380.7010498046875, "learning_rate": 1.9999106623935087e-06, "loss": 45.8906, "step": 3606 }, { "epoch": 0.03414394032619911, "grad_norm": 231.3485565185547, "learning_rate": 1.9999102521241505e-06, "loss": 35.2656, "step": 3607 }, { "epoch": 0.03415340634791416, "grad_norm": 303.0037841796875, "learning_rate": 1.9999098409149424e-06, "loss": 33.4062, "step": 3608 }, { "epoch": 0.03416287236962921, "grad_norm": 1324.03662109375, "learning_rate": 1.999909428765884e-06, "loss": 73.0469, "step": 3609 }, { "epoch": 0.03417233839134427, "grad_norm": 708.4049682617188, "learning_rate": 1.9999090156769753e-06, "loss": 51.5938, "step": 3610 }, { "epoch": 0.034181804413059326, "grad_norm": 2235.0634765625, "learning_rate": 1.9999086016482172e-06, "loss": 35.8438, "step": 3611 }, { "epoch": 0.03419127043477438, "grad_norm": 346.41668701171875, "learning_rate": 1.9999081866796105e-06, "loss": 29.0469, "step": 3612 }, { "epoch": 0.03420073645648943, "grad_norm": 176.13023376464844, "learning_rate": 1.999907770771155e-06, "loss": 27.2969, "step": 3613 }, { "epoch": 0.034210202478204485, "grad_norm": 4.402038097381592, "learning_rate": 1.9999073539228513e-06, "loss": 0.9004, "step": 3614 }, { "epoch": 0.03421966849991954, "grad_norm": 190.398193359375, "learning_rate": 1.9999069361346997e-06, "loss": 27.0469, "step": 3615 }, { "epoch": 0.03422913452163459, "grad_norm": 780.4185180664062, "learning_rate": 1.9999065174067003e-06, "loss": 43.0859, "step": 3616 }, { "epoch": 0.034238600543349644, "grad_norm": 487.3522644042969, "learning_rate": 1.999906097738854e-06, "loss": 61.6562, "step": 3617 }, { "epoch": 0.0342480665650647, "grad_norm": 254.7242431640625, "learning_rate": 1.999905677131161e-06, "loss": 27.4219, "step": 3618 }, { "epoch": 0.03425753258677976, "grad_norm": 1969.132080078125, "learning_rate": 1.999905255583622e-06, "loss": 49.9531, "step": 3619 }, { "epoch": 0.03426699860849481, "grad_norm": 570.471923828125, "learning_rate": 1.999904833096237e-06, "loss": 66.3281, "step": 3620 }, { "epoch": 0.03427646463020986, "grad_norm": 1204.76025390625, "learning_rate": 1.9999044096690064e-06, "loss": 53.5781, "step": 3621 }, { "epoch": 0.034285930651924916, "grad_norm": 466.4956359863281, "learning_rate": 1.999903985301931e-06, "loss": 28.3125, "step": 3622 }, { "epoch": 0.03429539667363997, "grad_norm": 285.6156921386719, "learning_rate": 1.9999035599950105e-06, "loss": 26.3125, "step": 3623 }, { "epoch": 0.03430486269535502, "grad_norm": 460.23260498046875, "learning_rate": 1.9999031337482456e-06, "loss": 34.8438, "step": 3624 }, { "epoch": 0.034314328717070075, "grad_norm": 843.3438720703125, "learning_rate": 1.999902706561637e-06, "loss": 22.7422, "step": 3625 }, { "epoch": 0.03432379473878513, "grad_norm": 630.142822265625, "learning_rate": 1.9999022784351853e-06, "loss": 51.7344, "step": 3626 }, { "epoch": 0.03433326076050019, "grad_norm": 441.0840759277344, "learning_rate": 1.9999018493688896e-06, "loss": 30.9453, "step": 3627 }, { "epoch": 0.03434272678221524, "grad_norm": 191.78948974609375, "learning_rate": 1.999901419362752e-06, "loss": 27.4219, "step": 3628 }, { "epoch": 0.034352192803930294, "grad_norm": 282.5002136230469, "learning_rate": 1.999900988416772e-06, "loss": 30.4219, "step": 3629 }, { "epoch": 0.03436165882564535, "grad_norm": 205.9770050048828, "learning_rate": 1.99990055653095e-06, "loss": 30.1094, "step": 3630 }, { "epoch": 0.0343711248473604, "grad_norm": 173.59698486328125, "learning_rate": 1.999900123705286e-06, "loss": 29.1562, "step": 3631 }, { "epoch": 0.03438059086907545, "grad_norm": 413.0645446777344, "learning_rate": 1.999899689939782e-06, "loss": 23.4219, "step": 3632 }, { "epoch": 0.034390056890790506, "grad_norm": 996.1834716796875, "learning_rate": 1.9998992552344363e-06, "loss": 27.2188, "step": 3633 }, { "epoch": 0.03439952291250556, "grad_norm": 795.5692749023438, "learning_rate": 1.999898819589251e-06, "loss": 35.0547, "step": 3634 }, { "epoch": 0.03440898893422061, "grad_norm": 729.8290405273438, "learning_rate": 1.9998983830042257e-06, "loss": 53.3594, "step": 3635 }, { "epoch": 0.03441845495593567, "grad_norm": 530.6171264648438, "learning_rate": 1.999897945479361e-06, "loss": 28.6094, "step": 3636 }, { "epoch": 0.034427920977650725, "grad_norm": 2.7295055389404297, "learning_rate": 1.999897507014657e-06, "loss": 0.8506, "step": 3637 }, { "epoch": 0.03443738699936578, "grad_norm": 614.6702270507812, "learning_rate": 1.9998970676101146e-06, "loss": 70.6562, "step": 3638 }, { "epoch": 0.03444685302108083, "grad_norm": 505.9822082519531, "learning_rate": 1.9998966272657344e-06, "loss": 26.3438, "step": 3639 }, { "epoch": 0.034456319042795884, "grad_norm": 918.8512573242188, "learning_rate": 1.999896185981516e-06, "loss": 91.7656, "step": 3640 }, { "epoch": 0.03446578506451094, "grad_norm": 746.1356811523438, "learning_rate": 1.99989574375746e-06, "loss": 66.375, "step": 3641 }, { "epoch": 0.03447525108622599, "grad_norm": 299.886962890625, "learning_rate": 1.9998953005935677e-06, "loss": 34.0781, "step": 3642 }, { "epoch": 0.03448471710794104, "grad_norm": 353.05755615234375, "learning_rate": 1.9998948564898384e-06, "loss": 23.7031, "step": 3643 }, { "epoch": 0.034494183129656096, "grad_norm": 406.9053039550781, "learning_rate": 1.9998944114462727e-06, "loss": 32.4375, "step": 3644 }, { "epoch": 0.034503649151371156, "grad_norm": 320.47393798828125, "learning_rate": 1.999893965462872e-06, "loss": 31.1875, "step": 3645 }, { "epoch": 0.03451311517308621, "grad_norm": 585.9266357421875, "learning_rate": 1.999893518539636e-06, "loss": 55.8594, "step": 3646 }, { "epoch": 0.03452258119480126, "grad_norm": 513.8401489257812, "learning_rate": 1.9998930706765644e-06, "loss": 45.0938, "step": 3647 }, { "epoch": 0.034532047216516315, "grad_norm": 285.8667907714844, "learning_rate": 1.9998926218736585e-06, "loss": 31.9531, "step": 3648 }, { "epoch": 0.03454151323823137, "grad_norm": 198.36863708496094, "learning_rate": 1.999892172130919e-06, "loss": 26.9688, "step": 3649 }, { "epoch": 0.03455097925994642, "grad_norm": 877.510009765625, "learning_rate": 1.999891721448346e-06, "loss": 42.5938, "step": 3650 }, { "epoch": 0.034560445281661474, "grad_norm": 673.474853515625, "learning_rate": 1.9998912698259394e-06, "loss": 57.2656, "step": 3651 }, { "epoch": 0.03456991130337653, "grad_norm": 3.5064213275909424, "learning_rate": 1.9998908172637e-06, "loss": 0.9863, "step": 3652 }, { "epoch": 0.03457937732509159, "grad_norm": 597.4368896484375, "learning_rate": 1.9998903637616287e-06, "loss": 49.9844, "step": 3653 }, { "epoch": 0.03458884334680664, "grad_norm": 667.8759155273438, "learning_rate": 1.9998899093197255e-06, "loss": 64.2109, "step": 3654 }, { "epoch": 0.03459830936852169, "grad_norm": 517.8374633789062, "learning_rate": 1.9998894539379906e-06, "loss": 68.6406, "step": 3655 }, { "epoch": 0.034607775390236746, "grad_norm": 889.1074829101562, "learning_rate": 1.9998889976164247e-06, "loss": 75.4688, "step": 3656 }, { "epoch": 0.0346172414119518, "grad_norm": 475.484619140625, "learning_rate": 1.9998885403550284e-06, "loss": 27.75, "step": 3657 }, { "epoch": 0.03462670743366685, "grad_norm": 284.7806091308594, "learning_rate": 1.9998880821538016e-06, "loss": 24.5156, "step": 3658 }, { "epoch": 0.034636173455381905, "grad_norm": 611.3776245117188, "learning_rate": 1.9998876230127455e-06, "loss": 46.875, "step": 3659 }, { "epoch": 0.03464563947709696, "grad_norm": 274.3036804199219, "learning_rate": 1.99988716293186e-06, "loss": 32.5469, "step": 3660 }, { "epoch": 0.03465510549881201, "grad_norm": 519.9882202148438, "learning_rate": 1.9998867019111453e-06, "loss": 32.6172, "step": 3661 }, { "epoch": 0.03466457152052707, "grad_norm": 186.0958709716797, "learning_rate": 1.999886239950602e-06, "loss": 26.6406, "step": 3662 }, { "epoch": 0.034674037542242124, "grad_norm": 179.64280700683594, "learning_rate": 1.999885777050231e-06, "loss": 30.0469, "step": 3663 }, { "epoch": 0.03468350356395718, "grad_norm": 459.8203430175781, "learning_rate": 1.9998853132100325e-06, "loss": 61.8594, "step": 3664 }, { "epoch": 0.03469296958567223, "grad_norm": 1351.68359375, "learning_rate": 1.9998848484300062e-06, "loss": 42.6406, "step": 3665 }, { "epoch": 0.03470243560738728, "grad_norm": 160.0842742919922, "learning_rate": 1.999884382710154e-06, "loss": 24.9219, "step": 3666 }, { "epoch": 0.034711901629102336, "grad_norm": 277.4002990722656, "learning_rate": 1.999883916050475e-06, "loss": 29.2188, "step": 3667 }, { "epoch": 0.03472136765081739, "grad_norm": 304.5913391113281, "learning_rate": 1.9998834484509707e-06, "loss": 26.2188, "step": 3668 }, { "epoch": 0.03473083367253244, "grad_norm": 217.33444213867188, "learning_rate": 1.9998829799116407e-06, "loss": 28.9688, "step": 3669 }, { "epoch": 0.0347402996942475, "grad_norm": 422.07440185546875, "learning_rate": 1.9998825104324853e-06, "loss": 26.0625, "step": 3670 }, { "epoch": 0.034749765715962555, "grad_norm": 321.44134521484375, "learning_rate": 1.999882040013506e-06, "loss": 43.8906, "step": 3671 }, { "epoch": 0.03475923173767761, "grad_norm": 546.3546142578125, "learning_rate": 1.9998815686547024e-06, "loss": 56.0, "step": 3672 }, { "epoch": 0.03476869775939266, "grad_norm": 345.3348083496094, "learning_rate": 1.9998810963560754e-06, "loss": 24.375, "step": 3673 }, { "epoch": 0.034778163781107714, "grad_norm": 222.86767578125, "learning_rate": 1.9998806231176247e-06, "loss": 27.7812, "step": 3674 }, { "epoch": 0.03478762980282277, "grad_norm": 331.78265380859375, "learning_rate": 1.9998801489393516e-06, "loss": 26.1406, "step": 3675 }, { "epoch": 0.03479709582453782, "grad_norm": 598.6629638671875, "learning_rate": 1.9998796738212564e-06, "loss": 46.2188, "step": 3676 }, { "epoch": 0.03480656184625287, "grad_norm": 231.94032287597656, "learning_rate": 1.9998791977633392e-06, "loss": 21.7578, "step": 3677 }, { "epoch": 0.034816027867967926, "grad_norm": 210.1803436279297, "learning_rate": 1.9998787207656005e-06, "loss": 27.8594, "step": 3678 }, { "epoch": 0.034825493889682986, "grad_norm": 241.201904296875, "learning_rate": 1.999878242828041e-06, "loss": 32.25, "step": 3679 }, { "epoch": 0.03483495991139804, "grad_norm": 295.5946960449219, "learning_rate": 1.9998777639506607e-06, "loss": 33.4844, "step": 3680 }, { "epoch": 0.03484442593311309, "grad_norm": 3.016669273376465, "learning_rate": 1.9998772841334605e-06, "loss": 0.9214, "step": 3681 }, { "epoch": 0.034853891954828145, "grad_norm": 783.1906127929688, "learning_rate": 1.999876803376441e-06, "loss": 76.0, "step": 3682 }, { "epoch": 0.0348633579765432, "grad_norm": 2.7287237644195557, "learning_rate": 1.999876321679602e-06, "loss": 0.9014, "step": 3683 }, { "epoch": 0.03487282399825825, "grad_norm": 674.77587890625, "learning_rate": 1.9998758390429444e-06, "loss": 46.3672, "step": 3684 }, { "epoch": 0.034882290019973304, "grad_norm": 941.4509887695312, "learning_rate": 1.9998753554664684e-06, "loss": 27.1016, "step": 3685 }, { "epoch": 0.03489175604168836, "grad_norm": 390.5943603515625, "learning_rate": 1.9998748709501747e-06, "loss": 44.1562, "step": 3686 }, { "epoch": 0.03490122206340341, "grad_norm": 339.7454528808594, "learning_rate": 1.999874385494064e-06, "loss": 42.5938, "step": 3687 }, { "epoch": 0.03491068808511847, "grad_norm": 295.8238830566406, "learning_rate": 1.999873899098136e-06, "loss": 30.75, "step": 3688 }, { "epoch": 0.03492015410683352, "grad_norm": 313.1690979003906, "learning_rate": 1.999873411762392e-06, "loss": 29.6562, "step": 3689 }, { "epoch": 0.034929620128548576, "grad_norm": 495.5023498535156, "learning_rate": 1.9998729234868316e-06, "loss": 41.0312, "step": 3690 }, { "epoch": 0.03493908615026363, "grad_norm": 589.53125, "learning_rate": 1.9998724342714557e-06, "loss": 46.8594, "step": 3691 }, { "epoch": 0.03494855217197868, "grad_norm": 3.277158498764038, "learning_rate": 1.999871944116265e-06, "loss": 0.7588, "step": 3692 }, { "epoch": 0.034958018193693735, "grad_norm": 718.3163452148438, "learning_rate": 1.99987145302126e-06, "loss": 36.0, "step": 3693 }, { "epoch": 0.03496748421540879, "grad_norm": 156.0826416015625, "learning_rate": 1.99987096098644e-06, "loss": 24.7812, "step": 3694 }, { "epoch": 0.03497695023712384, "grad_norm": 302.34320068359375, "learning_rate": 1.999870468011807e-06, "loss": 28.8672, "step": 3695 }, { "epoch": 0.0349864162588389, "grad_norm": 239.62107849121094, "learning_rate": 1.9998699740973608e-06, "loss": 25.6406, "step": 3696 }, { "epoch": 0.034995882280553954, "grad_norm": 1100.376953125, "learning_rate": 1.999869479243102e-06, "loss": 85.4844, "step": 3697 }, { "epoch": 0.03500534830226901, "grad_norm": 683.9441528320312, "learning_rate": 1.9998689834490303e-06, "loss": 28.3828, "step": 3698 }, { "epoch": 0.03501481432398406, "grad_norm": 1305.52734375, "learning_rate": 1.9998684867151473e-06, "loss": 54.2656, "step": 3699 }, { "epoch": 0.03502428034569911, "grad_norm": 358.50189208984375, "learning_rate": 1.999867989041453e-06, "loss": 47.0781, "step": 3700 }, { "epoch": 0.035033746367414166, "grad_norm": 634.4055786132812, "learning_rate": 1.9998674904279474e-06, "loss": 32.5938, "step": 3701 }, { "epoch": 0.03504321238912922, "grad_norm": 268.000244140625, "learning_rate": 1.999866990874632e-06, "loss": 32.2031, "step": 3702 }, { "epoch": 0.03505267841084427, "grad_norm": 438.3104248046875, "learning_rate": 1.9998664903815065e-06, "loss": 62.3906, "step": 3703 }, { "epoch": 0.035062144432559325, "grad_norm": 526.880615234375, "learning_rate": 1.999865988948571e-06, "loss": 36.3438, "step": 3704 }, { "epoch": 0.035071610454274385, "grad_norm": 3.197324752807617, "learning_rate": 1.999865486575827e-06, "loss": 0.9087, "step": 3705 }, { "epoch": 0.03508107647598944, "grad_norm": 371.7588806152344, "learning_rate": 1.999864983263274e-06, "loss": 20.0703, "step": 3706 }, { "epoch": 0.03509054249770449, "grad_norm": 900.829833984375, "learning_rate": 1.9998644790109137e-06, "loss": 44.0781, "step": 3707 }, { "epoch": 0.035100008519419544, "grad_norm": 745.3340454101562, "learning_rate": 1.9998639738187456e-06, "loss": 66.2891, "step": 3708 }, { "epoch": 0.0351094745411346, "grad_norm": 255.58889770507812, "learning_rate": 1.9998634676867703e-06, "loss": 28.8906, "step": 3709 }, { "epoch": 0.03511894056284965, "grad_norm": 604.2584228515625, "learning_rate": 1.9998629606149887e-06, "loss": 67.9062, "step": 3710 }, { "epoch": 0.0351284065845647, "grad_norm": 408.2257080078125, "learning_rate": 1.9998624526034003e-06, "loss": 35.3516, "step": 3711 }, { "epoch": 0.035137872606279756, "grad_norm": 433.74017333984375, "learning_rate": 1.999861943652007e-06, "loss": 31.9688, "step": 3712 }, { "epoch": 0.035147338627994816, "grad_norm": 276.6903991699219, "learning_rate": 1.999861433760808e-06, "loss": 31.0469, "step": 3713 }, { "epoch": 0.03515680464970987, "grad_norm": 327.39892578125, "learning_rate": 1.9998609229298046e-06, "loss": 33.375, "step": 3714 }, { "epoch": 0.03516627067142492, "grad_norm": 474.4043884277344, "learning_rate": 1.9998604111589963e-06, "loss": 31.9688, "step": 3715 }, { "epoch": 0.035175736693139975, "grad_norm": 962.20361328125, "learning_rate": 1.999859898448385e-06, "loss": 90.9219, "step": 3716 }, { "epoch": 0.03518520271485503, "grad_norm": 996.0723266601562, "learning_rate": 1.99985938479797e-06, "loss": 66.0234, "step": 3717 }, { "epoch": 0.03519466873657008, "grad_norm": 300.168212890625, "learning_rate": 1.9998588702077524e-06, "loss": 34.2188, "step": 3718 }, { "epoch": 0.035204134758285134, "grad_norm": 322.2218017578125, "learning_rate": 1.9998583546777327e-06, "loss": 26.7344, "step": 3719 }, { "epoch": 0.03521360078000019, "grad_norm": 735.347900390625, "learning_rate": 1.999857838207911e-06, "loss": 48.9844, "step": 3720 }, { "epoch": 0.03522306680171524, "grad_norm": 443.6658935546875, "learning_rate": 1.999857320798288e-06, "loss": 29.7656, "step": 3721 }, { "epoch": 0.0352325328234303, "grad_norm": 524.095458984375, "learning_rate": 1.9998568024488643e-06, "loss": 31.7812, "step": 3722 }, { "epoch": 0.03524199884514535, "grad_norm": 507.543701171875, "learning_rate": 1.99985628315964e-06, "loss": 52.6094, "step": 3723 }, { "epoch": 0.035251464866860406, "grad_norm": 3.3106565475463867, "learning_rate": 1.999855762930616e-06, "loss": 0.8916, "step": 3724 }, { "epoch": 0.03526093088857546, "grad_norm": 649.0613403320312, "learning_rate": 1.9998552417617925e-06, "loss": 21.4414, "step": 3725 }, { "epoch": 0.03527039691029051, "grad_norm": 623.6490478515625, "learning_rate": 1.9998547196531703e-06, "loss": 28.2656, "step": 3726 }, { "epoch": 0.035279862932005565, "grad_norm": 3.064885139465332, "learning_rate": 1.9998541966047497e-06, "loss": 0.8857, "step": 3727 }, { "epoch": 0.03528932895372062, "grad_norm": 329.5775146484375, "learning_rate": 1.9998536726165313e-06, "loss": 28.7188, "step": 3728 }, { "epoch": 0.03529879497543567, "grad_norm": 243.40049743652344, "learning_rate": 1.999853147688515e-06, "loss": 26.4609, "step": 3729 }, { "epoch": 0.035308260997150724, "grad_norm": 527.098388671875, "learning_rate": 1.999852621820702e-06, "loss": 34.1094, "step": 3730 }, { "epoch": 0.035317727018865784, "grad_norm": 292.61505126953125, "learning_rate": 1.9998520950130927e-06, "loss": 25.8906, "step": 3731 }, { "epoch": 0.03532719304058084, "grad_norm": 375.8937683105469, "learning_rate": 1.999851567265688e-06, "loss": 29.7656, "step": 3732 }, { "epoch": 0.03533665906229589, "grad_norm": 482.8108215332031, "learning_rate": 1.999851038578487e-06, "loss": 28.1172, "step": 3733 }, { "epoch": 0.03534612508401094, "grad_norm": 383.53125, "learning_rate": 1.9998505089514913e-06, "loss": 14.668, "step": 3734 }, { "epoch": 0.035355591105725996, "grad_norm": 462.1753845214844, "learning_rate": 1.9998499783847013e-06, "loss": 26.9688, "step": 3735 }, { "epoch": 0.03536505712744105, "grad_norm": 224.41970825195312, "learning_rate": 1.9998494468781173e-06, "loss": 26.9375, "step": 3736 }, { "epoch": 0.0353745231491561, "grad_norm": 319.31756591796875, "learning_rate": 1.99984891443174e-06, "loss": 30.4336, "step": 3737 }, { "epoch": 0.035383989170871155, "grad_norm": 743.8668823242188, "learning_rate": 1.9998483810455695e-06, "loss": 70.8281, "step": 3738 }, { "epoch": 0.035393455192586215, "grad_norm": 599.521240234375, "learning_rate": 1.9998478467196066e-06, "loss": 31.6562, "step": 3739 }, { "epoch": 0.03540292121430127, "grad_norm": 870.069580078125, "learning_rate": 1.9998473114538518e-06, "loss": 37.625, "step": 3740 }, { "epoch": 0.03541238723601632, "grad_norm": 189.01417541503906, "learning_rate": 1.999846775248306e-06, "loss": 27.1094, "step": 3741 }, { "epoch": 0.035421853257731374, "grad_norm": 837.0806884765625, "learning_rate": 1.9998462381029684e-06, "loss": 28.2031, "step": 3742 }, { "epoch": 0.03543131927944643, "grad_norm": 699.5179443359375, "learning_rate": 1.999845700017841e-06, "loss": 38.7734, "step": 3743 }, { "epoch": 0.03544078530116148, "grad_norm": 498.6907653808594, "learning_rate": 1.999845160992924e-06, "loss": 63.3438, "step": 3744 }, { "epoch": 0.03545025132287653, "grad_norm": 463.3176574707031, "learning_rate": 1.999844621028217e-06, "loss": 43.6406, "step": 3745 }, { "epoch": 0.035459717344591586, "grad_norm": 652.0436401367188, "learning_rate": 1.999844080123721e-06, "loss": 44.0977, "step": 3746 }, { "epoch": 0.03546918336630664, "grad_norm": 245.450439453125, "learning_rate": 1.999843538279437e-06, "loss": 28.2188, "step": 3747 }, { "epoch": 0.0354786493880217, "grad_norm": 1018.388671875, "learning_rate": 1.9998429954953653e-06, "loss": 45.0938, "step": 3748 }, { "epoch": 0.03548811540973675, "grad_norm": 145.6427764892578, "learning_rate": 1.999842451771506e-06, "loss": 23.5938, "step": 3749 }, { "epoch": 0.035497581431451805, "grad_norm": 814.7943725585938, "learning_rate": 1.99984190710786e-06, "loss": 69.5156, "step": 3750 }, { "epoch": 0.03550704745316686, "grad_norm": 3.715550184249878, "learning_rate": 1.999841361504427e-06, "loss": 0.9067, "step": 3751 }, { "epoch": 0.03551651347488191, "grad_norm": 2.680546998977661, "learning_rate": 1.9998408149612087e-06, "loss": 0.8301, "step": 3752 }, { "epoch": 0.035525979496596964, "grad_norm": 3.027709722518921, "learning_rate": 1.999840267478205e-06, "loss": 1.0186, "step": 3753 }, { "epoch": 0.03553544551831202, "grad_norm": 728.4012451171875, "learning_rate": 1.999839719055417e-06, "loss": 55.9688, "step": 3754 }, { "epoch": 0.03554491154002707, "grad_norm": 532.959228515625, "learning_rate": 1.9998391696928437e-06, "loss": 41.5742, "step": 3755 }, { "epoch": 0.03555437756174213, "grad_norm": 501.513427734375, "learning_rate": 1.999838619390487e-06, "loss": 46.3906, "step": 3756 }, { "epoch": 0.03556384358345718, "grad_norm": 546.4718017578125, "learning_rate": 1.999838068148347e-06, "loss": 60.4062, "step": 3757 }, { "epoch": 0.035573309605172236, "grad_norm": 470.2256164550781, "learning_rate": 1.9998375159664245e-06, "loss": 29.0, "step": 3758 }, { "epoch": 0.03558277562688729, "grad_norm": 707.3917236328125, "learning_rate": 1.9998369628447196e-06, "loss": 41.0781, "step": 3759 }, { "epoch": 0.03559224164860234, "grad_norm": 735.7433471679688, "learning_rate": 1.999836408783233e-06, "loss": 42.0781, "step": 3760 }, { "epoch": 0.035601707670317395, "grad_norm": 465.1251220703125, "learning_rate": 1.9998358537819654e-06, "loss": 53.625, "step": 3761 }, { "epoch": 0.03561117369203245, "grad_norm": 251.24496459960938, "learning_rate": 1.9998352978409173e-06, "loss": 27.5938, "step": 3762 }, { "epoch": 0.0356206397137475, "grad_norm": 499.8003845214844, "learning_rate": 1.9998347409600887e-06, "loss": 47.9375, "step": 3763 }, { "epoch": 0.035630105735462554, "grad_norm": 1228.9794921875, "learning_rate": 1.999834183139481e-06, "loss": 76.3047, "step": 3764 }, { "epoch": 0.035639571757177614, "grad_norm": 184.41067504882812, "learning_rate": 1.9998336243790933e-06, "loss": 26.2344, "step": 3765 }, { "epoch": 0.03564903777889267, "grad_norm": 673.4531860351562, "learning_rate": 1.999833064678928e-06, "loss": 48.5859, "step": 3766 }, { "epoch": 0.03565850380060772, "grad_norm": 212.01304626464844, "learning_rate": 1.999832504038984e-06, "loss": 29.7344, "step": 3767 }, { "epoch": 0.03566796982232277, "grad_norm": 539.410888671875, "learning_rate": 1.9998319424592625e-06, "loss": 54.4844, "step": 3768 }, { "epoch": 0.035677435844037826, "grad_norm": 2037.66015625, "learning_rate": 1.9998313799397643e-06, "loss": 56.3672, "step": 3769 }, { "epoch": 0.03568690186575288, "grad_norm": 403.28643798828125, "learning_rate": 1.9998308164804895e-06, "loss": 34.3906, "step": 3770 }, { "epoch": 0.03569636788746793, "grad_norm": 273.6322326660156, "learning_rate": 1.9998302520814388e-06, "loss": 28.0, "step": 3771 }, { "epoch": 0.035705833909182985, "grad_norm": 315.01910400390625, "learning_rate": 1.9998296867426126e-06, "loss": 25.2188, "step": 3772 }, { "epoch": 0.03571529993089804, "grad_norm": 552.8152465820312, "learning_rate": 1.999829120464012e-06, "loss": 53.5, "step": 3773 }, { "epoch": 0.0357247659526131, "grad_norm": 307.8646240234375, "learning_rate": 1.9998285532456366e-06, "loss": 27.6094, "step": 3774 }, { "epoch": 0.03573423197432815, "grad_norm": 1011.8936157226562, "learning_rate": 1.9998279850874876e-06, "loss": 74.4688, "step": 3775 }, { "epoch": 0.035743697996043204, "grad_norm": 231.0419464111328, "learning_rate": 1.9998274159895652e-06, "loss": 24.7188, "step": 3776 }, { "epoch": 0.03575316401775826, "grad_norm": 285.2471008300781, "learning_rate": 1.99982684595187e-06, "loss": 24.3438, "step": 3777 }, { "epoch": 0.03576263003947331, "grad_norm": 448.3897705078125, "learning_rate": 1.9998262749744027e-06, "loss": 30.7812, "step": 3778 }, { "epoch": 0.03577209606118836, "grad_norm": 265.532470703125, "learning_rate": 1.9998257030571638e-06, "loss": 29.0312, "step": 3779 }, { "epoch": 0.035781562082903416, "grad_norm": 911.9351806640625, "learning_rate": 1.999825130200154e-06, "loss": 51.4688, "step": 3780 }, { "epoch": 0.03579102810461847, "grad_norm": 477.2528381347656, "learning_rate": 1.9998245564033732e-06, "loss": 28.0312, "step": 3781 }, { "epoch": 0.03580049412633353, "grad_norm": 694.262939453125, "learning_rate": 1.999823981666823e-06, "loss": 61.4688, "step": 3782 }, { "epoch": 0.03580996014804858, "grad_norm": 434.4980773925781, "learning_rate": 1.9998234059905025e-06, "loss": 37.6875, "step": 3783 }, { "epoch": 0.035819426169763635, "grad_norm": 181.52154541015625, "learning_rate": 1.9998228293744136e-06, "loss": 26.5, "step": 3784 }, { "epoch": 0.03582889219147869, "grad_norm": 203.62960815429688, "learning_rate": 1.999822251818556e-06, "loss": 25.5312, "step": 3785 }, { "epoch": 0.03583835821319374, "grad_norm": 523.548583984375, "learning_rate": 1.9998216733229305e-06, "loss": 55.6562, "step": 3786 }, { "epoch": 0.035847824234908794, "grad_norm": 458.91632080078125, "learning_rate": 1.999821093887538e-06, "loss": 26.0078, "step": 3787 }, { "epoch": 0.03585729025662385, "grad_norm": 517.0496215820312, "learning_rate": 1.9998205135123784e-06, "loss": 35.4062, "step": 3788 }, { "epoch": 0.0358667562783389, "grad_norm": 1582.4830322265625, "learning_rate": 1.999819932197453e-06, "loss": 43.3828, "step": 3789 }, { "epoch": 0.03587622230005395, "grad_norm": 1118.1650390625, "learning_rate": 1.9998193499427613e-06, "loss": 55.7344, "step": 3790 }, { "epoch": 0.03588568832176901, "grad_norm": 3.0942821502685547, "learning_rate": 1.9998187667483045e-06, "loss": 0.9614, "step": 3791 }, { "epoch": 0.035895154343484066, "grad_norm": 363.1468811035156, "learning_rate": 1.9998181826140836e-06, "loss": 23.3984, "step": 3792 }, { "epoch": 0.03590462036519912, "grad_norm": 611.9802856445312, "learning_rate": 1.9998175975400984e-06, "loss": 22.8828, "step": 3793 }, { "epoch": 0.03591408638691417, "grad_norm": 540.023193359375, "learning_rate": 1.9998170115263495e-06, "loss": 43.1328, "step": 3794 }, { "epoch": 0.035923552408629225, "grad_norm": 480.7174377441406, "learning_rate": 1.999816424572838e-06, "loss": 38.9844, "step": 3795 }, { "epoch": 0.03593301843034428, "grad_norm": 252.84548950195312, "learning_rate": 1.999815836679564e-06, "loss": 32.75, "step": 3796 }, { "epoch": 0.03594248445205933, "grad_norm": 368.3194885253906, "learning_rate": 1.999815247846528e-06, "loss": 25.1562, "step": 3797 }, { "epoch": 0.035951950473774384, "grad_norm": 3.242469072341919, "learning_rate": 1.9998146580737307e-06, "loss": 0.8354, "step": 3798 }, { "epoch": 0.03596141649548944, "grad_norm": 503.243408203125, "learning_rate": 1.999814067361173e-06, "loss": 48.4531, "step": 3799 }, { "epoch": 0.0359708825172045, "grad_norm": 336.7393798828125, "learning_rate": 1.999813475708855e-06, "loss": 31.8281, "step": 3800 }, { "epoch": 0.03598034853891955, "grad_norm": 273.2824401855469, "learning_rate": 1.999812883116777e-06, "loss": 32.5312, "step": 3801 }, { "epoch": 0.0359898145606346, "grad_norm": 620.4681396484375, "learning_rate": 1.99981228958494e-06, "loss": 65.1562, "step": 3802 }, { "epoch": 0.035999280582349656, "grad_norm": 457.119384765625, "learning_rate": 1.999811695113345e-06, "loss": 41.125, "step": 3803 }, { "epoch": 0.03600874660406471, "grad_norm": 581.7626953125, "learning_rate": 1.9998110997019917e-06, "loss": 36.5078, "step": 3804 }, { "epoch": 0.03601821262577976, "grad_norm": 200.71902465820312, "learning_rate": 1.999810503350881e-06, "loss": 24.2188, "step": 3805 }, { "epoch": 0.036027678647494815, "grad_norm": 519.7050170898438, "learning_rate": 1.9998099060600136e-06, "loss": 57.5938, "step": 3806 }, { "epoch": 0.03603714466920987, "grad_norm": 321.192138671875, "learning_rate": 1.99980930782939e-06, "loss": 32.7969, "step": 3807 }, { "epoch": 0.03604661069092493, "grad_norm": 345.9913330078125, "learning_rate": 1.9998087086590104e-06, "loss": 25.5156, "step": 3808 }, { "epoch": 0.03605607671263998, "grad_norm": 2.711761474609375, "learning_rate": 1.9998081085488757e-06, "loss": 0.8521, "step": 3809 }, { "epoch": 0.036065542734355034, "grad_norm": 552.8751220703125, "learning_rate": 1.9998075074989864e-06, "loss": 45.3438, "step": 3810 }, { "epoch": 0.03607500875607009, "grad_norm": 306.6983337402344, "learning_rate": 1.999806905509343e-06, "loss": 27.6562, "step": 3811 }, { "epoch": 0.03608447477778514, "grad_norm": 180.33729553222656, "learning_rate": 1.9998063025799466e-06, "loss": 26.0938, "step": 3812 }, { "epoch": 0.03609394079950019, "grad_norm": 1974.910888671875, "learning_rate": 1.999805698710797e-06, "loss": 39.5312, "step": 3813 }, { "epoch": 0.036103406821215246, "grad_norm": 737.060302734375, "learning_rate": 1.9998050939018953e-06, "loss": 60.0625, "step": 3814 }, { "epoch": 0.0361128728429303, "grad_norm": 236.17564392089844, "learning_rate": 1.9998044881532415e-06, "loss": 25.8594, "step": 3815 }, { "epoch": 0.03612233886464535, "grad_norm": 466.0262756347656, "learning_rate": 1.9998038814648367e-06, "loss": 35.3125, "step": 3816 }, { "epoch": 0.03613180488636041, "grad_norm": 211.784423828125, "learning_rate": 1.999803273836681e-06, "loss": 27.7969, "step": 3817 }, { "epoch": 0.036141270908075465, "grad_norm": 1029.55029296875, "learning_rate": 1.999802665268776e-06, "loss": 57.6094, "step": 3818 }, { "epoch": 0.03615073692979052, "grad_norm": 291.9998779296875, "learning_rate": 1.999802055761121e-06, "loss": 30.0156, "step": 3819 }, { "epoch": 0.03616020295150557, "grad_norm": 532.6057739257812, "learning_rate": 1.9998014453137172e-06, "loss": 50.3906, "step": 3820 }, { "epoch": 0.036169668973220624, "grad_norm": 242.8946990966797, "learning_rate": 1.999800833926565e-06, "loss": 33.4219, "step": 3821 }, { "epoch": 0.03617913499493568, "grad_norm": 368.21966552734375, "learning_rate": 1.999800221599665e-06, "loss": 26.0234, "step": 3822 }, { "epoch": 0.03618860101665073, "grad_norm": 186.35597229003906, "learning_rate": 1.9997996083330184e-06, "loss": 31.6719, "step": 3823 }, { "epoch": 0.03619806703836578, "grad_norm": 3.0203752517700195, "learning_rate": 1.9997989941266245e-06, "loss": 0.818, "step": 3824 }, { "epoch": 0.03620753306008084, "grad_norm": 514.2568359375, "learning_rate": 1.9997983789804853e-06, "loss": 32.0781, "step": 3825 }, { "epoch": 0.036216999081795896, "grad_norm": 275.585205078125, "learning_rate": 1.9997977628946002e-06, "loss": 24.5156, "step": 3826 }, { "epoch": 0.03622646510351095, "grad_norm": 2.9704911708831787, "learning_rate": 1.9997971458689703e-06, "loss": 0.7378, "step": 3827 }, { "epoch": 0.036235931125226, "grad_norm": 280.3058166503906, "learning_rate": 1.999796527903596e-06, "loss": 36.3906, "step": 3828 }, { "epoch": 0.036245397146941055, "grad_norm": 594.6834716796875, "learning_rate": 1.999795908998478e-06, "loss": 54.4531, "step": 3829 }, { "epoch": 0.03625486316865611, "grad_norm": 457.7345275878906, "learning_rate": 1.999795289153617e-06, "loss": 52.2812, "step": 3830 }, { "epoch": 0.03626432919037116, "grad_norm": 250.76795959472656, "learning_rate": 1.9997946683690137e-06, "loss": 28.6094, "step": 3831 }, { "epoch": 0.036273795212086214, "grad_norm": 180.85812377929688, "learning_rate": 1.999794046644668e-06, "loss": 26.2031, "step": 3832 }, { "epoch": 0.03628326123380127, "grad_norm": 745.2251586914062, "learning_rate": 1.9997934239805815e-06, "loss": 55.4062, "step": 3833 }, { "epoch": 0.03629272725551633, "grad_norm": 374.36993408203125, "learning_rate": 1.999792800376754e-06, "loss": 27.7969, "step": 3834 }, { "epoch": 0.03630219327723138, "grad_norm": 376.31781005859375, "learning_rate": 1.999792175833186e-06, "loss": 35.5312, "step": 3835 }, { "epoch": 0.03631165929894643, "grad_norm": 3.223672866821289, "learning_rate": 1.9997915503498788e-06, "loss": 0.8635, "step": 3836 }, { "epoch": 0.036321125320661486, "grad_norm": 3.3429481983184814, "learning_rate": 1.999790923926832e-06, "loss": 1.0273, "step": 3837 }, { "epoch": 0.03633059134237654, "grad_norm": 2911.716552734375, "learning_rate": 1.9997902965640476e-06, "loss": 34.6836, "step": 3838 }, { "epoch": 0.03634005736409159, "grad_norm": 749.7904052734375, "learning_rate": 1.9997896682615245e-06, "loss": 57.5156, "step": 3839 }, { "epoch": 0.036349523385806645, "grad_norm": 660.9031372070312, "learning_rate": 1.9997890390192645e-06, "loss": 27.375, "step": 3840 }, { "epoch": 0.0363589894075217, "grad_norm": 3.061295509338379, "learning_rate": 1.999788408837268e-06, "loss": 0.9614, "step": 3841 }, { "epoch": 0.03636845542923675, "grad_norm": 226.6290283203125, "learning_rate": 1.9997877777155352e-06, "loss": 25.0078, "step": 3842 }, { "epoch": 0.03637792145095181, "grad_norm": 537.0454711914062, "learning_rate": 1.9997871456540676e-06, "loss": 39.1094, "step": 3843 }, { "epoch": 0.036387387472666864, "grad_norm": 177.8740234375, "learning_rate": 1.9997865126528643e-06, "loss": 26.7969, "step": 3844 }, { "epoch": 0.03639685349438192, "grad_norm": 706.8685302734375, "learning_rate": 1.999785878711927e-06, "loss": 38.0312, "step": 3845 }, { "epoch": 0.03640631951609697, "grad_norm": 1075.628173828125, "learning_rate": 1.999785243831256e-06, "loss": 70.3359, "step": 3846 }, { "epoch": 0.03641578553781202, "grad_norm": 265.3126525878906, "learning_rate": 1.999784608010852e-06, "loss": 25.2188, "step": 3847 }, { "epoch": 0.036425251559527076, "grad_norm": 2.9868297576904297, "learning_rate": 1.9997839712507157e-06, "loss": 0.8403, "step": 3848 }, { "epoch": 0.03643471758124213, "grad_norm": 353.4396057128906, "learning_rate": 1.999783333550847e-06, "loss": 27.4688, "step": 3849 }, { "epoch": 0.03644418360295718, "grad_norm": 538.0933227539062, "learning_rate": 1.9997826949112474e-06, "loss": 22.6172, "step": 3850 }, { "epoch": 0.03645364962467224, "grad_norm": 509.90850830078125, "learning_rate": 1.9997820553319168e-06, "loss": 37.6875, "step": 3851 }, { "epoch": 0.036463115646387295, "grad_norm": 436.5459289550781, "learning_rate": 1.9997814148128565e-06, "loss": 31.1328, "step": 3852 }, { "epoch": 0.03647258166810235, "grad_norm": 265.6065979003906, "learning_rate": 1.9997807733540665e-06, "loss": 31.4844, "step": 3853 }, { "epoch": 0.0364820476898174, "grad_norm": 222.5419921875, "learning_rate": 1.9997801309555476e-06, "loss": 27.8906, "step": 3854 }, { "epoch": 0.036491513711532454, "grad_norm": 224.63125610351562, "learning_rate": 1.9997794876173004e-06, "loss": 21.8125, "step": 3855 }, { "epoch": 0.03650097973324751, "grad_norm": 710.0217895507812, "learning_rate": 1.9997788433393255e-06, "loss": 53.3906, "step": 3856 }, { "epoch": 0.03651044575496256, "grad_norm": 1082.8370361328125, "learning_rate": 1.9997781981216236e-06, "loss": 58.9688, "step": 3857 }, { "epoch": 0.03651991177667761, "grad_norm": 313.231689453125, "learning_rate": 1.9997775519641953e-06, "loss": 27.1719, "step": 3858 }, { "epoch": 0.036529377798392666, "grad_norm": 215.9691619873047, "learning_rate": 1.999776904867041e-06, "loss": 25.875, "step": 3859 }, { "epoch": 0.036538843820107726, "grad_norm": 475.8743591308594, "learning_rate": 1.999776256830161e-06, "loss": 33.6094, "step": 3860 }, { "epoch": 0.03654830984182278, "grad_norm": 448.6815490722656, "learning_rate": 1.9997756078535572e-06, "loss": 63.9453, "step": 3861 }, { "epoch": 0.03655777586353783, "grad_norm": 598.0234375, "learning_rate": 1.999774957937229e-06, "loss": 52.6719, "step": 3862 }, { "epoch": 0.036567241885252885, "grad_norm": 2.6018099784851074, "learning_rate": 1.9997743070811775e-06, "loss": 0.8071, "step": 3863 }, { "epoch": 0.03657670790696794, "grad_norm": 359.6696472167969, "learning_rate": 1.999773655285403e-06, "loss": 34.1719, "step": 3864 }, { "epoch": 0.03658617392868299, "grad_norm": 473.6632385253906, "learning_rate": 1.9997730025499065e-06, "loss": 27.8281, "step": 3865 }, { "epoch": 0.036595639950398044, "grad_norm": 563.3773803710938, "learning_rate": 1.999772348874688e-06, "loss": 54.4531, "step": 3866 }, { "epoch": 0.0366051059721131, "grad_norm": 300.0921325683594, "learning_rate": 1.999771694259749e-06, "loss": 25.1562, "step": 3867 }, { "epoch": 0.03661457199382816, "grad_norm": 436.8417053222656, "learning_rate": 1.9997710387050898e-06, "loss": 41.6797, "step": 3868 }, { "epoch": 0.03662403801554321, "grad_norm": 245.89419555664062, "learning_rate": 1.9997703822107103e-06, "loss": 22.7734, "step": 3869 }, { "epoch": 0.03663350403725826, "grad_norm": 362.1257629394531, "learning_rate": 1.999769724776612e-06, "loss": 27.3594, "step": 3870 }, { "epoch": 0.036642970058973316, "grad_norm": 661.2514038085938, "learning_rate": 1.999769066402795e-06, "loss": 43.125, "step": 3871 }, { "epoch": 0.03665243608068837, "grad_norm": 664.4239501953125, "learning_rate": 1.9997684070892605e-06, "loss": 35.8906, "step": 3872 }, { "epoch": 0.03666190210240342, "grad_norm": 526.2892456054688, "learning_rate": 1.9997677468360084e-06, "loss": 59.0625, "step": 3873 }, { "epoch": 0.036671368124118475, "grad_norm": 727.5333251953125, "learning_rate": 1.9997670856430397e-06, "loss": 31.5391, "step": 3874 }, { "epoch": 0.03668083414583353, "grad_norm": 443.4952087402344, "learning_rate": 1.999766423510355e-06, "loss": 23.25, "step": 3875 }, { "epoch": 0.03669030016754858, "grad_norm": 676.1340942382812, "learning_rate": 1.9997657604379548e-06, "loss": 44.3438, "step": 3876 }, { "epoch": 0.03669976618926364, "grad_norm": 694.0687255859375, "learning_rate": 1.9997650964258402e-06, "loss": 78.75, "step": 3877 }, { "epoch": 0.036709232210978694, "grad_norm": 807.38330078125, "learning_rate": 1.999764431474011e-06, "loss": 69.375, "step": 3878 }, { "epoch": 0.03671869823269375, "grad_norm": 265.9314880371094, "learning_rate": 1.9997637655824686e-06, "loss": 30.375, "step": 3879 }, { "epoch": 0.0367281642544088, "grad_norm": 215.1320037841797, "learning_rate": 1.9997630987512133e-06, "loss": 26.6719, "step": 3880 }, { "epoch": 0.03673763027612385, "grad_norm": 420.2221984863281, "learning_rate": 1.9997624309802456e-06, "loss": 29.2812, "step": 3881 }, { "epoch": 0.036747096297838906, "grad_norm": 995.6256713867188, "learning_rate": 1.999761762269566e-06, "loss": 67.6406, "step": 3882 }, { "epoch": 0.03675656231955396, "grad_norm": 230.09864807128906, "learning_rate": 1.999761092619176e-06, "loss": 22.3359, "step": 3883 }, { "epoch": 0.03676602834126901, "grad_norm": 509.7961120605469, "learning_rate": 1.999760422029075e-06, "loss": 31.3203, "step": 3884 }, { "epoch": 0.036775494362984065, "grad_norm": 542.6958618164062, "learning_rate": 1.9997597504992643e-06, "loss": 49.6406, "step": 3885 }, { "epoch": 0.036784960384699125, "grad_norm": 296.7717590332031, "learning_rate": 1.9997590780297447e-06, "loss": 36.4219, "step": 3886 }, { "epoch": 0.03679442640641418, "grad_norm": 2.9542412757873535, "learning_rate": 1.9997584046205166e-06, "loss": 0.8999, "step": 3887 }, { "epoch": 0.03680389242812923, "grad_norm": 275.5627746582031, "learning_rate": 1.999757730271581e-06, "loss": 39.2969, "step": 3888 }, { "epoch": 0.036813358449844284, "grad_norm": 536.58984375, "learning_rate": 1.9997570549829374e-06, "loss": 44.6094, "step": 3889 }, { "epoch": 0.03682282447155934, "grad_norm": 597.2581787109375, "learning_rate": 1.9997563787545876e-06, "loss": 55.9688, "step": 3890 }, { "epoch": 0.03683229049327439, "grad_norm": 261.741455078125, "learning_rate": 1.999755701586532e-06, "loss": 28.9219, "step": 3891 }, { "epoch": 0.03684175651498944, "grad_norm": 282.5636901855469, "learning_rate": 1.999755023478771e-06, "loss": 34.7344, "step": 3892 }, { "epoch": 0.036851222536704496, "grad_norm": 1203.3533935546875, "learning_rate": 1.999754344431305e-06, "loss": 73.2656, "step": 3893 }, { "epoch": 0.036860688558419556, "grad_norm": 186.94210815429688, "learning_rate": 1.9997536644441353e-06, "loss": 26.2344, "step": 3894 }, { "epoch": 0.03687015458013461, "grad_norm": 282.1566162109375, "learning_rate": 1.999752983517262e-06, "loss": 25.0781, "step": 3895 }, { "epoch": 0.03687962060184966, "grad_norm": 478.47918701171875, "learning_rate": 1.999752301650686e-06, "loss": 39.6875, "step": 3896 }, { "epoch": 0.036889086623564715, "grad_norm": 404.4712219238281, "learning_rate": 1.999751618844408e-06, "loss": 39.4375, "step": 3897 }, { "epoch": 0.03689855264527977, "grad_norm": 597.1972045898438, "learning_rate": 1.999750935098428e-06, "loss": 62.2188, "step": 3898 }, { "epoch": 0.03690801866699482, "grad_norm": 321.4482727050781, "learning_rate": 1.9997502504127478e-06, "loss": 28.9688, "step": 3899 }, { "epoch": 0.036917484688709874, "grad_norm": 352.9864196777344, "learning_rate": 1.999749564787367e-06, "loss": 25.0781, "step": 3900 }, { "epoch": 0.03692695071042493, "grad_norm": 224.22055053710938, "learning_rate": 1.999748878222287e-06, "loss": 31.7812, "step": 3901 }, { "epoch": 0.03693641673213998, "grad_norm": 253.4858856201172, "learning_rate": 1.999748190717508e-06, "loss": 25.9375, "step": 3902 }, { "epoch": 0.03694588275385504, "grad_norm": 511.7886047363281, "learning_rate": 1.9997475022730303e-06, "loss": 54.1562, "step": 3903 }, { "epoch": 0.03695534877557009, "grad_norm": 588.7847900390625, "learning_rate": 1.9997468128888557e-06, "loss": 54.4609, "step": 3904 }, { "epoch": 0.036964814797285146, "grad_norm": 338.9720458984375, "learning_rate": 1.9997461225649836e-06, "loss": 19.7969, "step": 3905 }, { "epoch": 0.0369742808190002, "grad_norm": 723.777099609375, "learning_rate": 1.9997454313014152e-06, "loss": 74.4062, "step": 3906 }, { "epoch": 0.03698374684071525, "grad_norm": 449.1311340332031, "learning_rate": 1.999744739098151e-06, "loss": 38.0156, "step": 3907 }, { "epoch": 0.036993212862430305, "grad_norm": 204.79257202148438, "learning_rate": 1.9997440459551924e-06, "loss": 24.5938, "step": 3908 }, { "epoch": 0.03700267888414536, "grad_norm": 239.16940307617188, "learning_rate": 1.999743351872539e-06, "loss": 32.0469, "step": 3909 }, { "epoch": 0.03701214490586041, "grad_norm": 714.3076171875, "learning_rate": 1.999742656850192e-06, "loss": 34.0195, "step": 3910 }, { "epoch": 0.03702161092757547, "grad_norm": 237.63482666015625, "learning_rate": 1.999741960888152e-06, "loss": 23.7266, "step": 3911 }, { "epoch": 0.037031076949290524, "grad_norm": 261.95404052734375, "learning_rate": 1.9997412639864194e-06, "loss": 30.5, "step": 3912 }, { "epoch": 0.03704054297100558, "grad_norm": 209.9373016357422, "learning_rate": 1.999740566144995e-06, "loss": 21.0703, "step": 3913 }, { "epoch": 0.03705000899272063, "grad_norm": 2.9716081619262695, "learning_rate": 1.99973986736388e-06, "loss": 0.8838, "step": 3914 }, { "epoch": 0.03705947501443568, "grad_norm": 731.6339111328125, "learning_rate": 1.9997391676430737e-06, "loss": 49.6562, "step": 3915 }, { "epoch": 0.037068941036150736, "grad_norm": 843.6861572265625, "learning_rate": 1.9997384669825786e-06, "loss": 49.2344, "step": 3916 }, { "epoch": 0.03707840705786579, "grad_norm": 254.0948486328125, "learning_rate": 1.999737765382394e-06, "loss": 26.5312, "step": 3917 }, { "epoch": 0.03708787307958084, "grad_norm": 408.58837890625, "learning_rate": 1.999737062842521e-06, "loss": 63.7656, "step": 3918 }, { "epoch": 0.037097339101295895, "grad_norm": 252.3920440673828, "learning_rate": 1.9997363593629595e-06, "loss": 27.3047, "step": 3919 }, { "epoch": 0.037106805123010955, "grad_norm": 337.0741882324219, "learning_rate": 1.9997356549437116e-06, "loss": 23.6875, "step": 3920 }, { "epoch": 0.03711627114472601, "grad_norm": 3.3655593395233154, "learning_rate": 1.999734949584777e-06, "loss": 0.9087, "step": 3921 }, { "epoch": 0.03712573716644106, "grad_norm": 241.12258911132812, "learning_rate": 1.9997342432861564e-06, "loss": 29.2188, "step": 3922 }, { "epoch": 0.037135203188156114, "grad_norm": 336.6657409667969, "learning_rate": 1.999733536047851e-06, "loss": 27.5, "step": 3923 }, { "epoch": 0.03714466920987117, "grad_norm": 528.3167724609375, "learning_rate": 1.999732827869861e-06, "loss": 66.0234, "step": 3924 }, { "epoch": 0.03715413523158622, "grad_norm": 470.5655212402344, "learning_rate": 1.999732118752187e-06, "loss": 49.2188, "step": 3925 }, { "epoch": 0.03716360125330127, "grad_norm": 267.2923889160156, "learning_rate": 1.9997314086948305e-06, "loss": 24.9766, "step": 3926 }, { "epoch": 0.037173067275016326, "grad_norm": 765.3546752929688, "learning_rate": 1.999730697697791e-06, "loss": 41.2422, "step": 3927 }, { "epoch": 0.03718253329673138, "grad_norm": 610.8989868164062, "learning_rate": 1.9997299857610696e-06, "loss": 63.875, "step": 3928 }, { "epoch": 0.03719199931844644, "grad_norm": 430.7619934082031, "learning_rate": 1.999729272884667e-06, "loss": 28.9453, "step": 3929 }, { "epoch": 0.03720146534016149, "grad_norm": 645.8624267578125, "learning_rate": 1.9997285590685843e-06, "loss": 36.4961, "step": 3930 }, { "epoch": 0.037210931361876545, "grad_norm": 226.1885986328125, "learning_rate": 1.9997278443128214e-06, "loss": 24.4688, "step": 3931 }, { "epoch": 0.0372203973835916, "grad_norm": 557.6259765625, "learning_rate": 1.9997271286173797e-06, "loss": 39.875, "step": 3932 }, { "epoch": 0.03722986340530665, "grad_norm": 3.5745017528533936, "learning_rate": 1.999726411982259e-06, "loss": 1.0464, "step": 3933 }, { "epoch": 0.037239329427021704, "grad_norm": 3.126005172729492, "learning_rate": 1.999725694407461e-06, "loss": 0.8364, "step": 3934 }, { "epoch": 0.03724879544873676, "grad_norm": 672.6364135742188, "learning_rate": 1.999724975892986e-06, "loss": 46.1406, "step": 3935 }, { "epoch": 0.03725826147045181, "grad_norm": 422.240966796875, "learning_rate": 1.9997242564388342e-06, "loss": 63.75, "step": 3936 }, { "epoch": 0.03726772749216687, "grad_norm": 229.31187438964844, "learning_rate": 1.999723536045007e-06, "loss": 19.3438, "step": 3937 }, { "epoch": 0.03727719351388192, "grad_norm": 445.7690124511719, "learning_rate": 1.9997228147115043e-06, "loss": 41.4844, "step": 3938 }, { "epoch": 0.037286659535596976, "grad_norm": 353.1162414550781, "learning_rate": 1.9997220924383275e-06, "loss": 30.3281, "step": 3939 }, { "epoch": 0.03729612555731203, "grad_norm": 835.0104370117188, "learning_rate": 1.9997213692254764e-06, "loss": 37.7578, "step": 3940 }, { "epoch": 0.03730559157902708, "grad_norm": 517.5938110351562, "learning_rate": 1.9997206450729528e-06, "loss": 36.5234, "step": 3941 }, { "epoch": 0.037315057600742135, "grad_norm": 288.86279296875, "learning_rate": 1.9997199199807566e-06, "loss": 28.8125, "step": 3942 }, { "epoch": 0.03732452362245719, "grad_norm": 272.73089599609375, "learning_rate": 1.9997191939488892e-06, "loss": 45.625, "step": 3943 }, { "epoch": 0.03733398964417224, "grad_norm": 2.424023389816284, "learning_rate": 1.99971846697735e-06, "loss": 0.74, "step": 3944 }, { "epoch": 0.037343455665887294, "grad_norm": 618.109619140625, "learning_rate": 1.999717739066141e-06, "loss": 33.7266, "step": 3945 }, { "epoch": 0.037352921687602354, "grad_norm": 516.643798828125, "learning_rate": 1.999717010215262e-06, "loss": 33.4844, "step": 3946 }, { "epoch": 0.03736238770931741, "grad_norm": 355.71246337890625, "learning_rate": 1.9997162804247145e-06, "loss": 28.1719, "step": 3947 }, { "epoch": 0.03737185373103246, "grad_norm": 200.2396240234375, "learning_rate": 1.9997155496944985e-06, "loss": 22.2969, "step": 3948 }, { "epoch": 0.03738131975274751, "grad_norm": 367.6573181152344, "learning_rate": 1.999714818024615e-06, "loss": 29.25, "step": 3949 }, { "epoch": 0.037390785774462566, "grad_norm": 437.60101318359375, "learning_rate": 1.9997140854150646e-06, "loss": 30.8906, "step": 3950 }, { "epoch": 0.03740025179617762, "grad_norm": 1148.0882568359375, "learning_rate": 1.9997133518658476e-06, "loss": 55.5547, "step": 3951 }, { "epoch": 0.03740971781789267, "grad_norm": 353.47894287109375, "learning_rate": 1.9997126173769657e-06, "loss": 25.875, "step": 3952 }, { "epoch": 0.037419183839607725, "grad_norm": 3.892012357711792, "learning_rate": 1.999711881948419e-06, "loss": 1.0361, "step": 3953 }, { "epoch": 0.037428649861322785, "grad_norm": 728.363037109375, "learning_rate": 1.9997111455802076e-06, "loss": 40.4141, "step": 3954 }, { "epoch": 0.03743811588303784, "grad_norm": 187.90052795410156, "learning_rate": 1.999710408272333e-06, "loss": 17.1719, "step": 3955 }, { "epoch": 0.03744758190475289, "grad_norm": 424.6152038574219, "learning_rate": 1.999709670024796e-06, "loss": 34.8281, "step": 3956 }, { "epoch": 0.037457047926467944, "grad_norm": 750.8565673828125, "learning_rate": 1.9997089308375962e-06, "loss": 81.4062, "step": 3957 }, { "epoch": 0.037466513948183, "grad_norm": 175.21832275390625, "learning_rate": 1.9997081907107356e-06, "loss": 25.4062, "step": 3958 }, { "epoch": 0.03747597996989805, "grad_norm": 952.3078002929688, "learning_rate": 1.999707449644214e-06, "loss": 49.4062, "step": 3959 }, { "epoch": 0.0374854459916131, "grad_norm": 350.1123046875, "learning_rate": 1.9997067076380326e-06, "loss": 33.5938, "step": 3960 }, { "epoch": 0.037494912013328156, "grad_norm": 652.3302612304688, "learning_rate": 1.9997059646921925e-06, "loss": 31.6641, "step": 3961 }, { "epoch": 0.03750437803504321, "grad_norm": 1013.5723876953125, "learning_rate": 1.999705220806693e-06, "loss": 36.5312, "step": 3962 }, { "epoch": 0.03751384405675827, "grad_norm": 638.8154907226562, "learning_rate": 1.9997044759815358e-06, "loss": 27.7344, "step": 3963 }, { "epoch": 0.03752331007847332, "grad_norm": 314.95135498046875, "learning_rate": 1.9997037302167217e-06, "loss": 29.6016, "step": 3964 }, { "epoch": 0.037532776100188375, "grad_norm": 482.0820617675781, "learning_rate": 1.999702983512251e-06, "loss": 44.3906, "step": 3965 }, { "epoch": 0.03754224212190343, "grad_norm": 390.84503173828125, "learning_rate": 1.9997022358681248e-06, "loss": 34.7969, "step": 3966 }, { "epoch": 0.03755170814361848, "grad_norm": 285.4569396972656, "learning_rate": 1.999701487284343e-06, "loss": 23.0156, "step": 3967 }, { "epoch": 0.037561174165333534, "grad_norm": 275.0978088378906, "learning_rate": 1.999700737760907e-06, "loss": 21.7188, "step": 3968 }, { "epoch": 0.03757064018704859, "grad_norm": 428.83599853515625, "learning_rate": 1.9996999872978174e-06, "loss": 34.0312, "step": 3969 }, { "epoch": 0.03758010620876364, "grad_norm": 4.7636895179748535, "learning_rate": 1.999699235895075e-06, "loss": 1.0586, "step": 3970 }, { "epoch": 0.03758957223047869, "grad_norm": 225.14913940429688, "learning_rate": 1.99969848355268e-06, "loss": 25.0, "step": 3971 }, { "epoch": 0.03759903825219375, "grad_norm": 3.120903491973877, "learning_rate": 1.999697730270634e-06, "loss": 0.897, "step": 3972 }, { "epoch": 0.037608504273908806, "grad_norm": 547.7289428710938, "learning_rate": 1.9996969760489366e-06, "loss": 49.9688, "step": 3973 }, { "epoch": 0.03761797029562386, "grad_norm": 216.97418212890625, "learning_rate": 1.9996962208875892e-06, "loss": 24.0938, "step": 3974 }, { "epoch": 0.03762743631733891, "grad_norm": 387.2430114746094, "learning_rate": 1.9996954647865926e-06, "loss": 27.3438, "step": 3975 }, { "epoch": 0.037636902339053965, "grad_norm": 292.34246826171875, "learning_rate": 1.9996947077459473e-06, "loss": 25.5, "step": 3976 }, { "epoch": 0.03764636836076902, "grad_norm": 571.392333984375, "learning_rate": 1.999693949765654e-06, "loss": 46.875, "step": 3977 }, { "epoch": 0.03765583438248407, "grad_norm": 724.0386962890625, "learning_rate": 1.999693190845713e-06, "loss": 61.125, "step": 3978 }, { "epoch": 0.037665300404199124, "grad_norm": 230.63526916503906, "learning_rate": 1.999692430986126e-06, "loss": 33.0156, "step": 3979 }, { "epoch": 0.037674766425914184, "grad_norm": 721.3485717773438, "learning_rate": 1.999691670186893e-06, "loss": 73.125, "step": 3980 }, { "epoch": 0.03768423244762924, "grad_norm": 686.45556640625, "learning_rate": 1.9996909084480145e-06, "loss": 55.2812, "step": 3981 }, { "epoch": 0.03769369846934429, "grad_norm": 146.12081909179688, "learning_rate": 1.999690145769492e-06, "loss": 21.4375, "step": 3982 }, { "epoch": 0.03770316449105934, "grad_norm": 391.4363098144531, "learning_rate": 1.9996893821513257e-06, "loss": 58.1562, "step": 3983 }, { "epoch": 0.037712630512774396, "grad_norm": 610.6328125, "learning_rate": 1.999688617593516e-06, "loss": 52.625, "step": 3984 }, { "epoch": 0.03772209653448945, "grad_norm": 417.68218994140625, "learning_rate": 1.999687852096065e-06, "loss": 44.0625, "step": 3985 }, { "epoch": 0.0377315625562045, "grad_norm": 1586.3619384765625, "learning_rate": 1.9996870856589715e-06, "loss": 81.9531, "step": 3986 }, { "epoch": 0.037741028577919555, "grad_norm": 570.1763916015625, "learning_rate": 1.999686318282238e-06, "loss": 67.9688, "step": 3987 }, { "epoch": 0.03775049459963461, "grad_norm": 151.45359802246094, "learning_rate": 1.9996855499658637e-06, "loss": 26.375, "step": 3988 }, { "epoch": 0.03775996062134967, "grad_norm": 634.9603881835938, "learning_rate": 1.9996847807098504e-06, "loss": 30.6406, "step": 3989 }, { "epoch": 0.03776942664306472, "grad_norm": 2.9788503646850586, "learning_rate": 1.999684010514198e-06, "loss": 0.8584, "step": 3990 }, { "epoch": 0.037778892664779774, "grad_norm": 465.9812927246094, "learning_rate": 1.999683239378908e-06, "loss": 52.7188, "step": 3991 }, { "epoch": 0.03778835868649483, "grad_norm": 531.2736206054688, "learning_rate": 1.9996824673039808e-06, "loss": 47.625, "step": 3992 }, { "epoch": 0.03779782470820988, "grad_norm": 1159.8486328125, "learning_rate": 1.999681694289417e-06, "loss": 54.5547, "step": 3993 }, { "epoch": 0.03780729072992493, "grad_norm": 760.0477905273438, "learning_rate": 1.9996809203352174e-06, "loss": 48.0, "step": 3994 }, { "epoch": 0.037816756751639986, "grad_norm": 457.92926025390625, "learning_rate": 1.999680145441383e-06, "loss": 24.3047, "step": 3995 }, { "epoch": 0.03782622277335504, "grad_norm": 347.4511413574219, "learning_rate": 1.999679369607914e-06, "loss": 41.7812, "step": 3996 }, { "epoch": 0.0378356887950701, "grad_norm": 462.1757507324219, "learning_rate": 1.9996785928348115e-06, "loss": 45.5156, "step": 3997 }, { "epoch": 0.03784515481678515, "grad_norm": 413.759521484375, "learning_rate": 1.9996778151220767e-06, "loss": 43.5469, "step": 3998 }, { "epoch": 0.037854620838500205, "grad_norm": 210.07760620117188, "learning_rate": 1.9996770364697092e-06, "loss": 22.3281, "step": 3999 }, { "epoch": 0.03786408686021526, "grad_norm": 517.9163818359375, "learning_rate": 1.9996762568777107e-06, "loss": 38.3125, "step": 4000 }, { "epoch": 0.03787355288193031, "grad_norm": 359.9374084472656, "learning_rate": 1.9996754763460812e-06, "loss": 33.7344, "step": 4001 }, { "epoch": 0.037883018903645364, "grad_norm": 348.8034362792969, "learning_rate": 1.999674694874822e-06, "loss": 34.1875, "step": 4002 }, { "epoch": 0.03789248492536042, "grad_norm": 682.2151489257812, "learning_rate": 1.9996739124639334e-06, "loss": 34.0547, "step": 4003 }, { "epoch": 0.03790195094707547, "grad_norm": 642.5120849609375, "learning_rate": 1.9996731291134164e-06, "loss": 25.4688, "step": 4004 }, { "epoch": 0.03791141696879052, "grad_norm": 236.0697784423828, "learning_rate": 1.9996723448232717e-06, "loss": 23.2266, "step": 4005 }, { "epoch": 0.03792088299050558, "grad_norm": 374.46002197265625, "learning_rate": 1.9996715595935003e-06, "loss": 44.9062, "step": 4006 }, { "epoch": 0.037930349012220636, "grad_norm": 187.06724548339844, "learning_rate": 1.999670773424102e-06, "loss": 25.4688, "step": 4007 }, { "epoch": 0.03793981503393569, "grad_norm": 306.43218994140625, "learning_rate": 1.9996699863150787e-06, "loss": 21.7031, "step": 4008 }, { "epoch": 0.03794928105565074, "grad_norm": 2.5986990928649902, "learning_rate": 1.9996691982664308e-06, "loss": 0.9175, "step": 4009 }, { "epoch": 0.037958747077365795, "grad_norm": 422.97735595703125, "learning_rate": 1.9996684092781586e-06, "loss": 27.1875, "step": 4010 }, { "epoch": 0.03796821309908085, "grad_norm": 367.4107360839844, "learning_rate": 1.9996676193502634e-06, "loss": 32.0938, "step": 4011 }, { "epoch": 0.0379776791207959, "grad_norm": 154.57363891601562, "learning_rate": 1.9996668284827453e-06, "loss": 23.6406, "step": 4012 }, { "epoch": 0.037987145142510954, "grad_norm": 3.1948070526123047, "learning_rate": 1.9996660366756055e-06, "loss": 0.9961, "step": 4013 }, { "epoch": 0.03799661116422601, "grad_norm": 866.84716796875, "learning_rate": 1.999665243928845e-06, "loss": 55.3125, "step": 4014 }, { "epoch": 0.03800607718594107, "grad_norm": 1062.6695556640625, "learning_rate": 1.9996644502424637e-06, "loss": 65.8125, "step": 4015 }, { "epoch": 0.03801554320765612, "grad_norm": 1309.7294921875, "learning_rate": 1.999663655616463e-06, "loss": 45.9688, "step": 4016 }, { "epoch": 0.03802500922937117, "grad_norm": 2.63024640083313, "learning_rate": 1.999662860050844e-06, "loss": 0.8809, "step": 4017 }, { "epoch": 0.038034475251086226, "grad_norm": 663.9353637695312, "learning_rate": 1.9996620635456063e-06, "loss": 43.3906, "step": 4018 }, { "epoch": 0.03804394127280128, "grad_norm": 488.9586181640625, "learning_rate": 1.9996612661007516e-06, "loss": 43.5312, "step": 4019 }, { "epoch": 0.03805340729451633, "grad_norm": 340.201904296875, "learning_rate": 1.9996604677162802e-06, "loss": 39.1797, "step": 4020 }, { "epoch": 0.038062873316231385, "grad_norm": 702.12109375, "learning_rate": 1.9996596683921935e-06, "loss": 67.6484, "step": 4021 }, { "epoch": 0.03807233933794644, "grad_norm": 441.03387451171875, "learning_rate": 1.999658868128491e-06, "loss": 52.7344, "step": 4022 }, { "epoch": 0.0380818053596615, "grad_norm": 746.7889404296875, "learning_rate": 1.999658066925175e-06, "loss": 46.5625, "step": 4023 }, { "epoch": 0.03809127138137655, "grad_norm": 313.63427734375, "learning_rate": 1.9996572647822443e-06, "loss": 30.1562, "step": 4024 }, { "epoch": 0.038100737403091604, "grad_norm": 324.4981384277344, "learning_rate": 1.9996564616997018e-06, "loss": 17.0469, "step": 4025 }, { "epoch": 0.03811020342480666, "grad_norm": 252.85865783691406, "learning_rate": 1.999655657677547e-06, "loss": 27.8438, "step": 4026 }, { "epoch": 0.03811966944652171, "grad_norm": 204.45559692382812, "learning_rate": 1.9996548527157808e-06, "loss": 25.4219, "step": 4027 }, { "epoch": 0.03812913546823676, "grad_norm": 664.2033081054688, "learning_rate": 1.999654046814404e-06, "loss": 31.7422, "step": 4028 }, { "epoch": 0.038138601489951816, "grad_norm": 270.58209228515625, "learning_rate": 1.9996532399734174e-06, "loss": 22.4531, "step": 4029 }, { "epoch": 0.03814806751166687, "grad_norm": 441.8704833984375, "learning_rate": 1.999652432192822e-06, "loss": 49.3906, "step": 4030 }, { "epoch": 0.03815753353338192, "grad_norm": 3.2485833168029785, "learning_rate": 1.9996516234726186e-06, "loss": 0.8062, "step": 4031 }, { "epoch": 0.03816699955509698, "grad_norm": 1031.78369140625, "learning_rate": 1.999650813812807e-06, "loss": 29.4375, "step": 4032 }, { "epoch": 0.038176465576812035, "grad_norm": 764.5628662109375, "learning_rate": 1.9996500032133893e-06, "loss": 37.8789, "step": 4033 }, { "epoch": 0.03818593159852709, "grad_norm": 219.71282958984375, "learning_rate": 1.9996491916743654e-06, "loss": 29.6719, "step": 4034 }, { "epoch": 0.03819539762024214, "grad_norm": 533.7113647460938, "learning_rate": 1.9996483791957363e-06, "loss": 45.9219, "step": 4035 }, { "epoch": 0.038204863641957194, "grad_norm": 136.07557678222656, "learning_rate": 1.9996475657775025e-06, "loss": 24.1094, "step": 4036 }, { "epoch": 0.03821432966367225, "grad_norm": 1543.44189453125, "learning_rate": 1.999646751419665e-06, "loss": 34.8203, "step": 4037 }, { "epoch": 0.0382237956853873, "grad_norm": 218.6106414794922, "learning_rate": 1.9996459361222252e-06, "loss": 26.1406, "step": 4038 }, { "epoch": 0.03823326170710235, "grad_norm": 340.34344482421875, "learning_rate": 1.999645119885183e-06, "loss": 27.8125, "step": 4039 }, { "epoch": 0.03824272772881741, "grad_norm": 255.4000701904297, "learning_rate": 1.999644302708539e-06, "loss": 23.0781, "step": 4040 }, { "epoch": 0.038252193750532466, "grad_norm": 764.486083984375, "learning_rate": 1.999643484592295e-06, "loss": 97.9922, "step": 4041 }, { "epoch": 0.03826165977224752, "grad_norm": 380.1627502441406, "learning_rate": 1.9996426655364505e-06, "loss": 23.5625, "step": 4042 }, { "epoch": 0.03827112579396257, "grad_norm": 289.04022216796875, "learning_rate": 1.9996418455410075e-06, "loss": 29.9297, "step": 4043 }, { "epoch": 0.038280591815677625, "grad_norm": 4.1252121925354, "learning_rate": 1.999641024605966e-06, "loss": 0.9768, "step": 4044 }, { "epoch": 0.03829005783739268, "grad_norm": 229.93797302246094, "learning_rate": 1.9996402027313268e-06, "loss": 25.3594, "step": 4045 }, { "epoch": 0.03829952385910773, "grad_norm": 448.14874267578125, "learning_rate": 1.999639379917091e-06, "loss": 40.7031, "step": 4046 }, { "epoch": 0.038308989880822784, "grad_norm": 170.48074340820312, "learning_rate": 1.9996385561632592e-06, "loss": 24.4062, "step": 4047 }, { "epoch": 0.03831845590253784, "grad_norm": 510.7287902832031, "learning_rate": 1.999637731469832e-06, "loss": 53.6484, "step": 4048 }, { "epoch": 0.0383279219242529, "grad_norm": 611.3519897460938, "learning_rate": 1.9996369058368103e-06, "loss": 56.5156, "step": 4049 }, { "epoch": 0.03833738794596795, "grad_norm": 569.4838256835938, "learning_rate": 1.9996360792641955e-06, "loss": 31.0312, "step": 4050 }, { "epoch": 0.038346853967683, "grad_norm": 255.91615295410156, "learning_rate": 1.9996352517519878e-06, "loss": 26.4062, "step": 4051 }, { "epoch": 0.038356319989398056, "grad_norm": 911.7078857421875, "learning_rate": 1.9996344233001875e-06, "loss": 45.1094, "step": 4052 }, { "epoch": 0.03836578601111311, "grad_norm": 350.1571960449219, "learning_rate": 1.999633593908796e-06, "loss": 26.1094, "step": 4053 }, { "epoch": 0.03837525203282816, "grad_norm": 251.94821166992188, "learning_rate": 1.999632763577814e-06, "loss": 28.1406, "step": 4054 }, { "epoch": 0.038384718054543215, "grad_norm": 417.5997314453125, "learning_rate": 1.9996319323072422e-06, "loss": 39.0938, "step": 4055 }, { "epoch": 0.03839418407625827, "grad_norm": 350.8194274902344, "learning_rate": 1.9996311000970814e-06, "loss": 34.7812, "step": 4056 }, { "epoch": 0.03840365009797332, "grad_norm": 255.92897033691406, "learning_rate": 1.9996302669473324e-06, "loss": 27.0312, "step": 4057 }, { "epoch": 0.03841311611968838, "grad_norm": 1600.46826171875, "learning_rate": 1.999629432857996e-06, "loss": 32.625, "step": 4058 }, { "epoch": 0.038422582141403434, "grad_norm": 193.08352661132812, "learning_rate": 1.999628597829073e-06, "loss": 28.9688, "step": 4059 }, { "epoch": 0.03843204816311849, "grad_norm": 3.2147865295410156, "learning_rate": 1.9996277618605644e-06, "loss": 1.0015, "step": 4060 }, { "epoch": 0.03844151418483354, "grad_norm": 266.9574890136719, "learning_rate": 1.9996269249524705e-06, "loss": 29.2031, "step": 4061 }, { "epoch": 0.03845098020654859, "grad_norm": 285.736328125, "learning_rate": 1.999626087104792e-06, "loss": 29.9688, "step": 4062 }, { "epoch": 0.038460446228263646, "grad_norm": 746.8775634765625, "learning_rate": 1.9996252483175306e-06, "loss": 49.4219, "step": 4063 }, { "epoch": 0.0384699122499787, "grad_norm": 281.80279541015625, "learning_rate": 1.999624408590686e-06, "loss": 29.2031, "step": 4064 }, { "epoch": 0.03847937827169375, "grad_norm": 182.1901092529297, "learning_rate": 1.99962356792426e-06, "loss": 18.7812, "step": 4065 }, { "epoch": 0.03848884429340881, "grad_norm": 305.5857849121094, "learning_rate": 1.9996227263182522e-06, "loss": 29.4375, "step": 4066 }, { "epoch": 0.038498310315123865, "grad_norm": 323.11883544921875, "learning_rate": 1.9996218837726645e-06, "loss": 24.9531, "step": 4067 }, { "epoch": 0.03850777633683892, "grad_norm": 556.8881225585938, "learning_rate": 1.999621040287497e-06, "loss": 37.8594, "step": 4068 }, { "epoch": 0.03851724235855397, "grad_norm": 372.77874755859375, "learning_rate": 1.9996201958627513e-06, "loss": 33.4844, "step": 4069 }, { "epoch": 0.038526708380269024, "grad_norm": 905.8208618164062, "learning_rate": 1.999619350498427e-06, "loss": 34.9922, "step": 4070 }, { "epoch": 0.03853617440198408, "grad_norm": 153.20530700683594, "learning_rate": 1.999618504194526e-06, "loss": 24.2812, "step": 4071 }, { "epoch": 0.03854564042369913, "grad_norm": 730.8421630859375, "learning_rate": 1.999617656951048e-06, "loss": 34.9297, "step": 4072 }, { "epoch": 0.03855510644541418, "grad_norm": 187.9532470703125, "learning_rate": 1.9996168087679953e-06, "loss": 29.3906, "step": 4073 }, { "epoch": 0.038564572467129236, "grad_norm": 2.7699813842773438, "learning_rate": 1.9996159596453674e-06, "loss": 0.8574, "step": 4074 }, { "epoch": 0.038574038488844296, "grad_norm": 384.8652038574219, "learning_rate": 1.9996151095831656e-06, "loss": 23.7422, "step": 4075 }, { "epoch": 0.03858350451055935, "grad_norm": 564.9271240234375, "learning_rate": 1.999614258581391e-06, "loss": 52.7969, "step": 4076 }, { "epoch": 0.0385929705322744, "grad_norm": 380.8847961425781, "learning_rate": 1.999613406640043e-06, "loss": 25.2266, "step": 4077 }, { "epoch": 0.038602436553989455, "grad_norm": 522.300048828125, "learning_rate": 1.999612553759124e-06, "loss": 47.2812, "step": 4078 }, { "epoch": 0.03861190257570451, "grad_norm": 300.40521240234375, "learning_rate": 1.9996116999386346e-06, "loss": 24.5156, "step": 4079 }, { "epoch": 0.03862136859741956, "grad_norm": 185.93521118164062, "learning_rate": 1.9996108451785746e-06, "loss": 26.4531, "step": 4080 }, { "epoch": 0.038630834619134614, "grad_norm": 773.1136474609375, "learning_rate": 1.999609989478946e-06, "loss": 47.8438, "step": 4081 }, { "epoch": 0.03864030064084967, "grad_norm": 3.478426456451416, "learning_rate": 1.9996091328397484e-06, "loss": 0.9595, "step": 4082 }, { "epoch": 0.03864976666256473, "grad_norm": 329.6343688964844, "learning_rate": 1.999608275260984e-06, "loss": 28.6094, "step": 4083 }, { "epoch": 0.03865923268427978, "grad_norm": 392.0374755859375, "learning_rate": 1.9996074167426523e-06, "loss": 28.5625, "step": 4084 }, { "epoch": 0.03866869870599483, "grad_norm": 931.4622802734375, "learning_rate": 1.999606557284755e-06, "loss": 64.0156, "step": 4085 }, { "epoch": 0.038678164727709886, "grad_norm": 412.564697265625, "learning_rate": 1.9996056968872922e-06, "loss": 44.0469, "step": 4086 }, { "epoch": 0.03868763074942494, "grad_norm": 303.4408264160156, "learning_rate": 1.9996048355502654e-06, "loss": 36.5, "step": 4087 }, { "epoch": 0.03869709677113999, "grad_norm": 152.13156127929688, "learning_rate": 1.999603973273675e-06, "loss": 23.2812, "step": 4088 }, { "epoch": 0.038706562792855045, "grad_norm": 553.5531616210938, "learning_rate": 1.9996031100575215e-06, "loss": 40.8281, "step": 4089 }, { "epoch": 0.0387160288145701, "grad_norm": 666.1461181640625, "learning_rate": 1.9996022459018066e-06, "loss": 61.7344, "step": 4090 }, { "epoch": 0.03872549483628515, "grad_norm": 357.75823974609375, "learning_rate": 1.9996013808065306e-06, "loss": 24.9844, "step": 4091 }, { "epoch": 0.03873496085800021, "grad_norm": 367.1924133300781, "learning_rate": 1.9996005147716943e-06, "loss": 47.25, "step": 4092 }, { "epoch": 0.038744426879715264, "grad_norm": 583.586181640625, "learning_rate": 1.999599647797298e-06, "loss": 64.7656, "step": 4093 }, { "epoch": 0.03875389290143032, "grad_norm": 421.32550048828125, "learning_rate": 1.9995987798833438e-06, "loss": 62.0312, "step": 4094 }, { "epoch": 0.03876335892314537, "grad_norm": 193.2220458984375, "learning_rate": 1.9995979110298313e-06, "loss": 26.0312, "step": 4095 }, { "epoch": 0.03877282494486042, "grad_norm": 681.11669921875, "learning_rate": 1.999597041236762e-06, "loss": 67.9375, "step": 4096 }, { "epoch": 0.038782290966575476, "grad_norm": 3.226996898651123, "learning_rate": 1.9995961705041364e-06, "loss": 0.9541, "step": 4097 }, { "epoch": 0.03879175698829053, "grad_norm": 3.3791606426239014, "learning_rate": 1.9995952988319557e-06, "loss": 0.8025, "step": 4098 }, { "epoch": 0.03880122301000558, "grad_norm": 520.2837524414062, "learning_rate": 1.99959442622022e-06, "loss": 40.9375, "step": 4099 }, { "epoch": 0.038810689031720635, "grad_norm": 392.1064147949219, "learning_rate": 1.999593552668931e-06, "loss": 41.5781, "step": 4100 }, { "epoch": 0.038820155053435695, "grad_norm": 1083.9539794921875, "learning_rate": 1.999592678178089e-06, "loss": 46.3711, "step": 4101 }, { "epoch": 0.03882962107515075, "grad_norm": 1068.4798583984375, "learning_rate": 1.9995918027476946e-06, "loss": 33.2188, "step": 4102 }, { "epoch": 0.0388390870968658, "grad_norm": 408.39105224609375, "learning_rate": 1.999590926377749e-06, "loss": 28.375, "step": 4103 }, { "epoch": 0.038848553118580854, "grad_norm": 505.2362365722656, "learning_rate": 1.999590049068253e-06, "loss": 29.7266, "step": 4104 }, { "epoch": 0.03885801914029591, "grad_norm": 654.9423217773438, "learning_rate": 1.9995891708192077e-06, "loss": 58.7031, "step": 4105 }, { "epoch": 0.03886748516201096, "grad_norm": 157.72303771972656, "learning_rate": 1.9995882916306134e-06, "loss": 25.4531, "step": 4106 }, { "epoch": 0.03887695118372601, "grad_norm": 478.8612976074219, "learning_rate": 1.999587411502471e-06, "loss": 28.5625, "step": 4107 }, { "epoch": 0.038886417205441066, "grad_norm": 1775.6209716796875, "learning_rate": 1.9995865304347815e-06, "loss": 37.2578, "step": 4108 }, { "epoch": 0.038895883227156126, "grad_norm": 248.40603637695312, "learning_rate": 1.999585648427546e-06, "loss": 23.8672, "step": 4109 }, { "epoch": 0.03890534924887118, "grad_norm": 728.8482666015625, "learning_rate": 1.9995847654807645e-06, "loss": 43.0938, "step": 4110 }, { "epoch": 0.03891481527058623, "grad_norm": 292.21759033203125, "learning_rate": 1.999583881594439e-06, "loss": 39.7812, "step": 4111 }, { "epoch": 0.038924281292301285, "grad_norm": 203.60977172851562, "learning_rate": 1.999582996768569e-06, "loss": 25.3906, "step": 4112 }, { "epoch": 0.03893374731401634, "grad_norm": 362.2775573730469, "learning_rate": 1.9995821110031564e-06, "loss": 22.7266, "step": 4113 }, { "epoch": 0.03894321333573139, "grad_norm": 198.60812377929688, "learning_rate": 1.9995812242982017e-06, "loss": 24.5625, "step": 4114 }, { "epoch": 0.038952679357446444, "grad_norm": 215.81008911132812, "learning_rate": 1.9995803366537054e-06, "loss": 26.8281, "step": 4115 }, { "epoch": 0.0389621453791615, "grad_norm": 269.4090270996094, "learning_rate": 1.9995794480696687e-06, "loss": 13.7422, "step": 4116 }, { "epoch": 0.03897161140087655, "grad_norm": 3.0936386585235596, "learning_rate": 1.9995785585460925e-06, "loss": 0.8953, "step": 4117 }, { "epoch": 0.03898107742259161, "grad_norm": 215.70358276367188, "learning_rate": 1.9995776680829772e-06, "loss": 24.2578, "step": 4118 }, { "epoch": 0.03899054344430666, "grad_norm": 192.2714080810547, "learning_rate": 1.999576776680324e-06, "loss": 31.2969, "step": 4119 }, { "epoch": 0.039000009466021716, "grad_norm": 633.2465209960938, "learning_rate": 1.999575884338134e-06, "loss": 50.5156, "step": 4120 }, { "epoch": 0.03900947548773677, "grad_norm": 334.0137939453125, "learning_rate": 1.9995749910564076e-06, "loss": 21.8984, "step": 4121 }, { "epoch": 0.03901894150945182, "grad_norm": 580.3615112304688, "learning_rate": 1.9995740968351456e-06, "loss": 48.2188, "step": 4122 }, { "epoch": 0.039028407531166875, "grad_norm": 440.4383239746094, "learning_rate": 1.9995732016743487e-06, "loss": 46.3906, "step": 4123 }, { "epoch": 0.03903787355288193, "grad_norm": 1218.3585205078125, "learning_rate": 1.9995723055740182e-06, "loss": 57.6719, "step": 4124 }, { "epoch": 0.03904733957459698, "grad_norm": 719.4497680664062, "learning_rate": 1.999571408534155e-06, "loss": 48.4844, "step": 4125 }, { "epoch": 0.03905680559631204, "grad_norm": 177.07229614257812, "learning_rate": 1.9995705105547594e-06, "loss": 29.8125, "step": 4126 }, { "epoch": 0.039066271618027094, "grad_norm": 954.8977661132812, "learning_rate": 1.9995696116358328e-06, "loss": 60.0664, "step": 4127 }, { "epoch": 0.03907573763974215, "grad_norm": 196.576904296875, "learning_rate": 1.9995687117773755e-06, "loss": 23.8281, "step": 4128 }, { "epoch": 0.0390852036614572, "grad_norm": 289.41314697265625, "learning_rate": 1.999567810979389e-06, "loss": 35.4844, "step": 4129 }, { "epoch": 0.03909466968317225, "grad_norm": 428.90985107421875, "learning_rate": 1.9995669092418732e-06, "loss": 42.0938, "step": 4130 }, { "epoch": 0.039104135704887306, "grad_norm": 610.4267578125, "learning_rate": 1.9995660065648304e-06, "loss": 26.5664, "step": 4131 }, { "epoch": 0.03911360172660236, "grad_norm": 436.2392578125, "learning_rate": 1.9995651029482603e-06, "loss": 68.2031, "step": 4132 }, { "epoch": 0.03912306774831741, "grad_norm": 279.411376953125, "learning_rate": 1.9995641983921634e-06, "loss": 25.2188, "step": 4133 }, { "epoch": 0.039132533770032465, "grad_norm": 436.38671875, "learning_rate": 1.9995632928965417e-06, "loss": 41.5703, "step": 4134 }, { "epoch": 0.039141999791747525, "grad_norm": 379.0028991699219, "learning_rate": 1.9995623864613954e-06, "loss": 42.2344, "step": 4135 }, { "epoch": 0.03915146581346258, "grad_norm": 245.71591186523438, "learning_rate": 1.9995614790867256e-06, "loss": 29.1406, "step": 4136 }, { "epoch": 0.03916093183517763, "grad_norm": 210.48085021972656, "learning_rate": 1.999560570772533e-06, "loss": 22.8594, "step": 4137 }, { "epoch": 0.039170397856892684, "grad_norm": 701.0407104492188, "learning_rate": 1.9995596615188182e-06, "loss": 45.7266, "step": 4138 }, { "epoch": 0.03917986387860774, "grad_norm": 458.08837890625, "learning_rate": 1.999558751325583e-06, "loss": 53.1562, "step": 4139 }, { "epoch": 0.03918932990032279, "grad_norm": 603.677734375, "learning_rate": 1.999557840192827e-06, "loss": 60.0, "step": 4140 }, { "epoch": 0.03919879592203784, "grad_norm": 668.92724609375, "learning_rate": 1.999556928120552e-06, "loss": 25.3477, "step": 4141 }, { "epoch": 0.039208261943752896, "grad_norm": 450.9991760253906, "learning_rate": 1.9995560151087583e-06, "loss": 38.0469, "step": 4142 }, { "epoch": 0.03921772796546795, "grad_norm": 313.8189697265625, "learning_rate": 1.999555101157447e-06, "loss": 25.375, "step": 4143 }, { "epoch": 0.03922719398718301, "grad_norm": 414.12823486328125, "learning_rate": 1.9995541862666188e-06, "loss": 53.5234, "step": 4144 }, { "epoch": 0.03923666000889806, "grad_norm": 532.7650756835938, "learning_rate": 1.999553270436275e-06, "loss": 41.4414, "step": 4145 }, { "epoch": 0.039246126030613115, "grad_norm": 440.55364990234375, "learning_rate": 1.999552353666416e-06, "loss": 34.2031, "step": 4146 }, { "epoch": 0.03925559205232817, "grad_norm": 322.4089050292969, "learning_rate": 1.9995514359570426e-06, "loss": 21.0938, "step": 4147 }, { "epoch": 0.03926505807404322, "grad_norm": 460.5558776855469, "learning_rate": 1.9995505173081564e-06, "loss": 23.6523, "step": 4148 }, { "epoch": 0.039274524095758274, "grad_norm": 2.789703607559204, "learning_rate": 1.9995495977197572e-06, "loss": 0.875, "step": 4149 }, { "epoch": 0.03928399011747333, "grad_norm": 383.8794860839844, "learning_rate": 1.999548677191847e-06, "loss": 29.0781, "step": 4150 }, { "epoch": 0.03929345613918838, "grad_norm": 177.30848693847656, "learning_rate": 1.9995477557244256e-06, "loss": 25.5625, "step": 4151 }, { "epoch": 0.03930292216090344, "grad_norm": 448.502197265625, "learning_rate": 1.9995468333174942e-06, "loss": 29.6875, "step": 4152 }, { "epoch": 0.03931238818261849, "grad_norm": 592.1004028320312, "learning_rate": 1.9995459099710543e-06, "loss": 44.625, "step": 4153 }, { "epoch": 0.039321854204333546, "grad_norm": 240.15240478515625, "learning_rate": 1.9995449856851058e-06, "loss": 29.9062, "step": 4154 }, { "epoch": 0.0393313202260486, "grad_norm": 628.1829223632812, "learning_rate": 1.9995440604596503e-06, "loss": 37.6445, "step": 4155 }, { "epoch": 0.03934078624776365, "grad_norm": 459.3580322265625, "learning_rate": 1.9995431342946883e-06, "loss": 49.2031, "step": 4156 }, { "epoch": 0.039350252269478705, "grad_norm": 609.18408203125, "learning_rate": 1.9995422071902207e-06, "loss": 46.3594, "step": 4157 }, { "epoch": 0.03935971829119376, "grad_norm": 501.2630310058594, "learning_rate": 1.999541279146249e-06, "loss": 32.1094, "step": 4158 }, { "epoch": 0.03936918431290881, "grad_norm": 379.45849609375, "learning_rate": 1.999540350162773e-06, "loss": 25.5859, "step": 4159 }, { "epoch": 0.039378650334623864, "grad_norm": 414.5447082519531, "learning_rate": 1.999539420239794e-06, "loss": 23.3203, "step": 4160 }, { "epoch": 0.039388116356338924, "grad_norm": 454.16644287109375, "learning_rate": 1.999538489377313e-06, "loss": 25.0117, "step": 4161 }, { "epoch": 0.03939758237805398, "grad_norm": 466.9858703613281, "learning_rate": 1.9995375575753313e-06, "loss": 41.0625, "step": 4162 }, { "epoch": 0.03940704839976903, "grad_norm": 762.39794921875, "learning_rate": 1.999536624833849e-06, "loss": 57.8125, "step": 4163 }, { "epoch": 0.03941651442148408, "grad_norm": 319.0615539550781, "learning_rate": 1.9995356911528675e-06, "loss": 25.0938, "step": 4164 }, { "epoch": 0.039425980443199136, "grad_norm": 550.6720581054688, "learning_rate": 1.9995347565323873e-06, "loss": 51.3125, "step": 4165 }, { "epoch": 0.03943544646491419, "grad_norm": 880.4418334960938, "learning_rate": 1.9995338209724094e-06, "loss": 33.1094, "step": 4166 }, { "epoch": 0.03944491248662924, "grad_norm": 3.1767024993896484, "learning_rate": 1.999532884472935e-06, "loss": 0.6909, "step": 4167 }, { "epoch": 0.039454378508344295, "grad_norm": 336.6927185058594, "learning_rate": 1.9995319470339644e-06, "loss": 41.3281, "step": 4168 }, { "epoch": 0.03946384453005935, "grad_norm": 376.623046875, "learning_rate": 1.999531008655499e-06, "loss": 25.8516, "step": 4169 }, { "epoch": 0.03947331055177441, "grad_norm": 717.5015258789062, "learning_rate": 1.999530069337539e-06, "loss": 38.5, "step": 4170 }, { "epoch": 0.03948277657348946, "grad_norm": 268.05059814453125, "learning_rate": 1.9995291290800863e-06, "loss": 36.2969, "step": 4171 }, { "epoch": 0.039492242595204514, "grad_norm": 301.7518310546875, "learning_rate": 1.999528187883141e-06, "loss": 23.1875, "step": 4172 }, { "epoch": 0.03950170861691957, "grad_norm": 242.39727783203125, "learning_rate": 1.999527245746704e-06, "loss": 28.3281, "step": 4173 }, { "epoch": 0.03951117463863462, "grad_norm": 265.41217041015625, "learning_rate": 1.9995263026707772e-06, "loss": 30.0156, "step": 4174 }, { "epoch": 0.039520640660349673, "grad_norm": 984.6447143554688, "learning_rate": 1.9995253586553604e-06, "loss": 55.7031, "step": 4175 }, { "epoch": 0.039530106682064726, "grad_norm": 622.18505859375, "learning_rate": 1.9995244137004543e-06, "loss": 53.7812, "step": 4176 }, { "epoch": 0.03953957270377978, "grad_norm": 522.2833251953125, "learning_rate": 1.9995234678060605e-06, "loss": 29.5156, "step": 4177 }, { "epoch": 0.03954903872549484, "grad_norm": 407.25677490234375, "learning_rate": 1.9995225209721796e-06, "loss": 30.0156, "step": 4178 }, { "epoch": 0.03955850474720989, "grad_norm": 255.40220642089844, "learning_rate": 1.999521573198813e-06, "loss": 35.9609, "step": 4179 }, { "epoch": 0.039567970768924945, "grad_norm": 275.59222412109375, "learning_rate": 1.9995206244859605e-06, "loss": 22.9219, "step": 4180 }, { "epoch": 0.03957743679064, "grad_norm": 426.9004821777344, "learning_rate": 1.999519674833624e-06, "loss": 32.9375, "step": 4181 }, { "epoch": 0.03958690281235505, "grad_norm": 236.41775512695312, "learning_rate": 1.9995187242418044e-06, "loss": 24.5469, "step": 4182 }, { "epoch": 0.039596368834070104, "grad_norm": 462.0470886230469, "learning_rate": 1.9995177727105016e-06, "loss": 36.4922, "step": 4183 }, { "epoch": 0.03960583485578516, "grad_norm": 525.508056640625, "learning_rate": 1.9995168202397172e-06, "loss": 39.7266, "step": 4184 }, { "epoch": 0.03961530087750021, "grad_norm": 545.737060546875, "learning_rate": 1.999515866829452e-06, "loss": 60.2344, "step": 4185 }, { "epoch": 0.039624766899215264, "grad_norm": 212.68032836914062, "learning_rate": 1.9995149124797073e-06, "loss": 28.25, "step": 4186 }, { "epoch": 0.03963423292093032, "grad_norm": 247.36636352539062, "learning_rate": 1.9995139571904835e-06, "loss": 27.1094, "step": 4187 }, { "epoch": 0.039643698942645376, "grad_norm": 255.08346557617188, "learning_rate": 1.9995130009617816e-06, "loss": 26.3281, "step": 4188 }, { "epoch": 0.03965316496436043, "grad_norm": 1396.5218505859375, "learning_rate": 1.9995120437936024e-06, "loss": 87.7188, "step": 4189 }, { "epoch": 0.03966263098607548, "grad_norm": 224.7928924560547, "learning_rate": 1.9995110856859467e-06, "loss": 21.0781, "step": 4190 }, { "epoch": 0.039672097007790535, "grad_norm": 240.50411987304688, "learning_rate": 1.9995101266388155e-06, "loss": 27.875, "step": 4191 }, { "epoch": 0.03968156302950559, "grad_norm": 516.4314575195312, "learning_rate": 1.9995091666522104e-06, "loss": 66.3125, "step": 4192 }, { "epoch": 0.03969102905122064, "grad_norm": 305.9889831542969, "learning_rate": 1.9995082057261314e-06, "loss": 38.125, "step": 4193 }, { "epoch": 0.039700495072935695, "grad_norm": 1036.724853515625, "learning_rate": 1.9995072438605798e-06, "loss": 45.8438, "step": 4194 }, { "epoch": 0.039709961094650754, "grad_norm": 167.87290954589844, "learning_rate": 1.999506281055556e-06, "loss": 23.25, "step": 4195 }, { "epoch": 0.03971942711636581, "grad_norm": 465.1285705566406, "learning_rate": 1.999505317311062e-06, "loss": 53.3281, "step": 4196 }, { "epoch": 0.03972889313808086, "grad_norm": 502.166748046875, "learning_rate": 1.9995043526270972e-06, "loss": 26.2578, "step": 4197 }, { "epoch": 0.039738359159795913, "grad_norm": 469.6846618652344, "learning_rate": 1.999503387003664e-06, "loss": 38.5156, "step": 4198 }, { "epoch": 0.039747825181510966, "grad_norm": 518.8941040039062, "learning_rate": 1.999502420440762e-06, "loss": 62.9375, "step": 4199 }, { "epoch": 0.03975729120322602, "grad_norm": 414.495849609375, "learning_rate": 1.9995014529383938e-06, "loss": 24.3438, "step": 4200 }, { "epoch": 0.03976675722494107, "grad_norm": 343.53009033203125, "learning_rate": 1.999500484496558e-06, "loss": 26.375, "step": 4201 }, { "epoch": 0.039776223246656126, "grad_norm": 278.9809875488281, "learning_rate": 1.9994995151152576e-06, "loss": 23.6562, "step": 4202 }, { "epoch": 0.03978568926837118, "grad_norm": 638.329345703125, "learning_rate": 1.9994985447944926e-06, "loss": 38.5781, "step": 4203 }, { "epoch": 0.03979515529008624, "grad_norm": 577.7850341796875, "learning_rate": 1.999497573534264e-06, "loss": 57.0938, "step": 4204 }, { "epoch": 0.03980462131180129, "grad_norm": 1127.9384765625, "learning_rate": 1.9994966013345724e-06, "loss": 55.1406, "step": 4205 }, { "epoch": 0.039814087333516344, "grad_norm": 210.2228240966797, "learning_rate": 1.9994956281954193e-06, "loss": 20.5156, "step": 4206 }, { "epoch": 0.0398235533552314, "grad_norm": 786.2171630859375, "learning_rate": 1.999494654116805e-06, "loss": 43.8594, "step": 4207 }, { "epoch": 0.03983301937694645, "grad_norm": 307.8618469238281, "learning_rate": 1.999493679098731e-06, "loss": 37.4531, "step": 4208 }, { "epoch": 0.039842485398661504, "grad_norm": 320.9873046875, "learning_rate": 1.999492703141198e-06, "loss": 25.5781, "step": 4209 }, { "epoch": 0.039851951420376557, "grad_norm": 291.4000244140625, "learning_rate": 1.9994917262442067e-06, "loss": 27.3594, "step": 4210 }, { "epoch": 0.03986141744209161, "grad_norm": 270.46466064453125, "learning_rate": 1.9994907484077583e-06, "loss": 32.0938, "step": 4211 }, { "epoch": 0.03987088346380666, "grad_norm": 560.9937744140625, "learning_rate": 1.9994897696318537e-06, "loss": 27.1094, "step": 4212 }, { "epoch": 0.03988034948552172, "grad_norm": 181.7898712158203, "learning_rate": 1.9994887899164934e-06, "loss": 23.7344, "step": 4213 }, { "epoch": 0.039889815507236775, "grad_norm": 244.48843383789062, "learning_rate": 1.999487809261679e-06, "loss": 23.7812, "step": 4214 }, { "epoch": 0.03989928152895183, "grad_norm": 1049.4578857421875, "learning_rate": 1.999486827667411e-06, "loss": 22.8906, "step": 4215 }, { "epoch": 0.03990874755066688, "grad_norm": 290.2458190917969, "learning_rate": 1.9994858451336905e-06, "loss": 33.6094, "step": 4216 }, { "epoch": 0.039918213572381935, "grad_norm": 446.4381103515625, "learning_rate": 1.999484861660518e-06, "loss": 26.4062, "step": 4217 }, { "epoch": 0.03992767959409699, "grad_norm": 260.1158752441406, "learning_rate": 1.999483877247895e-06, "loss": 37.9062, "step": 4218 }, { "epoch": 0.03993714561581204, "grad_norm": 1259.6085205078125, "learning_rate": 1.999482891895822e-06, "loss": 40.8281, "step": 4219 }, { "epoch": 0.039946611637527094, "grad_norm": 596.5026245117188, "learning_rate": 1.999481905604301e-06, "loss": 59.2188, "step": 4220 }, { "epoch": 0.039956077659242153, "grad_norm": 743.5089111328125, "learning_rate": 1.9994809183733307e-06, "loss": 51.9531, "step": 4221 }, { "epoch": 0.039965543680957206, "grad_norm": 3.3426012992858887, "learning_rate": 1.999479930202914e-06, "loss": 0.8433, "step": 4222 }, { "epoch": 0.03997500970267226, "grad_norm": 594.6080322265625, "learning_rate": 1.999478941093051e-06, "loss": 53.7812, "step": 4223 }, { "epoch": 0.03998447572438731, "grad_norm": 369.2522888183594, "learning_rate": 1.999477951043743e-06, "loss": 28.4141, "step": 4224 }, { "epoch": 0.039993941746102366, "grad_norm": 810.8480224609375, "learning_rate": 1.9994769600549906e-06, "loss": 52.3438, "step": 4225 }, { "epoch": 0.04000340776781742, "grad_norm": 349.6993103027344, "learning_rate": 1.999475968126795e-06, "loss": 29.0938, "step": 4226 }, { "epoch": 0.04001287378953247, "grad_norm": 356.4378356933594, "learning_rate": 1.999474975259157e-06, "loss": 25.6484, "step": 4227 }, { "epoch": 0.040022339811247525, "grad_norm": 469.2930908203125, "learning_rate": 1.9994739814520776e-06, "loss": 28.7188, "step": 4228 }, { "epoch": 0.04003180583296258, "grad_norm": 297.62274169921875, "learning_rate": 1.9994729867055574e-06, "loss": 21.6641, "step": 4229 }, { "epoch": 0.04004127185467764, "grad_norm": 249.31524658203125, "learning_rate": 1.999471991019598e-06, "loss": 22.9062, "step": 4230 }, { "epoch": 0.04005073787639269, "grad_norm": 231.9744873046875, "learning_rate": 1.9994709943941995e-06, "loss": 25.8281, "step": 4231 }, { "epoch": 0.040060203898107744, "grad_norm": 349.85498046875, "learning_rate": 1.999469996829364e-06, "loss": 28.0781, "step": 4232 }, { "epoch": 0.040069669919822797, "grad_norm": 594.4486694335938, "learning_rate": 1.999468998325091e-06, "loss": 67.6875, "step": 4233 }, { "epoch": 0.04007913594153785, "grad_norm": 491.7440185546875, "learning_rate": 1.9994679988813825e-06, "loss": 35.2109, "step": 4234 }, { "epoch": 0.0400886019632529, "grad_norm": 381.0583801269531, "learning_rate": 1.9994669984982388e-06, "loss": 30.9766, "step": 4235 }, { "epoch": 0.040098067984967956, "grad_norm": 539.0879516601562, "learning_rate": 1.9994659971756614e-06, "loss": 29.6797, "step": 4236 }, { "epoch": 0.04010753400668301, "grad_norm": 168.40057373046875, "learning_rate": 1.999464994913651e-06, "loss": 30.9375, "step": 4237 }, { "epoch": 0.04011700002839807, "grad_norm": 325.1187438964844, "learning_rate": 1.9994639917122086e-06, "loss": 48.0469, "step": 4238 }, { "epoch": 0.04012646605011312, "grad_norm": 540.296142578125, "learning_rate": 1.9994629875713345e-06, "loss": 47.8906, "step": 4239 }, { "epoch": 0.040135932071828175, "grad_norm": 379.606201171875, "learning_rate": 1.999461982491031e-06, "loss": 24.9062, "step": 4240 }, { "epoch": 0.04014539809354323, "grad_norm": 448.7223205566406, "learning_rate": 1.999460976471298e-06, "loss": 25.5, "step": 4241 }, { "epoch": 0.04015486411525828, "grad_norm": 505.912353515625, "learning_rate": 1.9994599695121365e-06, "loss": 27.7266, "step": 4242 }, { "epoch": 0.040164330136973334, "grad_norm": 254.7139892578125, "learning_rate": 1.9994589616135477e-06, "loss": 26.75, "step": 4243 }, { "epoch": 0.04017379615868839, "grad_norm": 2.907914876937866, "learning_rate": 1.999457952775533e-06, "loss": 0.8037, "step": 4244 }, { "epoch": 0.04018326218040344, "grad_norm": 2.5216822624206543, "learning_rate": 1.999456942998092e-06, "loss": 0.7498, "step": 4245 }, { "epoch": 0.04019272820211849, "grad_norm": 864.3170166015625, "learning_rate": 1.9994559322812267e-06, "loss": 44.3438, "step": 4246 }, { "epoch": 0.04020219422383355, "grad_norm": 238.27125549316406, "learning_rate": 1.999454920624938e-06, "loss": 27.3906, "step": 4247 }, { "epoch": 0.040211660245548606, "grad_norm": 306.498046875, "learning_rate": 1.9994539080292266e-06, "loss": 27.0312, "step": 4248 }, { "epoch": 0.04022112626726366, "grad_norm": 381.7127990722656, "learning_rate": 1.999452894494094e-06, "loss": 28.5625, "step": 4249 }, { "epoch": 0.04023059228897871, "grad_norm": 725.77099609375, "learning_rate": 1.9994518800195402e-06, "loss": 43.9219, "step": 4250 }, { "epoch": 0.040240058310693765, "grad_norm": 438.6523742675781, "learning_rate": 1.999450864605567e-06, "loss": 40.4531, "step": 4251 }, { "epoch": 0.04024952433240882, "grad_norm": 603.806396484375, "learning_rate": 1.9994498482521747e-06, "loss": 47.3438, "step": 4252 }, { "epoch": 0.04025899035412387, "grad_norm": 398.61273193359375, "learning_rate": 1.9994488309593645e-06, "loss": 32.25, "step": 4253 }, { "epoch": 0.040268456375838924, "grad_norm": 289.6053466796875, "learning_rate": 1.9994478127271376e-06, "loss": 28.1953, "step": 4254 }, { "epoch": 0.04027792239755398, "grad_norm": 599.3291625976562, "learning_rate": 1.999446793555495e-06, "loss": 39.7344, "step": 4255 }, { "epoch": 0.040287388419269037, "grad_norm": 550.9402465820312, "learning_rate": 1.999445773444437e-06, "loss": 43.375, "step": 4256 }, { "epoch": 0.04029685444098409, "grad_norm": 240.4545135498047, "learning_rate": 1.999444752393965e-06, "loss": 24.9531, "step": 4257 }, { "epoch": 0.04030632046269914, "grad_norm": 270.84912109375, "learning_rate": 1.9994437304040803e-06, "loss": 27.1094, "step": 4258 }, { "epoch": 0.040315786484414196, "grad_norm": 217.29348754882812, "learning_rate": 1.9994427074747834e-06, "loss": 24.4844, "step": 4259 }, { "epoch": 0.04032525250612925, "grad_norm": 476.41644287109375, "learning_rate": 1.9994416836060753e-06, "loss": 36.8711, "step": 4260 }, { "epoch": 0.0403347185278443, "grad_norm": 317.719482421875, "learning_rate": 1.9994406587979573e-06, "loss": 23.6797, "step": 4261 }, { "epoch": 0.040344184549559355, "grad_norm": 3.473365545272827, "learning_rate": 1.9994396330504298e-06, "loss": 0.9214, "step": 4262 }, { "epoch": 0.04035365057127441, "grad_norm": 746.1533203125, "learning_rate": 1.999438606363494e-06, "loss": 26.9766, "step": 4263 }, { "epoch": 0.04036311659298947, "grad_norm": 818.9674682617188, "learning_rate": 1.999437578737151e-06, "loss": 25.5781, "step": 4264 }, { "epoch": 0.04037258261470452, "grad_norm": 3.1164755821228027, "learning_rate": 1.999436550171402e-06, "loss": 0.9507, "step": 4265 }, { "epoch": 0.040382048636419574, "grad_norm": 396.6611633300781, "learning_rate": 1.9994355206662473e-06, "loss": 42.3594, "step": 4266 }, { "epoch": 0.04039151465813463, "grad_norm": 487.7198791503906, "learning_rate": 1.9994344902216883e-06, "loss": 32.4609, "step": 4267 }, { "epoch": 0.04040098067984968, "grad_norm": 518.678466796875, "learning_rate": 1.999433458837726e-06, "loss": 19.7578, "step": 4268 }, { "epoch": 0.04041044670156473, "grad_norm": 329.0465087890625, "learning_rate": 1.999432426514361e-06, "loss": 21.4375, "step": 4269 }, { "epoch": 0.040419912723279786, "grad_norm": 774.3861083984375, "learning_rate": 1.9994313932515948e-06, "loss": 34.5625, "step": 4270 }, { "epoch": 0.04042937874499484, "grad_norm": 427.5816650390625, "learning_rate": 1.999430359049428e-06, "loss": 54.8125, "step": 4271 }, { "epoch": 0.04043884476670989, "grad_norm": 418.9459533691406, "learning_rate": 1.999429323907862e-06, "loss": 22.0, "step": 4272 }, { "epoch": 0.04044831078842495, "grad_norm": 730.1962280273438, "learning_rate": 1.999428287826897e-06, "loss": 49.6445, "step": 4273 }, { "epoch": 0.040457776810140005, "grad_norm": 276.2663879394531, "learning_rate": 1.9994272508065344e-06, "loss": 25.9375, "step": 4274 }, { "epoch": 0.04046724283185506, "grad_norm": 166.65969848632812, "learning_rate": 1.9994262128467755e-06, "loss": 23.3281, "step": 4275 }, { "epoch": 0.04047670885357011, "grad_norm": 3.2345097064971924, "learning_rate": 1.999425173947621e-06, "loss": 1.0698, "step": 4276 }, { "epoch": 0.040486174875285164, "grad_norm": 183.41107177734375, "learning_rate": 1.9994241341090715e-06, "loss": 28.0625, "step": 4277 }, { "epoch": 0.04049564089700022, "grad_norm": 2.9851529598236084, "learning_rate": 1.9994230933311286e-06, "loss": 0.8945, "step": 4278 }, { "epoch": 0.04050510691871527, "grad_norm": 690.9226684570312, "learning_rate": 1.999422051613793e-06, "loss": 26.9531, "step": 4279 }, { "epoch": 0.04051457294043032, "grad_norm": 290.13714599609375, "learning_rate": 1.999421008957066e-06, "loss": 16.8789, "step": 4280 }, { "epoch": 0.04052403896214538, "grad_norm": 267.4854431152344, "learning_rate": 1.9994199653609476e-06, "loss": 28.2031, "step": 4281 }, { "epoch": 0.040533504983860436, "grad_norm": 267.61773681640625, "learning_rate": 1.99941892082544e-06, "loss": 27.0312, "step": 4282 }, { "epoch": 0.04054297100557549, "grad_norm": 361.3482971191406, "learning_rate": 1.9994178753505433e-06, "loss": 20.7266, "step": 4283 }, { "epoch": 0.04055243702729054, "grad_norm": 455.2206115722656, "learning_rate": 1.999416828936259e-06, "loss": 47.8047, "step": 4284 }, { "epoch": 0.040561903049005595, "grad_norm": 447.7338562011719, "learning_rate": 1.999415781582588e-06, "loss": 46.7656, "step": 4285 }, { "epoch": 0.04057136907072065, "grad_norm": 846.3236694335938, "learning_rate": 1.999414733289531e-06, "loss": 40.2812, "step": 4286 }, { "epoch": 0.0405808350924357, "grad_norm": 818.1011962890625, "learning_rate": 1.999413684057089e-06, "loss": 17.5703, "step": 4287 }, { "epoch": 0.040590301114150754, "grad_norm": 395.9693298339844, "learning_rate": 1.999412633885263e-06, "loss": 27.8438, "step": 4288 }, { "epoch": 0.04059976713586581, "grad_norm": 484.3722229003906, "learning_rate": 1.999411582774055e-06, "loss": 42.0156, "step": 4289 }, { "epoch": 0.04060923315758087, "grad_norm": 279.8345031738281, "learning_rate": 1.999410530723464e-06, "loss": 35.6562, "step": 4290 }, { "epoch": 0.04061869917929592, "grad_norm": 437.7147216796875, "learning_rate": 1.999409477733493e-06, "loss": 26.2656, "step": 4291 }, { "epoch": 0.04062816520101097, "grad_norm": 225.82614135742188, "learning_rate": 1.9994084238041418e-06, "loss": 25.9531, "step": 4292 }, { "epoch": 0.040637631222726026, "grad_norm": 380.1561279296875, "learning_rate": 1.999407368935412e-06, "loss": 41.6875, "step": 4293 }, { "epoch": 0.04064709724444108, "grad_norm": 311.6032409667969, "learning_rate": 1.9994063131273038e-06, "loss": 24.6172, "step": 4294 }, { "epoch": 0.04065656326615613, "grad_norm": 828.9896850585938, "learning_rate": 1.9994052563798188e-06, "loss": 29.6562, "step": 4295 }, { "epoch": 0.040666029287871185, "grad_norm": 279.74530029296875, "learning_rate": 1.999404198692958e-06, "loss": 27.6641, "step": 4296 }, { "epoch": 0.04067549530958624, "grad_norm": 261.7665710449219, "learning_rate": 1.9994031400667223e-06, "loss": 25.3906, "step": 4297 }, { "epoch": 0.04068496133130129, "grad_norm": 445.43133544921875, "learning_rate": 1.9994020805011125e-06, "loss": 39.3281, "step": 4298 }, { "epoch": 0.04069442735301635, "grad_norm": 424.6083984375, "learning_rate": 1.9994010199961297e-06, "loss": 36.5156, "step": 4299 }, { "epoch": 0.040703893374731404, "grad_norm": 3.1747403144836426, "learning_rate": 1.9993999585517754e-06, "loss": 0.9341, "step": 4300 }, { "epoch": 0.04071335939644646, "grad_norm": 198.1969757080078, "learning_rate": 1.9993988961680497e-06, "loss": 26.9062, "step": 4301 }, { "epoch": 0.04072282541816151, "grad_norm": 232.04188537597656, "learning_rate": 1.9993978328449544e-06, "loss": 31.0625, "step": 4302 }, { "epoch": 0.04073229143987656, "grad_norm": 723.4873657226562, "learning_rate": 1.99939676858249e-06, "loss": 30.1719, "step": 4303 }, { "epoch": 0.040741757461591616, "grad_norm": 1181.4915771484375, "learning_rate": 1.9993957033806575e-06, "loss": 50.1484, "step": 4304 }, { "epoch": 0.04075122348330667, "grad_norm": 443.60736083984375, "learning_rate": 1.999394637239458e-06, "loss": 44.5938, "step": 4305 }, { "epoch": 0.04076068950502172, "grad_norm": 514.5774536132812, "learning_rate": 1.999393570158893e-06, "loss": 37.2812, "step": 4306 }, { "epoch": 0.04077015552673678, "grad_norm": 181.84347534179688, "learning_rate": 1.9993925021389625e-06, "loss": 22.2578, "step": 4307 }, { "epoch": 0.040779621548451835, "grad_norm": 761.321044921875, "learning_rate": 1.999391433179669e-06, "loss": 33.3828, "step": 4308 }, { "epoch": 0.04078908757016689, "grad_norm": 405.2384033203125, "learning_rate": 1.9993903632810114e-06, "loss": 31.25, "step": 4309 }, { "epoch": 0.04079855359188194, "grad_norm": 764.4010620117188, "learning_rate": 1.9993892924429923e-06, "loss": 47.625, "step": 4310 }, { "epoch": 0.040808019613596994, "grad_norm": 490.5711669921875, "learning_rate": 1.9993882206656124e-06, "loss": 56.5156, "step": 4311 }, { "epoch": 0.04081748563531205, "grad_norm": 3.4029462337493896, "learning_rate": 1.9993871479488725e-06, "loss": 1.0498, "step": 4312 }, { "epoch": 0.0408269516570271, "grad_norm": 2.9011237621307373, "learning_rate": 1.999386074292774e-06, "loss": 0.8789, "step": 4313 }, { "epoch": 0.04083641767874215, "grad_norm": 1028.3140869140625, "learning_rate": 1.9993849996973175e-06, "loss": 37.1836, "step": 4314 }, { "epoch": 0.040845883700457206, "grad_norm": 303.9735412597656, "learning_rate": 1.9993839241625037e-06, "loss": 29.4531, "step": 4315 }, { "epoch": 0.040855349722172266, "grad_norm": 540.9823608398438, "learning_rate": 1.999382847688334e-06, "loss": 37.1719, "step": 4316 }, { "epoch": 0.04086481574388732, "grad_norm": 854.4536743164062, "learning_rate": 1.99938177027481e-06, "loss": 26.375, "step": 4317 }, { "epoch": 0.04087428176560237, "grad_norm": 3.248136520385742, "learning_rate": 1.999380691921932e-06, "loss": 0.8896, "step": 4318 }, { "epoch": 0.040883747787317425, "grad_norm": 212.701171875, "learning_rate": 1.999379612629701e-06, "loss": 26.4531, "step": 4319 }, { "epoch": 0.04089321380903248, "grad_norm": 279.94635009765625, "learning_rate": 1.999378532398118e-06, "loss": 27.8594, "step": 4320 }, { "epoch": 0.04090267983074753, "grad_norm": 269.3084411621094, "learning_rate": 1.9993774512271847e-06, "loss": 27.8906, "step": 4321 }, { "epoch": 0.040912145852462584, "grad_norm": 555.3165283203125, "learning_rate": 1.999376369116901e-06, "loss": 37.7148, "step": 4322 }, { "epoch": 0.04092161187417764, "grad_norm": 282.8454895019531, "learning_rate": 1.999375286067269e-06, "loss": 23.4609, "step": 4323 }, { "epoch": 0.0409310778958927, "grad_norm": 2.6049394607543945, "learning_rate": 1.9993742020782892e-06, "loss": 0.8857, "step": 4324 }, { "epoch": 0.04094054391760775, "grad_norm": 464.0082702636719, "learning_rate": 1.9993731171499626e-06, "loss": 37.0, "step": 4325 }, { "epoch": 0.0409500099393228, "grad_norm": 599.52587890625, "learning_rate": 1.99937203128229e-06, "loss": 41.9375, "step": 4326 }, { "epoch": 0.040959475961037856, "grad_norm": 847.5388793945312, "learning_rate": 1.9993709444752735e-06, "loss": 51.6172, "step": 4327 }, { "epoch": 0.04096894198275291, "grad_norm": 220.5702362060547, "learning_rate": 1.9993698567289123e-06, "loss": 30.1094, "step": 4328 }, { "epoch": 0.04097840800446796, "grad_norm": 246.2312774658203, "learning_rate": 1.999368768043209e-06, "loss": 26.8125, "step": 4329 }, { "epoch": 0.040987874026183015, "grad_norm": 298.9996337890625, "learning_rate": 1.9993676784181642e-06, "loss": 24.8828, "step": 4330 }, { "epoch": 0.04099734004789807, "grad_norm": 316.9929504394531, "learning_rate": 1.9993665878537786e-06, "loss": 26.5469, "step": 4331 }, { "epoch": 0.04100680606961312, "grad_norm": 301.24517822265625, "learning_rate": 1.9993654963500534e-06, "loss": 34.3594, "step": 4332 }, { "epoch": 0.04101627209132818, "grad_norm": 270.9178466796875, "learning_rate": 1.9993644039069893e-06, "loss": 25.1406, "step": 4333 }, { "epoch": 0.041025738113043234, "grad_norm": 503.1211242675781, "learning_rate": 1.9993633105245882e-06, "loss": 57.2188, "step": 4334 }, { "epoch": 0.04103520413475829, "grad_norm": 324.36163330078125, "learning_rate": 1.9993622162028504e-06, "loss": 27.125, "step": 4335 }, { "epoch": 0.04104467015647334, "grad_norm": 282.72357177734375, "learning_rate": 1.9993611209417773e-06, "loss": 35.8594, "step": 4336 }, { "epoch": 0.04105413617818839, "grad_norm": 2.889517307281494, "learning_rate": 1.9993600247413695e-06, "loss": 0.9834, "step": 4337 }, { "epoch": 0.041063602199903446, "grad_norm": 532.3208618164062, "learning_rate": 1.9993589276016285e-06, "loss": 30.3555, "step": 4338 }, { "epoch": 0.0410730682216185, "grad_norm": 276.74212646484375, "learning_rate": 1.999357829522555e-06, "loss": 29.9688, "step": 4339 }, { "epoch": 0.04108253424333355, "grad_norm": 734.364013671875, "learning_rate": 1.99935673050415e-06, "loss": 32.9922, "step": 4340 }, { "epoch": 0.041092000265048605, "grad_norm": 767.2222290039062, "learning_rate": 1.999355630546415e-06, "loss": 42.5781, "step": 4341 }, { "epoch": 0.041101466286763665, "grad_norm": 572.6967163085938, "learning_rate": 1.9993545296493507e-06, "loss": 24.8047, "step": 4342 }, { "epoch": 0.04111093230847872, "grad_norm": 329.046630859375, "learning_rate": 1.999353427812958e-06, "loss": 29.75, "step": 4343 }, { "epoch": 0.04112039833019377, "grad_norm": 2.8309812545776367, "learning_rate": 1.999352325037238e-06, "loss": 0.9062, "step": 4344 }, { "epoch": 0.041129864351908824, "grad_norm": 360.21795654296875, "learning_rate": 1.999351221322192e-06, "loss": 24.8516, "step": 4345 }, { "epoch": 0.04113933037362388, "grad_norm": 451.837646484375, "learning_rate": 1.999350116667821e-06, "loss": 52.6094, "step": 4346 }, { "epoch": 0.04114879639533893, "grad_norm": 572.2239990234375, "learning_rate": 1.9993490110741256e-06, "loss": 33.3008, "step": 4347 }, { "epoch": 0.04115826241705398, "grad_norm": 855.2001953125, "learning_rate": 1.9993479045411073e-06, "loss": 69.1562, "step": 4348 }, { "epoch": 0.041167728438769036, "grad_norm": 593.4892578125, "learning_rate": 1.999346797068767e-06, "loss": 32.0938, "step": 4349 }, { "epoch": 0.041177194460484096, "grad_norm": 367.2267761230469, "learning_rate": 1.9993456886571055e-06, "loss": 36.7578, "step": 4350 }, { "epoch": 0.04118666048219915, "grad_norm": 734.198486328125, "learning_rate": 1.9993445793061246e-06, "loss": 33.0, "step": 4351 }, { "epoch": 0.0411961265039142, "grad_norm": 343.13177490234375, "learning_rate": 1.9993434690158244e-06, "loss": 31.0625, "step": 4352 }, { "epoch": 0.041205592525629255, "grad_norm": 310.1025390625, "learning_rate": 1.999342357786206e-06, "loss": 28.5156, "step": 4353 }, { "epoch": 0.04121505854734431, "grad_norm": 3.1348001956939697, "learning_rate": 1.9993412456172715e-06, "loss": 0.8623, "step": 4354 }, { "epoch": 0.04122452456905936, "grad_norm": 239.41494750976562, "learning_rate": 1.999340132509021e-06, "loss": 24.7812, "step": 4355 }, { "epoch": 0.041233990590774414, "grad_norm": 372.00067138671875, "learning_rate": 1.9993390184614554e-06, "loss": 15.4844, "step": 4356 }, { "epoch": 0.04124345661248947, "grad_norm": 558.8966674804688, "learning_rate": 1.9993379034745765e-06, "loss": 31.25, "step": 4357 }, { "epoch": 0.04125292263420452, "grad_norm": 479.0018310546875, "learning_rate": 1.999336787548385e-06, "loss": 29.4844, "step": 4358 }, { "epoch": 0.04126238865591958, "grad_norm": 542.1004638671875, "learning_rate": 1.999335670682882e-06, "loss": 57.3438, "step": 4359 }, { "epoch": 0.04127185467763463, "grad_norm": 300.1023254394531, "learning_rate": 1.9993345528780683e-06, "loss": 32.7656, "step": 4360 }, { "epoch": 0.041281320699349686, "grad_norm": 309.31231689453125, "learning_rate": 1.999333434133945e-06, "loss": 23.9844, "step": 4361 }, { "epoch": 0.04129078672106474, "grad_norm": 268.0833740234375, "learning_rate": 1.9993323144505137e-06, "loss": 14.6328, "step": 4362 }, { "epoch": 0.04130025274277979, "grad_norm": 528.9525146484375, "learning_rate": 1.999331193827775e-06, "loss": 44.625, "step": 4363 }, { "epoch": 0.041309718764494845, "grad_norm": 354.7735290527344, "learning_rate": 1.9993300722657294e-06, "loss": 29.75, "step": 4364 }, { "epoch": 0.0413191847862099, "grad_norm": 805.229248046875, "learning_rate": 1.999328949764379e-06, "loss": 79.1562, "step": 4365 }, { "epoch": 0.04132865080792495, "grad_norm": 354.97613525390625, "learning_rate": 1.999327826323724e-06, "loss": 30.8594, "step": 4366 }, { "epoch": 0.04133811682964001, "grad_norm": 407.6391296386719, "learning_rate": 1.9993267019437665e-06, "loss": 48.3906, "step": 4367 }, { "epoch": 0.041347582851355064, "grad_norm": 412.95086669921875, "learning_rate": 1.9993255766245065e-06, "loss": 25.1484, "step": 4368 }, { "epoch": 0.04135704887307012, "grad_norm": 349.15826416015625, "learning_rate": 1.999324450365946e-06, "loss": 23.6406, "step": 4369 }, { "epoch": 0.04136651489478517, "grad_norm": 440.78814697265625, "learning_rate": 1.9993233231680844e-06, "loss": 37.2656, "step": 4370 }, { "epoch": 0.04137598091650022, "grad_norm": 401.6226501464844, "learning_rate": 1.999322195030925e-06, "loss": 47.2344, "step": 4371 }, { "epoch": 0.041385446938215276, "grad_norm": 167.1709442138672, "learning_rate": 1.9993210659544674e-06, "loss": 28.5625, "step": 4372 }, { "epoch": 0.04139491295993033, "grad_norm": 344.2583312988281, "learning_rate": 1.999319935938713e-06, "loss": 38.9062, "step": 4373 }, { "epoch": 0.04140437898164538, "grad_norm": 2.8705453872680664, "learning_rate": 1.9993188049836624e-06, "loss": 0.6968, "step": 4374 }, { "epoch": 0.041413845003360435, "grad_norm": 3.5838558673858643, "learning_rate": 1.9993176730893176e-06, "loss": 0.9438, "step": 4375 }, { "epoch": 0.041423311025075495, "grad_norm": 453.57855224609375, "learning_rate": 1.999316540255679e-06, "loss": 46.1094, "step": 4376 }, { "epoch": 0.04143277704679055, "grad_norm": 3.1955060958862305, "learning_rate": 1.999315406482748e-06, "loss": 0.8608, "step": 4377 }, { "epoch": 0.0414422430685056, "grad_norm": 627.7334594726562, "learning_rate": 1.9993142717705254e-06, "loss": 33.5859, "step": 4378 }, { "epoch": 0.041451709090220654, "grad_norm": 610.233154296875, "learning_rate": 1.9993131361190126e-06, "loss": 51.7656, "step": 4379 }, { "epoch": 0.04146117511193571, "grad_norm": 220.92617797851562, "learning_rate": 1.9993119995282107e-06, "loss": 33.9219, "step": 4380 }, { "epoch": 0.04147064113365076, "grad_norm": 866.8368530273438, "learning_rate": 1.9993108619981203e-06, "loss": 60.1328, "step": 4381 }, { "epoch": 0.04148010715536581, "grad_norm": 1408.882568359375, "learning_rate": 1.9993097235287424e-06, "loss": 54.7031, "step": 4382 }, { "epoch": 0.041489573177080866, "grad_norm": 519.2298583984375, "learning_rate": 1.9993085841200786e-06, "loss": 38.7656, "step": 4383 }, { "epoch": 0.04149903919879592, "grad_norm": 313.7239685058594, "learning_rate": 1.9993074437721295e-06, "loss": 22.9609, "step": 4384 }, { "epoch": 0.04150850522051098, "grad_norm": 325.1258544921875, "learning_rate": 1.999306302484897e-06, "loss": 33.3984, "step": 4385 }, { "epoch": 0.04151797124222603, "grad_norm": 469.89154052734375, "learning_rate": 1.999305160258381e-06, "loss": 43.3281, "step": 4386 }, { "epoch": 0.041527437263941085, "grad_norm": 392.89080810546875, "learning_rate": 1.9993040170925834e-06, "loss": 17.6953, "step": 4387 }, { "epoch": 0.04153690328565614, "grad_norm": 491.6268310546875, "learning_rate": 1.999302872987505e-06, "loss": 40.4062, "step": 4388 }, { "epoch": 0.04154636930737119, "grad_norm": 770.7777709960938, "learning_rate": 1.999301727943147e-06, "loss": 63.4766, "step": 4389 }, { "epoch": 0.041555835329086244, "grad_norm": 192.2635040283203, "learning_rate": 1.99930058195951e-06, "loss": 25.1328, "step": 4390 }, { "epoch": 0.0415653013508013, "grad_norm": 3.407944679260254, "learning_rate": 1.999299435036596e-06, "loss": 0.9165, "step": 4391 }, { "epoch": 0.04157476737251635, "grad_norm": 650.1852416992188, "learning_rate": 1.9992982871744052e-06, "loss": 43.9219, "step": 4392 }, { "epoch": 0.04158423339423141, "grad_norm": 872.5902709960938, "learning_rate": 1.999297138372939e-06, "loss": 51.125, "step": 4393 }, { "epoch": 0.04159369941594646, "grad_norm": 456.514892578125, "learning_rate": 1.9992959886321986e-06, "loss": 26.6094, "step": 4394 }, { "epoch": 0.041603165437661516, "grad_norm": 222.48138427734375, "learning_rate": 1.999294837952185e-06, "loss": 26.8359, "step": 4395 }, { "epoch": 0.04161263145937657, "grad_norm": 1133.62060546875, "learning_rate": 1.9992936863328994e-06, "loss": 47.5078, "step": 4396 }, { "epoch": 0.04162209748109162, "grad_norm": 381.217041015625, "learning_rate": 1.9992925337743427e-06, "loss": 25.125, "step": 4397 }, { "epoch": 0.041631563502806675, "grad_norm": 803.9599609375, "learning_rate": 1.9992913802765157e-06, "loss": 47.6016, "step": 4398 }, { "epoch": 0.04164102952452173, "grad_norm": 216.14227294921875, "learning_rate": 1.9992902258394203e-06, "loss": 23.5938, "step": 4399 }, { "epoch": 0.04165049554623678, "grad_norm": 364.2700500488281, "learning_rate": 1.999289070463057e-06, "loss": 31.75, "step": 4400 }, { "epoch": 0.041659961567951834, "grad_norm": 294.52203369140625, "learning_rate": 1.9992879141474266e-06, "loss": 28.5938, "step": 4401 }, { "epoch": 0.041669427589666894, "grad_norm": 187.77114868164062, "learning_rate": 1.9992867568925308e-06, "loss": 25.8906, "step": 4402 }, { "epoch": 0.04167889361138195, "grad_norm": 682.9649658203125, "learning_rate": 1.9992855986983703e-06, "loss": 43.7891, "step": 4403 }, { "epoch": 0.041688359633097, "grad_norm": 222.41531372070312, "learning_rate": 1.9992844395649465e-06, "loss": 30.2891, "step": 4404 }, { "epoch": 0.04169782565481205, "grad_norm": 179.41566467285156, "learning_rate": 1.9992832794922605e-06, "loss": 23.8594, "step": 4405 }, { "epoch": 0.041707291676527106, "grad_norm": 459.00335693359375, "learning_rate": 1.9992821184803128e-06, "loss": 68.4062, "step": 4406 }, { "epoch": 0.04171675769824216, "grad_norm": 264.3018798828125, "learning_rate": 1.9992809565291055e-06, "loss": 26.2344, "step": 4407 }, { "epoch": 0.04172622371995721, "grad_norm": 496.1560363769531, "learning_rate": 1.999279793638638e-06, "loss": 26.375, "step": 4408 }, { "epoch": 0.041735689741672265, "grad_norm": 663.3277587890625, "learning_rate": 1.999278629808914e-06, "loss": 40.6562, "step": 4409 }, { "epoch": 0.041745155763387325, "grad_norm": 230.322021484375, "learning_rate": 1.999277465039932e-06, "loss": 23.8047, "step": 4410 }, { "epoch": 0.04175462178510238, "grad_norm": 344.6085510253906, "learning_rate": 1.9992762993316945e-06, "loss": 47.4688, "step": 4411 }, { "epoch": 0.04176408780681743, "grad_norm": 639.6849365234375, "learning_rate": 1.9992751326842024e-06, "loss": 43.4648, "step": 4412 }, { "epoch": 0.041773553828532484, "grad_norm": 1151.350341796875, "learning_rate": 1.9992739650974566e-06, "loss": 79.6406, "step": 4413 }, { "epoch": 0.04178301985024754, "grad_norm": 554.1392822265625, "learning_rate": 1.9992727965714585e-06, "loss": 64.5781, "step": 4414 }, { "epoch": 0.04179248587196259, "grad_norm": 505.5003662109375, "learning_rate": 1.9992716271062088e-06, "loss": 54.3281, "step": 4415 }, { "epoch": 0.04180195189367764, "grad_norm": 247.5537567138672, "learning_rate": 1.9992704567017084e-06, "loss": 26.2812, "step": 4416 }, { "epoch": 0.041811417915392696, "grad_norm": 267.4849853515625, "learning_rate": 1.9992692853579594e-06, "loss": 27.8594, "step": 4417 }, { "epoch": 0.04182088393710775, "grad_norm": 1203.4268798828125, "learning_rate": 1.999268113074962e-06, "loss": 29.4102, "step": 4418 }, { "epoch": 0.04183034995882281, "grad_norm": 1704.06103515625, "learning_rate": 1.9992669398527175e-06, "loss": 32.1484, "step": 4419 }, { "epoch": 0.04183981598053786, "grad_norm": 185.63137817382812, "learning_rate": 1.9992657656912275e-06, "loss": 24.6094, "step": 4420 }, { "epoch": 0.041849282002252915, "grad_norm": 862.5624389648438, "learning_rate": 1.9992645905904922e-06, "loss": 59.4922, "step": 4421 }, { "epoch": 0.04185874802396797, "grad_norm": 217.8831329345703, "learning_rate": 1.9992634145505135e-06, "loss": 30.0625, "step": 4422 }, { "epoch": 0.04186821404568302, "grad_norm": 259.8060607910156, "learning_rate": 1.9992622375712917e-06, "loss": 24.3984, "step": 4423 }, { "epoch": 0.041877680067398074, "grad_norm": 427.56866455078125, "learning_rate": 1.999261059652829e-06, "loss": 26.3125, "step": 4424 }, { "epoch": 0.04188714608911313, "grad_norm": 418.6398620605469, "learning_rate": 1.9992598807951257e-06, "loss": 33.3203, "step": 4425 }, { "epoch": 0.04189661211082818, "grad_norm": 516.4111328125, "learning_rate": 1.9992587009981828e-06, "loss": 24.0938, "step": 4426 }, { "epoch": 0.04190607813254323, "grad_norm": 643.7626953125, "learning_rate": 1.9992575202620022e-06, "loss": 38.7422, "step": 4427 }, { "epoch": 0.04191554415425829, "grad_norm": 473.3150634765625, "learning_rate": 1.9992563385865846e-06, "loss": 27.0469, "step": 4428 }, { "epoch": 0.041925010175973346, "grad_norm": 786.8938598632812, "learning_rate": 1.99925515597193e-06, "loss": 42.9141, "step": 4429 }, { "epoch": 0.0419344761976884, "grad_norm": 210.9734344482422, "learning_rate": 1.999253972418042e-06, "loss": 26.625, "step": 4430 }, { "epoch": 0.04194394221940345, "grad_norm": 612.1520385742188, "learning_rate": 1.9992527879249193e-06, "loss": 30.1094, "step": 4431 }, { "epoch": 0.041953408241118505, "grad_norm": 298.0303039550781, "learning_rate": 1.999251602492564e-06, "loss": 28.3438, "step": 4432 }, { "epoch": 0.04196287426283356, "grad_norm": 823.0400390625, "learning_rate": 1.999250416120978e-06, "loss": 53.0, "step": 4433 }, { "epoch": 0.04197234028454861, "grad_norm": 454.8136901855469, "learning_rate": 1.9992492288101613e-06, "loss": 47.6719, "step": 4434 }, { "epoch": 0.041981806306263664, "grad_norm": 473.9920959472656, "learning_rate": 1.999248040560115e-06, "loss": 21.2578, "step": 4435 }, { "epoch": 0.041991272327978724, "grad_norm": 350.37896728515625, "learning_rate": 1.999246851370841e-06, "loss": 26.1406, "step": 4436 }, { "epoch": 0.04200073834969378, "grad_norm": 360.17376708984375, "learning_rate": 1.9992456612423393e-06, "loss": 48.6562, "step": 4437 }, { "epoch": 0.04201020437140883, "grad_norm": 285.65875244140625, "learning_rate": 1.999244470174612e-06, "loss": 31.5781, "step": 4438 }, { "epoch": 0.04201967039312388, "grad_norm": 445.0261535644531, "learning_rate": 1.99924327816766e-06, "loss": 49.8438, "step": 4439 }, { "epoch": 0.042029136414838936, "grad_norm": 452.4782409667969, "learning_rate": 1.9992420852214842e-06, "loss": 34.9375, "step": 4440 }, { "epoch": 0.04203860243655399, "grad_norm": 674.9003295898438, "learning_rate": 1.9992408913360862e-06, "loss": 49.4688, "step": 4441 }, { "epoch": 0.04204806845826904, "grad_norm": 331.1766052246094, "learning_rate": 1.9992396965114663e-06, "loss": 21.5859, "step": 4442 }, { "epoch": 0.042057534479984095, "grad_norm": 910.8904418945312, "learning_rate": 1.999238500747626e-06, "loss": 67.2188, "step": 4443 }, { "epoch": 0.04206700050169915, "grad_norm": 3.47652006149292, "learning_rate": 1.999237304044567e-06, "loss": 0.9858, "step": 4444 }, { "epoch": 0.04207646652341421, "grad_norm": 302.6944580078125, "learning_rate": 1.99923610640229e-06, "loss": 32.3906, "step": 4445 }, { "epoch": 0.04208593254512926, "grad_norm": 652.419677734375, "learning_rate": 1.9992349078207956e-06, "loss": 24.5781, "step": 4446 }, { "epoch": 0.042095398566844314, "grad_norm": 265.12969970703125, "learning_rate": 1.9992337083000853e-06, "loss": 25.6875, "step": 4447 }, { "epoch": 0.04210486458855937, "grad_norm": 355.6609802246094, "learning_rate": 1.9992325078401607e-06, "loss": 21.4375, "step": 4448 }, { "epoch": 0.04211433061027442, "grad_norm": 395.69952392578125, "learning_rate": 1.9992313064410226e-06, "loss": 17.7578, "step": 4449 }, { "epoch": 0.04212379663198947, "grad_norm": 262.7294921875, "learning_rate": 1.999230104102672e-06, "loss": 21.7266, "step": 4450 }, { "epoch": 0.042133262653704526, "grad_norm": 964.8975219726562, "learning_rate": 1.99922890082511e-06, "loss": 49.25, "step": 4451 }, { "epoch": 0.04214272867541958, "grad_norm": 362.2992858886719, "learning_rate": 1.9992276966083377e-06, "loss": 46.4688, "step": 4452 }, { "epoch": 0.04215219469713464, "grad_norm": 250.45574951171875, "learning_rate": 1.9992264914523566e-06, "loss": 23.7344, "step": 4453 }, { "epoch": 0.04216166071884969, "grad_norm": 1058.556884765625, "learning_rate": 1.9992252853571675e-06, "loss": 61.6328, "step": 4454 }, { "epoch": 0.042171126740564745, "grad_norm": 3.6030843257904053, "learning_rate": 1.999224078322772e-06, "loss": 0.8398, "step": 4455 }, { "epoch": 0.0421805927622798, "grad_norm": 254.81800842285156, "learning_rate": 1.9992228703491703e-06, "loss": 24.5781, "step": 4456 }, { "epoch": 0.04219005878399485, "grad_norm": 448.5887451171875, "learning_rate": 1.9992216614363642e-06, "loss": 54.2031, "step": 4457 }, { "epoch": 0.042199524805709904, "grad_norm": 408.8059997558594, "learning_rate": 1.9992204515843554e-06, "loss": 22.5469, "step": 4458 }, { "epoch": 0.04220899082742496, "grad_norm": 397.5091247558594, "learning_rate": 1.9992192407931436e-06, "loss": 43.1719, "step": 4459 }, { "epoch": 0.04221845684914001, "grad_norm": 444.2850341796875, "learning_rate": 1.9992180290627308e-06, "loss": 21.875, "step": 4460 }, { "epoch": 0.04222792287085506, "grad_norm": 278.0747985839844, "learning_rate": 1.9992168163931184e-06, "loss": 37.2812, "step": 4461 }, { "epoch": 0.04223738889257012, "grad_norm": 176.15139770507812, "learning_rate": 1.999215602784307e-06, "loss": 24.875, "step": 4462 }, { "epoch": 0.042246854914285176, "grad_norm": 968.7682495117188, "learning_rate": 1.999214388236298e-06, "loss": 50.5781, "step": 4463 }, { "epoch": 0.04225632093600023, "grad_norm": 243.7685546875, "learning_rate": 1.9992131727490924e-06, "loss": 25.7578, "step": 4464 }, { "epoch": 0.04226578695771528, "grad_norm": 321.9754333496094, "learning_rate": 1.9992119563226916e-06, "loss": 24.2031, "step": 4465 }, { "epoch": 0.042275252979430335, "grad_norm": 218.00486755371094, "learning_rate": 1.9992107389570965e-06, "loss": 30.4531, "step": 4466 }, { "epoch": 0.04228471900114539, "grad_norm": 1358.9815673828125, "learning_rate": 1.999209520652308e-06, "loss": 59.9805, "step": 4467 }, { "epoch": 0.04229418502286044, "grad_norm": 431.9957275390625, "learning_rate": 1.9992083014083277e-06, "loss": 35.7812, "step": 4468 }, { "epoch": 0.042303651044575494, "grad_norm": 362.8274841308594, "learning_rate": 1.999207081225157e-06, "loss": 17.2969, "step": 4469 }, { "epoch": 0.04231311706629055, "grad_norm": 236.18557739257812, "learning_rate": 1.9992058601027963e-06, "loss": 21.8906, "step": 4470 }, { "epoch": 0.04232258308800561, "grad_norm": 777.0045166015625, "learning_rate": 1.999204638041247e-06, "loss": 41.5703, "step": 4471 }, { "epoch": 0.04233204910972066, "grad_norm": 255.174072265625, "learning_rate": 1.9992034150405107e-06, "loss": 25.6719, "step": 4472 }, { "epoch": 0.04234151513143571, "grad_norm": 547.6278686523438, "learning_rate": 1.999202191100588e-06, "loss": 37.7344, "step": 4473 }, { "epoch": 0.042350981153150766, "grad_norm": 542.1714477539062, "learning_rate": 1.9992009662214802e-06, "loss": 26.2305, "step": 4474 }, { "epoch": 0.04236044717486582, "grad_norm": 228.31687927246094, "learning_rate": 1.9991997404031883e-06, "loss": 23.125, "step": 4475 }, { "epoch": 0.04236991319658087, "grad_norm": 504.2222595214844, "learning_rate": 1.9991985136457142e-06, "loss": 52.9219, "step": 4476 }, { "epoch": 0.042379379218295925, "grad_norm": 3.0417490005493164, "learning_rate": 1.999197285949058e-06, "loss": 0.9863, "step": 4477 }, { "epoch": 0.04238884524001098, "grad_norm": 571.474365234375, "learning_rate": 1.9991960573132216e-06, "loss": 47.4844, "step": 4478 }, { "epoch": 0.04239831126172604, "grad_norm": 453.37255859375, "learning_rate": 1.999194827738206e-06, "loss": 57.1094, "step": 4479 }, { "epoch": 0.04240777728344109, "grad_norm": 296.50775146484375, "learning_rate": 1.999193597224012e-06, "loss": 21.6875, "step": 4480 }, { "epoch": 0.042417243305156144, "grad_norm": 245.28614807128906, "learning_rate": 1.999192365770641e-06, "loss": 22.5938, "step": 4481 }, { "epoch": 0.0424267093268712, "grad_norm": 155.96205139160156, "learning_rate": 1.9991911333780945e-06, "loss": 23.875, "step": 4482 }, { "epoch": 0.04243617534858625, "grad_norm": 271.29742431640625, "learning_rate": 1.9991899000463726e-06, "loss": 31.3125, "step": 4483 }, { "epoch": 0.0424456413703013, "grad_norm": 456.900146484375, "learning_rate": 1.999188665775478e-06, "loss": 29.1406, "step": 4484 }, { "epoch": 0.042455107392016356, "grad_norm": 233.85520935058594, "learning_rate": 1.999187430565411e-06, "loss": 22.6797, "step": 4485 }, { "epoch": 0.04246457341373141, "grad_norm": 400.45684814453125, "learning_rate": 1.9991861944161722e-06, "loss": 24.9141, "step": 4486 }, { "epoch": 0.04247403943544646, "grad_norm": 350.6846923828125, "learning_rate": 1.999184957327764e-06, "loss": 38.6406, "step": 4487 }, { "epoch": 0.04248350545716152, "grad_norm": 701.9118041992188, "learning_rate": 1.999183719300187e-06, "loss": 31.6875, "step": 4488 }, { "epoch": 0.042492971478876575, "grad_norm": 1477.4775390625, "learning_rate": 1.9991824803334415e-06, "loss": 89.9844, "step": 4489 }, { "epoch": 0.04250243750059163, "grad_norm": 922.721923828125, "learning_rate": 1.9991812404275305e-06, "loss": 52.2344, "step": 4490 }, { "epoch": 0.04251190352230668, "grad_norm": 3.14371395111084, "learning_rate": 1.9991799995824536e-06, "loss": 0.9141, "step": 4491 }, { "epoch": 0.042521369544021734, "grad_norm": 503.524658203125, "learning_rate": 1.9991787577982123e-06, "loss": 43.4219, "step": 4492 }, { "epoch": 0.04253083556573679, "grad_norm": 159.06707763671875, "learning_rate": 1.999177515074808e-06, "loss": 23.2344, "step": 4493 }, { "epoch": 0.04254030158745184, "grad_norm": 889.537353515625, "learning_rate": 1.9991762714122423e-06, "loss": 25.5625, "step": 4494 }, { "epoch": 0.04254976760916689, "grad_norm": 267.9351501464844, "learning_rate": 1.9991750268105158e-06, "loss": 23.2344, "step": 4495 }, { "epoch": 0.042559233630881946, "grad_norm": 356.55194091796875, "learning_rate": 1.999173781269629e-06, "loss": 30.9531, "step": 4496 }, { "epoch": 0.042568699652597006, "grad_norm": 1323.8984375, "learning_rate": 1.9991725347895846e-06, "loss": 68.8438, "step": 4497 }, { "epoch": 0.04257816567431206, "grad_norm": 408.9671325683594, "learning_rate": 1.999171287370383e-06, "loss": 32.5391, "step": 4498 }, { "epoch": 0.04258763169602711, "grad_norm": 337.346923828125, "learning_rate": 1.9991700390120254e-06, "loss": 36.1875, "step": 4499 }, { "epoch": 0.042597097717742165, "grad_norm": 288.4118347167969, "learning_rate": 1.999168789714513e-06, "loss": 21.8984, "step": 4500 }, { "epoch": 0.04260656373945722, "grad_norm": 326.4237060546875, "learning_rate": 1.9991675394778464e-06, "loss": 42.3594, "step": 4501 }, { "epoch": 0.04261602976117227, "grad_norm": 581.9338989257812, "learning_rate": 1.9991662883020278e-06, "loss": 28.2031, "step": 4502 }, { "epoch": 0.042625495782887324, "grad_norm": 529.5408935546875, "learning_rate": 1.999165036187058e-06, "loss": 55.9141, "step": 4503 }, { "epoch": 0.04263496180460238, "grad_norm": 276.9686584472656, "learning_rate": 1.9991637831329375e-06, "loss": 30.625, "step": 4504 }, { "epoch": 0.04264442782631744, "grad_norm": 301.5123596191406, "learning_rate": 1.9991625291396687e-06, "loss": 34.1328, "step": 4505 }, { "epoch": 0.04265389384803249, "grad_norm": 323.87310791015625, "learning_rate": 1.9991612742072514e-06, "loss": 21.5781, "step": 4506 }, { "epoch": 0.04266335986974754, "grad_norm": 580.73291015625, "learning_rate": 1.999160018335688e-06, "loss": 32.2812, "step": 4507 }, { "epoch": 0.042672825891462596, "grad_norm": 276.14825439453125, "learning_rate": 1.9991587615249792e-06, "loss": 29.2188, "step": 4508 }, { "epoch": 0.04268229191317765, "grad_norm": 1146.98779296875, "learning_rate": 1.999157503775126e-06, "loss": 67.6094, "step": 4509 }, { "epoch": 0.0426917579348927, "grad_norm": 361.0499267578125, "learning_rate": 1.9991562450861295e-06, "loss": 21.3125, "step": 4510 }, { "epoch": 0.042701223956607755, "grad_norm": 233.27719116210938, "learning_rate": 1.9991549854579915e-06, "loss": 25.8906, "step": 4511 }, { "epoch": 0.04271068997832281, "grad_norm": 393.32611083984375, "learning_rate": 1.999153724890713e-06, "loss": 28.5938, "step": 4512 }, { "epoch": 0.04272015600003786, "grad_norm": 577.8670654296875, "learning_rate": 1.9991524633842945e-06, "loss": 54.4219, "step": 4513 }, { "epoch": 0.04272962202175292, "grad_norm": 338.1861877441406, "learning_rate": 1.999151200938738e-06, "loss": 24.9375, "step": 4514 }, { "epoch": 0.042739088043467974, "grad_norm": 207.7797088623047, "learning_rate": 1.9991499375540445e-06, "loss": 27.2969, "step": 4515 }, { "epoch": 0.04274855406518303, "grad_norm": 442.0766296386719, "learning_rate": 1.9991486732302146e-06, "loss": 64.5156, "step": 4516 }, { "epoch": 0.04275802008689808, "grad_norm": 546.8529052734375, "learning_rate": 1.9991474079672504e-06, "loss": 30.0781, "step": 4517 }, { "epoch": 0.04276748610861313, "grad_norm": 631.74609375, "learning_rate": 1.9991461417651524e-06, "loss": 51.2812, "step": 4518 }, { "epoch": 0.042776952130328186, "grad_norm": 3.7441279888153076, "learning_rate": 1.999144874623922e-06, "loss": 0.9434, "step": 4519 }, { "epoch": 0.04278641815204324, "grad_norm": 238.37525939941406, "learning_rate": 1.9991436065435606e-06, "loss": 24.8281, "step": 4520 }, { "epoch": 0.04279588417375829, "grad_norm": 272.838623046875, "learning_rate": 1.999142337524069e-06, "loss": 26.4453, "step": 4521 }, { "epoch": 0.04280535019547335, "grad_norm": 325.722900390625, "learning_rate": 1.9991410675654483e-06, "loss": 27.4844, "step": 4522 }, { "epoch": 0.042814816217188405, "grad_norm": 208.0537567138672, "learning_rate": 1.9991397966677004e-06, "loss": 22.4531, "step": 4523 }, { "epoch": 0.04282428223890346, "grad_norm": 236.81649780273438, "learning_rate": 1.9991385248308263e-06, "loss": 24.8125, "step": 4524 }, { "epoch": 0.04283374826061851, "grad_norm": 322.5990295410156, "learning_rate": 1.999137252054827e-06, "loss": 27.0, "step": 4525 }, { "epoch": 0.042843214282333564, "grad_norm": 403.4289855957031, "learning_rate": 1.9991359783397033e-06, "loss": 40.0547, "step": 4526 }, { "epoch": 0.04285268030404862, "grad_norm": 296.2005615234375, "learning_rate": 1.999134703685457e-06, "loss": 21.3164, "step": 4527 }, { "epoch": 0.04286214632576367, "grad_norm": 417.1848449707031, "learning_rate": 1.9991334280920888e-06, "loss": 20.5156, "step": 4528 }, { "epoch": 0.04287161234747872, "grad_norm": 481.565673828125, "learning_rate": 1.9991321515596003e-06, "loss": 23.9766, "step": 4529 }, { "epoch": 0.042881078369193776, "grad_norm": 867.62890625, "learning_rate": 1.9991308740879928e-06, "loss": 62.8594, "step": 4530 }, { "epoch": 0.042890544390908836, "grad_norm": 208.0439910888672, "learning_rate": 1.9991295956772667e-06, "loss": 31.25, "step": 4531 }, { "epoch": 0.04290001041262389, "grad_norm": 424.4955139160156, "learning_rate": 1.9991283163274243e-06, "loss": 47.1406, "step": 4532 }, { "epoch": 0.04290947643433894, "grad_norm": 368.1078796386719, "learning_rate": 1.9991270360384666e-06, "loss": 23.6914, "step": 4533 }, { "epoch": 0.042918942456053995, "grad_norm": 919.9414672851562, "learning_rate": 1.9991257548103938e-06, "loss": 93.2734, "step": 4534 }, { "epoch": 0.04292840847776905, "grad_norm": 260.6031494140625, "learning_rate": 1.9991244726432083e-06, "loss": 26.5, "step": 4535 }, { "epoch": 0.0429378744994841, "grad_norm": 534.1109008789062, "learning_rate": 1.9991231895369107e-06, "loss": 27.6406, "step": 4536 }, { "epoch": 0.042947340521199154, "grad_norm": 682.5186767578125, "learning_rate": 1.9991219054915017e-06, "loss": 46.2578, "step": 4537 }, { "epoch": 0.04295680654291421, "grad_norm": 397.214111328125, "learning_rate": 1.9991206205069834e-06, "loss": 42.375, "step": 4538 }, { "epoch": 0.04296627256462926, "grad_norm": 249.9923858642578, "learning_rate": 1.999119334583357e-06, "loss": 25.7344, "step": 4539 }, { "epoch": 0.04297573858634432, "grad_norm": 791.0979614257812, "learning_rate": 1.9991180477206234e-06, "loss": 50.0469, "step": 4540 }, { "epoch": 0.04298520460805937, "grad_norm": 510.8815002441406, "learning_rate": 1.999116759918784e-06, "loss": 51.5625, "step": 4541 }, { "epoch": 0.042994670629774426, "grad_norm": 360.553955078125, "learning_rate": 1.999115471177839e-06, "loss": 30.1719, "step": 4542 }, { "epoch": 0.04300413665148948, "grad_norm": 197.01373291015625, "learning_rate": 1.9991141814977912e-06, "loss": 25.4219, "step": 4543 }, { "epoch": 0.04301360267320453, "grad_norm": 791.6771850585938, "learning_rate": 1.999112890878641e-06, "loss": 42.6094, "step": 4544 }, { "epoch": 0.043023068694919585, "grad_norm": 2.9372646808624268, "learning_rate": 1.9991115993203892e-06, "loss": 0.9067, "step": 4545 }, { "epoch": 0.04303253471663464, "grad_norm": 414.1605529785156, "learning_rate": 1.9991103068230377e-06, "loss": 31.9219, "step": 4546 }, { "epoch": 0.04304200073834969, "grad_norm": 600.2257080078125, "learning_rate": 1.999109013386588e-06, "loss": 58.7969, "step": 4547 }, { "epoch": 0.04305146676006475, "grad_norm": 321.1924133300781, "learning_rate": 1.9991077190110404e-06, "loss": 22.3125, "step": 4548 }, { "epoch": 0.043060932781779804, "grad_norm": 1358.6087646484375, "learning_rate": 1.9991064236963968e-06, "loss": 57.3125, "step": 4549 }, { "epoch": 0.04307039880349486, "grad_norm": 1433.7969970703125, "learning_rate": 1.999105127442658e-06, "loss": 72.375, "step": 4550 }, { "epoch": 0.04307986482520991, "grad_norm": 311.6475830078125, "learning_rate": 1.999103830249825e-06, "loss": 32.7734, "step": 4551 }, { "epoch": 0.04308933084692496, "grad_norm": 1732.8831787109375, "learning_rate": 1.9991025321179e-06, "loss": 51.6797, "step": 4552 }, { "epoch": 0.043098796868640016, "grad_norm": 814.1118774414062, "learning_rate": 1.9991012330468835e-06, "loss": 38.9844, "step": 4553 }, { "epoch": 0.04310826289035507, "grad_norm": 930.5918579101562, "learning_rate": 1.999099933036776e-06, "loss": 25.8984, "step": 4554 }, { "epoch": 0.04311772891207012, "grad_norm": 299.3908386230469, "learning_rate": 1.999098632087581e-06, "loss": 20.8672, "step": 4555 }, { "epoch": 0.043127194933785175, "grad_norm": 283.8505554199219, "learning_rate": 1.9990973301992974e-06, "loss": 29.0312, "step": 4556 }, { "epoch": 0.043136660955500235, "grad_norm": 279.1064453125, "learning_rate": 1.9990960273719277e-06, "loss": 23.625, "step": 4557 }, { "epoch": 0.04314612697721529, "grad_norm": 812.0074462890625, "learning_rate": 1.9990947236054724e-06, "loss": 60.8906, "step": 4558 }, { "epoch": 0.04315559299893034, "grad_norm": 1061.6507568359375, "learning_rate": 1.999093418899933e-06, "loss": 45.5469, "step": 4559 }, { "epoch": 0.043165059020645394, "grad_norm": 336.2097473144531, "learning_rate": 1.999092113255311e-06, "loss": 33.375, "step": 4560 }, { "epoch": 0.04317452504236045, "grad_norm": 393.3794250488281, "learning_rate": 1.999090806671607e-06, "loss": 29.1797, "step": 4561 }, { "epoch": 0.0431839910640755, "grad_norm": 423.390625, "learning_rate": 1.999089499148823e-06, "loss": 26.2109, "step": 4562 }, { "epoch": 0.04319345708579055, "grad_norm": 404.48382568359375, "learning_rate": 1.99908819068696e-06, "loss": 24.7812, "step": 4563 }, { "epoch": 0.043202923107505606, "grad_norm": 609.75390625, "learning_rate": 1.9990868812860187e-06, "loss": 42.7031, "step": 4564 }, { "epoch": 0.043212389129220666, "grad_norm": 647.4332275390625, "learning_rate": 1.999085570946001e-06, "loss": 54.6562, "step": 4565 }, { "epoch": 0.04322185515093572, "grad_norm": 409.1749267578125, "learning_rate": 1.999084259666908e-06, "loss": 41.0938, "step": 4566 }, { "epoch": 0.04323132117265077, "grad_norm": 793.9755859375, "learning_rate": 1.9990829474487404e-06, "loss": 67.4375, "step": 4567 }, { "epoch": 0.043240787194365825, "grad_norm": 938.6893920898438, "learning_rate": 1.9990816342915e-06, "loss": 43.3125, "step": 4568 }, { "epoch": 0.04325025321608088, "grad_norm": 171.53858947753906, "learning_rate": 1.999080320195188e-06, "loss": 24.7656, "step": 4569 }, { "epoch": 0.04325971923779593, "grad_norm": 181.8933563232422, "learning_rate": 1.9990790051598055e-06, "loss": 22.6875, "step": 4570 }, { "epoch": 0.043269185259510984, "grad_norm": 524.4183349609375, "learning_rate": 1.999077689185353e-06, "loss": 36.0469, "step": 4571 }, { "epoch": 0.04327865128122604, "grad_norm": 1159.501953125, "learning_rate": 1.9990763722718335e-06, "loss": 29.5312, "step": 4572 }, { "epoch": 0.04328811730294109, "grad_norm": 3.701143264770508, "learning_rate": 1.9990750544192466e-06, "loss": 0.9385, "step": 4573 }, { "epoch": 0.04329758332465615, "grad_norm": 345.0570373535156, "learning_rate": 1.9990737356275944e-06, "loss": 28.1094, "step": 4574 }, { "epoch": 0.0433070493463712, "grad_norm": 2.64680814743042, "learning_rate": 1.9990724158968775e-06, "loss": 0.772, "step": 4575 }, { "epoch": 0.043316515368086256, "grad_norm": 292.7619323730469, "learning_rate": 1.9990710952270983e-06, "loss": 34.5, "step": 4576 }, { "epoch": 0.04332598138980131, "grad_norm": 1110.42724609375, "learning_rate": 1.9990697736182565e-06, "loss": 79.5156, "step": 4577 }, { "epoch": 0.04333544741151636, "grad_norm": 291.5158996582031, "learning_rate": 1.9990684510703546e-06, "loss": 23.6094, "step": 4578 }, { "epoch": 0.043344913433231415, "grad_norm": 310.3685302734375, "learning_rate": 1.999067127583393e-06, "loss": 32.7188, "step": 4579 }, { "epoch": 0.04335437945494647, "grad_norm": 405.0980224609375, "learning_rate": 1.9990658031573734e-06, "loss": 49.9062, "step": 4580 }, { "epoch": 0.04336384547666152, "grad_norm": 667.5455932617188, "learning_rate": 1.999064477792297e-06, "loss": 27.6016, "step": 4581 }, { "epoch": 0.043373311498376574, "grad_norm": 279.98175048828125, "learning_rate": 1.999063151488165e-06, "loss": 31.8438, "step": 4582 }, { "epoch": 0.043382777520091634, "grad_norm": 625.1840209960938, "learning_rate": 1.9990618242449785e-06, "loss": 63.7344, "step": 4583 }, { "epoch": 0.04339224354180669, "grad_norm": 1308.4615478515625, "learning_rate": 1.999060496062739e-06, "loss": 60.75, "step": 4584 }, { "epoch": 0.04340170956352174, "grad_norm": 305.14569091796875, "learning_rate": 1.9990591669414474e-06, "loss": 21.8828, "step": 4585 }, { "epoch": 0.04341117558523679, "grad_norm": 508.3394470214844, "learning_rate": 1.9990578368811057e-06, "loss": 41.7812, "step": 4586 }, { "epoch": 0.043420641606951846, "grad_norm": 643.6275024414062, "learning_rate": 1.999056505881714e-06, "loss": 56.1094, "step": 4587 }, { "epoch": 0.0434301076286669, "grad_norm": 356.34478759765625, "learning_rate": 1.9990551739432745e-06, "loss": 47.5312, "step": 4588 }, { "epoch": 0.04343957365038195, "grad_norm": 423.7814636230469, "learning_rate": 1.9990538410657884e-06, "loss": 22.5859, "step": 4589 }, { "epoch": 0.043449039672097005, "grad_norm": 483.5542297363281, "learning_rate": 1.9990525072492562e-06, "loss": 28.7109, "step": 4590 }, { "epoch": 0.043458505693812065, "grad_norm": 780.8350219726562, "learning_rate": 1.99905117249368e-06, "loss": 19.8203, "step": 4591 }, { "epoch": 0.04346797171552712, "grad_norm": 533.1849975585938, "learning_rate": 1.9990498367990606e-06, "loss": 42.7812, "step": 4592 }, { "epoch": 0.04347743773724217, "grad_norm": 487.14422607421875, "learning_rate": 1.999048500165399e-06, "loss": 46.75, "step": 4593 }, { "epoch": 0.043486903758957224, "grad_norm": 324.9021301269531, "learning_rate": 1.999047162592697e-06, "loss": 33.9844, "step": 4594 }, { "epoch": 0.04349636978067228, "grad_norm": 2.7335622310638428, "learning_rate": 1.9990458240809556e-06, "loss": 1.0059, "step": 4595 }, { "epoch": 0.04350583580238733, "grad_norm": 2.9918503761291504, "learning_rate": 1.9990444846301763e-06, "loss": 0.8687, "step": 4596 }, { "epoch": 0.04351530182410238, "grad_norm": 290.5342102050781, "learning_rate": 1.9990431442403605e-06, "loss": 20.7188, "step": 4597 }, { "epoch": 0.043524767845817436, "grad_norm": 387.83148193359375, "learning_rate": 1.9990418029115083e-06, "loss": 24.1484, "step": 4598 }, { "epoch": 0.04353423386753249, "grad_norm": 202.5797576904297, "learning_rate": 1.9990404606436223e-06, "loss": 22.1719, "step": 4599 }, { "epoch": 0.04354369988924755, "grad_norm": 203.2494659423828, "learning_rate": 1.9990391174367033e-06, "loss": 29.5078, "step": 4600 }, { "epoch": 0.0435531659109626, "grad_norm": 497.4156188964844, "learning_rate": 1.9990377732907525e-06, "loss": 45.2812, "step": 4601 }, { "epoch": 0.043562631932677655, "grad_norm": 349.16070556640625, "learning_rate": 1.999036428205771e-06, "loss": 25.0156, "step": 4602 }, { "epoch": 0.04357209795439271, "grad_norm": 283.7783203125, "learning_rate": 1.9990350821817604e-06, "loss": 22.3281, "step": 4603 }, { "epoch": 0.04358156397610776, "grad_norm": 602.7009887695312, "learning_rate": 1.9990337352187216e-06, "loss": 41.4141, "step": 4604 }, { "epoch": 0.043591029997822814, "grad_norm": 611.7061157226562, "learning_rate": 1.9990323873166566e-06, "loss": 42.75, "step": 4605 }, { "epoch": 0.04360049601953787, "grad_norm": 188.66864013671875, "learning_rate": 1.9990310384755658e-06, "loss": 23.0625, "step": 4606 }, { "epoch": 0.04360996204125292, "grad_norm": 706.3943481445312, "learning_rate": 1.9990296886954504e-06, "loss": 65.7344, "step": 4607 }, { "epoch": 0.04361942806296798, "grad_norm": 282.3215637207031, "learning_rate": 1.9990283379763127e-06, "loss": 25.0781, "step": 4608 }, { "epoch": 0.04362889408468303, "grad_norm": 376.2720031738281, "learning_rate": 1.9990269863181533e-06, "loss": 41.875, "step": 4609 }, { "epoch": 0.043638360106398086, "grad_norm": 421.55670166015625, "learning_rate": 1.9990256337209732e-06, "loss": 28.125, "step": 4610 }, { "epoch": 0.04364782612811314, "grad_norm": 485.9654235839844, "learning_rate": 1.999024280184774e-06, "loss": 40.8594, "step": 4611 }, { "epoch": 0.04365729214982819, "grad_norm": 716.7913208007812, "learning_rate": 1.9990229257095573e-06, "loss": 54.5156, "step": 4612 }, { "epoch": 0.043666758171543245, "grad_norm": 825.0210571289062, "learning_rate": 1.9990215702953243e-06, "loss": 27.6172, "step": 4613 }, { "epoch": 0.0436762241932583, "grad_norm": 1013.9514770507812, "learning_rate": 1.9990202139420753e-06, "loss": 74.9531, "step": 4614 }, { "epoch": 0.04368569021497335, "grad_norm": 450.6729736328125, "learning_rate": 1.999018856649813e-06, "loss": 39.5938, "step": 4615 }, { "epoch": 0.043695156236688404, "grad_norm": 513.8416748046875, "learning_rate": 1.999017498418537e-06, "loss": 61.0156, "step": 4616 }, { "epoch": 0.043704622258403464, "grad_norm": 216.2947235107422, "learning_rate": 1.99901613924825e-06, "loss": 21.2656, "step": 4617 }, { "epoch": 0.04371408828011852, "grad_norm": 394.2510070800781, "learning_rate": 1.999014779138953e-06, "loss": 47.1641, "step": 4618 }, { "epoch": 0.04372355430183357, "grad_norm": 3.6238012313842773, "learning_rate": 1.999013418090647e-06, "loss": 1.0161, "step": 4619 }, { "epoch": 0.04373302032354862, "grad_norm": 209.38743591308594, "learning_rate": 1.9990120561033333e-06, "loss": 25.2031, "step": 4620 }, { "epoch": 0.043742486345263676, "grad_norm": 236.1110382080078, "learning_rate": 1.9990106931770138e-06, "loss": 24.4844, "step": 4621 }, { "epoch": 0.04375195236697873, "grad_norm": 796.129638671875, "learning_rate": 1.9990093293116883e-06, "loss": 30.75, "step": 4622 }, { "epoch": 0.04376141838869378, "grad_norm": 266.05755615234375, "learning_rate": 1.99900796450736e-06, "loss": 26.1875, "step": 4623 }, { "epoch": 0.043770884410408835, "grad_norm": 1245.3095703125, "learning_rate": 1.9990065987640286e-06, "loss": 86.2812, "step": 4624 }, { "epoch": 0.04378035043212389, "grad_norm": 213.57850646972656, "learning_rate": 1.999005232081696e-06, "loss": 25.0625, "step": 4625 }, { "epoch": 0.04378981645383895, "grad_norm": 257.6995849609375, "learning_rate": 1.9990038644603636e-06, "loss": 26.5703, "step": 4626 }, { "epoch": 0.043799282475554, "grad_norm": 2.8526198863983154, "learning_rate": 1.9990024959000323e-06, "loss": 0.9038, "step": 4627 }, { "epoch": 0.043808748497269054, "grad_norm": 189.022705078125, "learning_rate": 1.9990011264007037e-06, "loss": 27.9219, "step": 4628 }, { "epoch": 0.04381821451898411, "grad_norm": 747.3338012695312, "learning_rate": 1.9989997559623797e-06, "loss": 30.3281, "step": 4629 }, { "epoch": 0.04382768054069916, "grad_norm": 3.5929980278015137, "learning_rate": 1.9989983845850604e-06, "loss": 0.8794, "step": 4630 }, { "epoch": 0.04383714656241421, "grad_norm": 639.0078735351562, "learning_rate": 1.9989970122687475e-06, "loss": 37.7734, "step": 4631 }, { "epoch": 0.043846612584129266, "grad_norm": 220.7720184326172, "learning_rate": 1.998995639013442e-06, "loss": 28.75, "step": 4632 }, { "epoch": 0.04385607860584432, "grad_norm": 543.9453735351562, "learning_rate": 1.9989942648191467e-06, "loss": 45.9805, "step": 4633 }, { "epoch": 0.04386554462755938, "grad_norm": 695.1509399414062, "learning_rate": 1.998992889685861e-06, "loss": 24.2109, "step": 4634 }, { "epoch": 0.04387501064927443, "grad_norm": 291.5687561035156, "learning_rate": 1.998991513613587e-06, "loss": 23.3906, "step": 4635 }, { "epoch": 0.043884476670989485, "grad_norm": 1378.4710693359375, "learning_rate": 1.9989901366023265e-06, "loss": 44.5938, "step": 4636 }, { "epoch": 0.04389394269270454, "grad_norm": 407.33343505859375, "learning_rate": 1.9989887586520798e-06, "loss": 53.5, "step": 4637 }, { "epoch": 0.04390340871441959, "grad_norm": 2.8792850971221924, "learning_rate": 1.9989873797628488e-06, "loss": 0.8442, "step": 4638 }, { "epoch": 0.043912874736134644, "grad_norm": 3.3069965839385986, "learning_rate": 1.9989859999346343e-06, "loss": 1.0176, "step": 4639 }, { "epoch": 0.0439223407578497, "grad_norm": 745.7233276367188, "learning_rate": 1.998984619167438e-06, "loss": 75.9844, "step": 4640 }, { "epoch": 0.04393180677956475, "grad_norm": 413.80999755859375, "learning_rate": 1.9989832374612614e-06, "loss": 54.625, "step": 4641 }, { "epoch": 0.0439412728012798, "grad_norm": 2.845954179763794, "learning_rate": 1.998981854816106e-06, "loss": 0.8521, "step": 4642 }, { "epoch": 0.04395073882299486, "grad_norm": 479.3583984375, "learning_rate": 1.998980471231972e-06, "loss": 36.3438, "step": 4643 }, { "epoch": 0.043960204844709916, "grad_norm": 459.5681457519531, "learning_rate": 1.9989790867088615e-06, "loss": 24.8594, "step": 4644 }, { "epoch": 0.04396967086642497, "grad_norm": 521.2114868164062, "learning_rate": 1.9989777012467756e-06, "loss": 26.2344, "step": 4645 }, { "epoch": 0.04397913688814002, "grad_norm": 344.7895202636719, "learning_rate": 1.998976314845716e-06, "loss": 34.8906, "step": 4646 }, { "epoch": 0.043988602909855075, "grad_norm": 430.0755615234375, "learning_rate": 1.9989749275056834e-06, "loss": 49.1719, "step": 4647 }, { "epoch": 0.04399806893157013, "grad_norm": 284.45013427734375, "learning_rate": 1.9989735392266792e-06, "loss": 28.0312, "step": 4648 }, { "epoch": 0.04400753495328518, "grad_norm": 261.3016357421875, "learning_rate": 1.9989721500087048e-06, "loss": 31.7969, "step": 4649 }, { "epoch": 0.044017000975000234, "grad_norm": 787.9575805664062, "learning_rate": 1.9989707598517617e-06, "loss": 35.3828, "step": 4650 }, { "epoch": 0.044026466996715294, "grad_norm": 267.0346374511719, "learning_rate": 1.9989693687558512e-06, "loss": 23.625, "step": 4651 }, { "epoch": 0.04403593301843035, "grad_norm": 2.9852328300476074, "learning_rate": 1.9989679767209747e-06, "loss": 0.7939, "step": 4652 }, { "epoch": 0.0440453990401454, "grad_norm": 3.899380683898926, "learning_rate": 1.998966583747133e-06, "loss": 0.8748, "step": 4653 }, { "epoch": 0.04405486506186045, "grad_norm": 357.708984375, "learning_rate": 1.9989651898343275e-06, "loss": 47.8281, "step": 4654 }, { "epoch": 0.044064331083575506, "grad_norm": 233.02635192871094, "learning_rate": 1.99896379498256e-06, "loss": 24.5938, "step": 4655 }, { "epoch": 0.04407379710529056, "grad_norm": 300.4920654296875, "learning_rate": 1.9989623991918313e-06, "loss": 25.3906, "step": 4656 }, { "epoch": 0.04408326312700561, "grad_norm": 662.3690795898438, "learning_rate": 1.998961002462143e-06, "loss": 40.7031, "step": 4657 }, { "epoch": 0.044092729148720665, "grad_norm": 238.47482299804688, "learning_rate": 1.9989596047934965e-06, "loss": 29.5156, "step": 4658 }, { "epoch": 0.04410219517043572, "grad_norm": 217.72573852539062, "learning_rate": 1.998958206185893e-06, "loss": 20.5938, "step": 4659 }, { "epoch": 0.04411166119215078, "grad_norm": 236.90574645996094, "learning_rate": 1.9989568066393333e-06, "loss": 25.4844, "step": 4660 }, { "epoch": 0.04412112721386583, "grad_norm": 185.92510986328125, "learning_rate": 1.9989554061538196e-06, "loss": 24.8359, "step": 4661 }, { "epoch": 0.044130593235580884, "grad_norm": 444.87554931640625, "learning_rate": 1.998954004729353e-06, "loss": 19.0352, "step": 4662 }, { "epoch": 0.04414005925729594, "grad_norm": 2.925123691558838, "learning_rate": 1.998952602365934e-06, "loss": 0.9385, "step": 4663 }, { "epoch": 0.04414952527901099, "grad_norm": 979.5089721679688, "learning_rate": 1.998951199063565e-06, "loss": 44.1641, "step": 4664 }, { "epoch": 0.04415899130072604, "grad_norm": 268.52423095703125, "learning_rate": 1.9989497948222467e-06, "loss": 29.9688, "step": 4665 }, { "epoch": 0.044168457322441096, "grad_norm": 460.07366943359375, "learning_rate": 1.998948389641981e-06, "loss": 41.0, "step": 4666 }, { "epoch": 0.04417792334415615, "grad_norm": 625.9583740234375, "learning_rate": 1.9989469835227683e-06, "loss": 41.3594, "step": 4667 }, { "epoch": 0.0441873893658712, "grad_norm": 773.0640258789062, "learning_rate": 1.99894557646461e-06, "loss": 37.8359, "step": 4668 }, { "epoch": 0.04419685538758626, "grad_norm": 397.5743713378906, "learning_rate": 1.9989441684675086e-06, "loss": 20.8711, "step": 4669 }, { "epoch": 0.044206321409301315, "grad_norm": 472.1509704589844, "learning_rate": 1.9989427595314647e-06, "loss": 58.3438, "step": 4670 }, { "epoch": 0.04421578743101637, "grad_norm": 715.871826171875, "learning_rate": 1.998941349656479e-06, "loss": 23.3281, "step": 4671 }, { "epoch": 0.04422525345273142, "grad_norm": 583.1936645507812, "learning_rate": 1.9989399388425537e-06, "loss": 43.9609, "step": 4672 }, { "epoch": 0.044234719474446474, "grad_norm": 275.5933837890625, "learning_rate": 1.99893852708969e-06, "loss": 22.2188, "step": 4673 }, { "epoch": 0.04424418549616153, "grad_norm": 771.736572265625, "learning_rate": 1.998937114397889e-06, "loss": 19.5078, "step": 4674 }, { "epoch": 0.04425365151787658, "grad_norm": 463.2862854003906, "learning_rate": 1.998935700767152e-06, "loss": 29.0469, "step": 4675 }, { "epoch": 0.04426311753959163, "grad_norm": 207.92591857910156, "learning_rate": 1.9989342861974808e-06, "loss": 20.4219, "step": 4676 }, { "epoch": 0.04427258356130669, "grad_norm": 332.8160400390625, "learning_rate": 1.998932870688876e-06, "loss": 24.3203, "step": 4677 }, { "epoch": 0.044282049583021746, "grad_norm": 312.20440673828125, "learning_rate": 1.998931454241339e-06, "loss": 21.5703, "step": 4678 }, { "epoch": 0.0442915156047368, "grad_norm": 279.5443115234375, "learning_rate": 1.998930036854872e-06, "loss": 35.7031, "step": 4679 }, { "epoch": 0.04430098162645185, "grad_norm": 261.0059814453125, "learning_rate": 1.9989286185294755e-06, "loss": 31.0469, "step": 4680 }, { "epoch": 0.044310447648166905, "grad_norm": 228.44863891601562, "learning_rate": 1.998927199265151e-06, "loss": 27.6875, "step": 4681 }, { "epoch": 0.04431991366988196, "grad_norm": 313.5419616699219, "learning_rate": 1.9989257790619e-06, "loss": 27.875, "step": 4682 }, { "epoch": 0.04432937969159701, "grad_norm": 431.52392578125, "learning_rate": 1.998924357919724e-06, "loss": 28.3203, "step": 4683 }, { "epoch": 0.044338845713312064, "grad_norm": 3.209986448287964, "learning_rate": 1.9989229358386235e-06, "loss": 0.8555, "step": 4684 }, { "epoch": 0.04434831173502712, "grad_norm": 187.57301330566406, "learning_rate": 1.998921512818601e-06, "loss": 28.3281, "step": 4685 }, { "epoch": 0.04435777775674218, "grad_norm": 711.5709228515625, "learning_rate": 1.9989200888596568e-06, "loss": 58.0312, "step": 4686 }, { "epoch": 0.04436724377845723, "grad_norm": 409.4300231933594, "learning_rate": 1.9989186639617933e-06, "loss": 36.1562, "step": 4687 }, { "epoch": 0.04437670980017228, "grad_norm": 254.3749542236328, "learning_rate": 1.9989172381250107e-06, "loss": 25.3906, "step": 4688 }, { "epoch": 0.044386175821887336, "grad_norm": 413.6383361816406, "learning_rate": 1.9989158113493108e-06, "loss": 19.3438, "step": 4689 }, { "epoch": 0.04439564184360239, "grad_norm": 689.552001953125, "learning_rate": 1.9989143836346956e-06, "loss": 25.1016, "step": 4690 }, { "epoch": 0.04440510786531744, "grad_norm": 530.51513671875, "learning_rate": 1.9989129549811655e-06, "loss": 25.8125, "step": 4691 }, { "epoch": 0.044414573887032495, "grad_norm": 708.2060546875, "learning_rate": 1.9989115253887223e-06, "loss": 48.8047, "step": 4692 }, { "epoch": 0.04442403990874755, "grad_norm": 408.46923828125, "learning_rate": 1.998910094857367e-06, "loss": 31.0312, "step": 4693 }, { "epoch": 0.04443350593046261, "grad_norm": 383.8424072265625, "learning_rate": 1.9989086633871015e-06, "loss": 25.5781, "step": 4694 }, { "epoch": 0.04444297195217766, "grad_norm": 625.19482421875, "learning_rate": 1.998907230977927e-06, "loss": 41.6094, "step": 4695 }, { "epoch": 0.044452437973892714, "grad_norm": 437.831298828125, "learning_rate": 1.998905797629844e-06, "loss": 26.0469, "step": 4696 }, { "epoch": 0.04446190399560777, "grad_norm": 264.7362060546875, "learning_rate": 1.998904363342855e-06, "loss": 24.8594, "step": 4697 }, { "epoch": 0.04447137001732282, "grad_norm": 1333.9796142578125, "learning_rate": 1.998902928116961e-06, "loss": 42.2422, "step": 4698 }, { "epoch": 0.04448083603903787, "grad_norm": 402.5457763671875, "learning_rate": 1.998901491952163e-06, "loss": 36.5469, "step": 4699 }, { "epoch": 0.044490302060752926, "grad_norm": 184.3919677734375, "learning_rate": 1.9989000548484628e-06, "loss": 28.5859, "step": 4700 }, { "epoch": 0.04449976808246798, "grad_norm": 275.09710693359375, "learning_rate": 1.9988986168058613e-06, "loss": 30.9219, "step": 4701 }, { "epoch": 0.04450923410418303, "grad_norm": 523.4938354492188, "learning_rate": 1.99889717782436e-06, "loss": 44.875, "step": 4702 }, { "epoch": 0.04451870012589809, "grad_norm": 259.0872802734375, "learning_rate": 1.9988957379039605e-06, "loss": 27.4531, "step": 4703 }, { "epoch": 0.044528166147613145, "grad_norm": 522.5665893554688, "learning_rate": 1.998894297044664e-06, "loss": 35.0, "step": 4704 }, { "epoch": 0.0445376321693282, "grad_norm": 580.13623046875, "learning_rate": 1.998892855246472e-06, "loss": 62.2188, "step": 4705 }, { "epoch": 0.04454709819104325, "grad_norm": 321.9450378417969, "learning_rate": 1.9988914125093855e-06, "loss": 23.9766, "step": 4706 }, { "epoch": 0.044556564212758304, "grad_norm": 289.5340881347656, "learning_rate": 1.998889968833406e-06, "loss": 24.8438, "step": 4707 }, { "epoch": 0.04456603023447336, "grad_norm": 270.19000244140625, "learning_rate": 1.998888524218535e-06, "loss": 11.4258, "step": 4708 }, { "epoch": 0.04457549625618841, "grad_norm": 1326.96337890625, "learning_rate": 1.9988870786647734e-06, "loss": 103.8125, "step": 4709 }, { "epoch": 0.04458496227790346, "grad_norm": 204.0852813720703, "learning_rate": 1.9988856321721235e-06, "loss": 19.0859, "step": 4710 }, { "epoch": 0.044594428299618516, "grad_norm": 278.0, "learning_rate": 1.998884184740586e-06, "loss": 29.375, "step": 4711 }, { "epoch": 0.044603894321333576, "grad_norm": 306.1171569824219, "learning_rate": 1.998882736370162e-06, "loss": 30.4297, "step": 4712 }, { "epoch": 0.04461336034304863, "grad_norm": 509.03497314453125, "learning_rate": 1.9988812870608534e-06, "loss": 60.25, "step": 4713 }, { "epoch": 0.04462282636476368, "grad_norm": 360.24713134765625, "learning_rate": 1.9988798368126618e-06, "loss": 23.3594, "step": 4714 }, { "epoch": 0.044632292386478735, "grad_norm": 845.513916015625, "learning_rate": 1.9988783856255875e-06, "loss": 46.2383, "step": 4715 }, { "epoch": 0.04464175840819379, "grad_norm": 394.376220703125, "learning_rate": 1.9988769334996327e-06, "loss": 38.4531, "step": 4716 }, { "epoch": 0.04465122442990884, "grad_norm": 728.5464477539062, "learning_rate": 1.9988754804347986e-06, "loss": 56.8438, "step": 4717 }, { "epoch": 0.044660690451623894, "grad_norm": 427.53985595703125, "learning_rate": 1.9988740264310865e-06, "loss": 30.5938, "step": 4718 }, { "epoch": 0.04467015647333895, "grad_norm": 386.4418640136719, "learning_rate": 1.9988725714884976e-06, "loss": 48.8594, "step": 4719 }, { "epoch": 0.04467962249505401, "grad_norm": 1181.9637451171875, "learning_rate": 1.9988711156070338e-06, "loss": 39.6172, "step": 4720 }, { "epoch": 0.04468908851676906, "grad_norm": 596.3143310546875, "learning_rate": 1.998869658786696e-06, "loss": 31.9844, "step": 4721 }, { "epoch": 0.04469855453848411, "grad_norm": 383.9720458984375, "learning_rate": 1.998868201027486e-06, "loss": 31.1406, "step": 4722 }, { "epoch": 0.044708020560199166, "grad_norm": 404.9472961425781, "learning_rate": 1.9988667423294046e-06, "loss": 37.5312, "step": 4723 }, { "epoch": 0.04471748658191422, "grad_norm": 962.7823486328125, "learning_rate": 1.9988652826924533e-06, "loss": 68.7812, "step": 4724 }, { "epoch": 0.04472695260362927, "grad_norm": 267.6839599609375, "learning_rate": 1.9988638221166338e-06, "loss": 32.9531, "step": 4725 }, { "epoch": 0.044736418625344325, "grad_norm": 583.6217041015625, "learning_rate": 1.9988623606019472e-06, "loss": 79.7812, "step": 4726 }, { "epoch": 0.04474588464705938, "grad_norm": 902.021484375, "learning_rate": 1.998860898148395e-06, "loss": 25.0, "step": 4727 }, { "epoch": 0.04475535066877443, "grad_norm": 256.37774658203125, "learning_rate": 1.9988594347559787e-06, "loss": 25.9297, "step": 4728 }, { "epoch": 0.04476481669048949, "grad_norm": 351.54974365234375, "learning_rate": 1.998857970424699e-06, "loss": 28.7969, "step": 4729 }, { "epoch": 0.044774282712204544, "grad_norm": 741.7042846679688, "learning_rate": 1.9988565051545582e-06, "loss": 51.7031, "step": 4730 }, { "epoch": 0.0447837487339196, "grad_norm": 269.14385986328125, "learning_rate": 1.998855038945557e-06, "loss": 27.8281, "step": 4731 }, { "epoch": 0.04479321475563465, "grad_norm": 247.0953826904297, "learning_rate": 1.9988535717976974e-06, "loss": 27.3906, "step": 4732 }, { "epoch": 0.0448026807773497, "grad_norm": 688.0090942382812, "learning_rate": 1.9988521037109804e-06, "loss": 20.5391, "step": 4733 }, { "epoch": 0.044812146799064756, "grad_norm": 400.0714111328125, "learning_rate": 1.998850634685407e-06, "loss": 28.2891, "step": 4734 }, { "epoch": 0.04482161282077981, "grad_norm": 394.7759704589844, "learning_rate": 1.9988491647209795e-06, "loss": 26.5625, "step": 4735 }, { "epoch": 0.04483107884249486, "grad_norm": 204.69766235351562, "learning_rate": 1.9988476938176984e-06, "loss": 28.6406, "step": 4736 }, { "epoch": 0.04484054486420992, "grad_norm": 245.25743103027344, "learning_rate": 1.9988462219755656e-06, "loss": 25.2031, "step": 4737 }, { "epoch": 0.044850010885924975, "grad_norm": 705.1973266601562, "learning_rate": 1.9988447491945824e-06, "loss": 58.125, "step": 4738 }, { "epoch": 0.04485947690764003, "grad_norm": 298.877197265625, "learning_rate": 1.99884327547475e-06, "loss": 22.0156, "step": 4739 }, { "epoch": 0.04486894292935508, "grad_norm": 330.7499084472656, "learning_rate": 1.99884180081607e-06, "loss": 20.332, "step": 4740 }, { "epoch": 0.044878408951070134, "grad_norm": 408.70806884765625, "learning_rate": 1.9988403252185437e-06, "loss": 35.1484, "step": 4741 }, { "epoch": 0.04488787497278519, "grad_norm": 493.42620849609375, "learning_rate": 1.998838848682172e-06, "loss": 37.6875, "step": 4742 }, { "epoch": 0.04489734099450024, "grad_norm": 2.699302911758423, "learning_rate": 1.9988373712069572e-06, "loss": 0.8174, "step": 4743 }, { "epoch": 0.04490680701621529, "grad_norm": 481.3215637207031, "learning_rate": 1.9988358927929003e-06, "loss": 50.4688, "step": 4744 }, { "epoch": 0.044916273037930346, "grad_norm": 426.9627685546875, "learning_rate": 1.9988344134400027e-06, "loss": 34.7031, "step": 4745 }, { "epoch": 0.044925739059645406, "grad_norm": 344.6164245605469, "learning_rate": 1.9988329331482652e-06, "loss": 27.4922, "step": 4746 }, { "epoch": 0.04493520508136046, "grad_norm": 422.5010070800781, "learning_rate": 1.9988314519176904e-06, "loss": 41.9297, "step": 4747 }, { "epoch": 0.04494467110307551, "grad_norm": 394.2560729980469, "learning_rate": 1.9988299697482787e-06, "loss": 49.9531, "step": 4748 }, { "epoch": 0.044954137124790565, "grad_norm": 255.1030731201172, "learning_rate": 1.9988284866400317e-06, "loss": 29.7031, "step": 4749 }, { "epoch": 0.04496360314650562, "grad_norm": 820.7769775390625, "learning_rate": 1.998827002592951e-06, "loss": 24.7344, "step": 4750 }, { "epoch": 0.04497306916822067, "grad_norm": 292.0220031738281, "learning_rate": 1.9988255176070375e-06, "loss": 26.0312, "step": 4751 }, { "epoch": 0.044982535189935724, "grad_norm": 1141.03173828125, "learning_rate": 1.9988240316822934e-06, "loss": 59.3438, "step": 4752 }, { "epoch": 0.04499200121165078, "grad_norm": 969.8206176757812, "learning_rate": 1.99882254481872e-06, "loss": 37.3828, "step": 4753 }, { "epoch": 0.04500146723336583, "grad_norm": 457.51177978515625, "learning_rate": 1.998821057016318e-06, "loss": 54.9688, "step": 4754 }, { "epoch": 0.04501093325508089, "grad_norm": 223.54428100585938, "learning_rate": 1.9988195682750893e-06, "loss": 28.125, "step": 4755 }, { "epoch": 0.04502039927679594, "grad_norm": 597.4866943359375, "learning_rate": 1.9988180785950347e-06, "loss": 62.3125, "step": 4756 }, { "epoch": 0.045029865298510996, "grad_norm": 262.5660095214844, "learning_rate": 1.9988165879761567e-06, "loss": 26.0938, "step": 4757 }, { "epoch": 0.04503933132022605, "grad_norm": 466.81292724609375, "learning_rate": 1.998815096418456e-06, "loss": 48.5781, "step": 4758 }, { "epoch": 0.0450487973419411, "grad_norm": 671.2261352539062, "learning_rate": 1.9988136039219337e-06, "loss": 27.7188, "step": 4759 }, { "epoch": 0.045058263363656155, "grad_norm": 2.7984611988067627, "learning_rate": 1.9988121104865924e-06, "loss": 0.9058, "step": 4760 }, { "epoch": 0.04506772938537121, "grad_norm": 404.8177185058594, "learning_rate": 1.998810616112432e-06, "loss": 60.875, "step": 4761 }, { "epoch": 0.04507719540708626, "grad_norm": 686.510498046875, "learning_rate": 1.9988091207994546e-06, "loss": 43.4062, "step": 4762 }, { "epoch": 0.04508666142880132, "grad_norm": 334.0715637207031, "learning_rate": 1.998807624547662e-06, "loss": 19.8555, "step": 4763 }, { "epoch": 0.045096127450516374, "grad_norm": 227.77944946289062, "learning_rate": 1.9988061273570546e-06, "loss": 27.4531, "step": 4764 }, { "epoch": 0.04510559347223143, "grad_norm": 651.4144897460938, "learning_rate": 1.998804629227635e-06, "loss": 32.375, "step": 4765 }, { "epoch": 0.04511505949394648, "grad_norm": 526.6581420898438, "learning_rate": 1.998803130159404e-06, "loss": 25.2344, "step": 4766 }, { "epoch": 0.04512452551566153, "grad_norm": 2.8971078395843506, "learning_rate": 1.9988016301523626e-06, "loss": 0.8555, "step": 4767 }, { "epoch": 0.045133991537376586, "grad_norm": 466.20703125, "learning_rate": 1.9988001292065127e-06, "loss": 37.5312, "step": 4768 }, { "epoch": 0.04514345755909164, "grad_norm": 310.5910339355469, "learning_rate": 1.998798627321856e-06, "loss": 29.875, "step": 4769 }, { "epoch": 0.04515292358080669, "grad_norm": 173.25942993164062, "learning_rate": 1.9987971244983933e-06, "loss": 21.9531, "step": 4770 }, { "epoch": 0.045162389602521745, "grad_norm": 500.57537841796875, "learning_rate": 1.9987956207361267e-06, "loss": 21.8828, "step": 4771 }, { "epoch": 0.045171855624236805, "grad_norm": 218.7486572265625, "learning_rate": 1.9987941160350567e-06, "loss": 24.0156, "step": 4772 }, { "epoch": 0.04518132164595186, "grad_norm": 627.61474609375, "learning_rate": 1.9987926103951852e-06, "loss": 28.3242, "step": 4773 }, { "epoch": 0.04519078766766691, "grad_norm": 550.72705078125, "learning_rate": 1.9987911038165138e-06, "loss": 29.3672, "step": 4774 }, { "epoch": 0.045200253689381964, "grad_norm": 256.69903564453125, "learning_rate": 1.998789596299044e-06, "loss": 23.625, "step": 4775 }, { "epoch": 0.04520971971109702, "grad_norm": 420.2906494140625, "learning_rate": 1.9987880878427765e-06, "loss": 54.9062, "step": 4776 }, { "epoch": 0.04521918573281207, "grad_norm": 368.5379943847656, "learning_rate": 1.9987865784477137e-06, "loss": 29.1406, "step": 4777 }, { "epoch": 0.04522865175452712, "grad_norm": 230.60482788085938, "learning_rate": 1.998785068113856e-06, "loss": 29.0312, "step": 4778 }, { "epoch": 0.045238117776242176, "grad_norm": 387.39471435546875, "learning_rate": 1.9987835568412057e-06, "loss": 16.6406, "step": 4779 }, { "epoch": 0.045247583797957236, "grad_norm": 180.14146423339844, "learning_rate": 1.9987820446297634e-06, "loss": 21.1641, "step": 4780 }, { "epoch": 0.04525704981967229, "grad_norm": 607.1328125, "learning_rate": 1.998780531479531e-06, "loss": 43.6719, "step": 4781 }, { "epoch": 0.04526651584138734, "grad_norm": 392.7657165527344, "learning_rate": 1.99877901739051e-06, "loss": 39.875, "step": 4782 }, { "epoch": 0.045275981863102395, "grad_norm": 483.6501159667969, "learning_rate": 1.9987775023627018e-06, "loss": 25.5, "step": 4783 }, { "epoch": 0.04528544788481745, "grad_norm": 3.2879798412323, "learning_rate": 1.9987759863961075e-06, "loss": 0.9214, "step": 4784 }, { "epoch": 0.0452949139065325, "grad_norm": 498.51123046875, "learning_rate": 1.998774469490729e-06, "loss": 34.4688, "step": 4785 }, { "epoch": 0.045304379928247554, "grad_norm": 334.985595703125, "learning_rate": 1.9987729516465674e-06, "loss": 28.5, "step": 4786 }, { "epoch": 0.04531384594996261, "grad_norm": 914.745361328125, "learning_rate": 1.998771432863624e-06, "loss": 52.875, "step": 4787 }, { "epoch": 0.04532331197167766, "grad_norm": 1428.537353515625, "learning_rate": 1.9987699131419007e-06, "loss": 87.9375, "step": 4788 }, { "epoch": 0.04533277799339272, "grad_norm": 892.7255249023438, "learning_rate": 1.998768392481399e-06, "loss": 29.3359, "step": 4789 }, { "epoch": 0.04534224401510777, "grad_norm": 690.4173583984375, "learning_rate": 1.9987668708821193e-06, "loss": 56.3438, "step": 4790 }, { "epoch": 0.045351710036822826, "grad_norm": 218.13441467285156, "learning_rate": 1.998765348344064e-06, "loss": 25.7344, "step": 4791 }, { "epoch": 0.04536117605853788, "grad_norm": 240.02182006835938, "learning_rate": 1.998763824867234e-06, "loss": 20.2266, "step": 4792 }, { "epoch": 0.04537064208025293, "grad_norm": 476.3431701660156, "learning_rate": 1.998762300451631e-06, "loss": 34.7422, "step": 4793 }, { "epoch": 0.045380108101967985, "grad_norm": 457.1602783203125, "learning_rate": 1.9987607750972567e-06, "loss": 48.6875, "step": 4794 }, { "epoch": 0.04538957412368304, "grad_norm": 2.759016752243042, "learning_rate": 1.9987592488041123e-06, "loss": 0.8271, "step": 4795 }, { "epoch": 0.04539904014539809, "grad_norm": 182.53892517089844, "learning_rate": 1.9987577215721992e-06, "loss": 24.2656, "step": 4796 }, { "epoch": 0.045408506167113144, "grad_norm": 282.9500732421875, "learning_rate": 1.9987561934015187e-06, "loss": 26.5469, "step": 4797 }, { "epoch": 0.045417972188828204, "grad_norm": 295.39178466796875, "learning_rate": 1.998754664292072e-06, "loss": 22.9219, "step": 4798 }, { "epoch": 0.04542743821054326, "grad_norm": 597.5474853515625, "learning_rate": 1.998753134243861e-06, "loss": 60.3125, "step": 4799 }, { "epoch": 0.04543690423225831, "grad_norm": 261.8683776855469, "learning_rate": 1.9987516032568874e-06, "loss": 27.2188, "step": 4800 }, { "epoch": 0.04544637025397336, "grad_norm": 240.6015625, "learning_rate": 1.9987500713311523e-06, "loss": 28.9219, "step": 4801 }, { "epoch": 0.045455836275688416, "grad_norm": 667.9628295898438, "learning_rate": 1.998748538466657e-06, "loss": 31.8047, "step": 4802 }, { "epoch": 0.04546530229740347, "grad_norm": 2.2086353302001953, "learning_rate": 1.998747004663403e-06, "loss": 0.7109, "step": 4803 }, { "epoch": 0.04547476831911852, "grad_norm": 445.2358093261719, "learning_rate": 1.9987454699213915e-06, "loss": 23.6797, "step": 4804 }, { "epoch": 0.045484234340833575, "grad_norm": 276.59246826171875, "learning_rate": 1.9987439342406242e-06, "loss": 23.2031, "step": 4805 }, { "epoch": 0.045493700362548635, "grad_norm": 245.97760009765625, "learning_rate": 1.998742397621103e-06, "loss": 23.4062, "step": 4806 }, { "epoch": 0.04550316638426369, "grad_norm": 483.75946044921875, "learning_rate": 1.9987408600628287e-06, "loss": 32.0781, "step": 4807 }, { "epoch": 0.04551263240597874, "grad_norm": 894.851318359375, "learning_rate": 1.998739321565803e-06, "loss": 38.25, "step": 4808 }, { "epoch": 0.045522098427693794, "grad_norm": 2.996805191040039, "learning_rate": 1.9987377821300273e-06, "loss": 0.917, "step": 4809 }, { "epoch": 0.04553156444940885, "grad_norm": 411.85321044921875, "learning_rate": 1.998736241755503e-06, "loss": 26.9141, "step": 4810 }, { "epoch": 0.0455410304711239, "grad_norm": 191.93630981445312, "learning_rate": 1.9987347004422318e-06, "loss": 24.3047, "step": 4811 }, { "epoch": 0.04555049649283895, "grad_norm": 1233.132568359375, "learning_rate": 1.9987331581902146e-06, "loss": 28.1875, "step": 4812 }, { "epoch": 0.045559962514554006, "grad_norm": 785.2987060546875, "learning_rate": 1.9987316149994537e-06, "loss": 73.8125, "step": 4813 }, { "epoch": 0.04556942853626906, "grad_norm": 240.22434997558594, "learning_rate": 1.9987300708699495e-06, "loss": 25.875, "step": 4814 }, { "epoch": 0.04557889455798412, "grad_norm": 365.6331481933594, "learning_rate": 1.9987285258017045e-06, "loss": 24.6562, "step": 4815 }, { "epoch": 0.04558836057969917, "grad_norm": 2.4800925254821777, "learning_rate": 1.9987269797947195e-06, "loss": 0.8179, "step": 4816 }, { "epoch": 0.045597826601414225, "grad_norm": 476.07958984375, "learning_rate": 1.998725432848996e-06, "loss": 43.7969, "step": 4817 }, { "epoch": 0.04560729262312928, "grad_norm": 402.21630859375, "learning_rate": 1.9987238849645353e-06, "loss": 22.2656, "step": 4818 }, { "epoch": 0.04561675864484433, "grad_norm": 315.5611572265625, "learning_rate": 1.9987223361413395e-06, "loss": 24.6484, "step": 4819 }, { "epoch": 0.045626224666559384, "grad_norm": 1065.3551025390625, "learning_rate": 1.9987207863794096e-06, "loss": 51.3906, "step": 4820 }, { "epoch": 0.04563569068827444, "grad_norm": 619.24755859375, "learning_rate": 1.9987192356787474e-06, "loss": 44.2656, "step": 4821 }, { "epoch": 0.04564515670998949, "grad_norm": 836.4766235351562, "learning_rate": 1.998717684039354e-06, "loss": 46.125, "step": 4822 }, { "epoch": 0.04565462273170455, "grad_norm": 3.0622551441192627, "learning_rate": 1.998716131461231e-06, "loss": 0.8967, "step": 4823 }, { "epoch": 0.0456640887534196, "grad_norm": 634.5376586914062, "learning_rate": 1.998714577944379e-06, "loss": 39.6953, "step": 4824 }, { "epoch": 0.045673554775134656, "grad_norm": 374.5390930175781, "learning_rate": 1.998713023488801e-06, "loss": 21.5625, "step": 4825 }, { "epoch": 0.04568302079684971, "grad_norm": 745.7342529296875, "learning_rate": 1.9987114680944972e-06, "loss": 70.7812, "step": 4826 }, { "epoch": 0.04569248681856476, "grad_norm": 858.5900268554688, "learning_rate": 1.9987099117614704e-06, "loss": 36.2031, "step": 4827 }, { "epoch": 0.045701952840279815, "grad_norm": 318.5217590332031, "learning_rate": 1.9987083544897206e-06, "loss": 32.4219, "step": 4828 }, { "epoch": 0.04571141886199487, "grad_norm": 915.539306640625, "learning_rate": 1.9987067962792504e-06, "loss": 24.8281, "step": 4829 }, { "epoch": 0.04572088488370992, "grad_norm": 197.02090454101562, "learning_rate": 1.9987052371300605e-06, "loss": 26.0, "step": 4830 }, { "epoch": 0.045730350905424974, "grad_norm": 588.902587890625, "learning_rate": 1.9987036770421527e-06, "loss": 49.5312, "step": 4831 }, { "epoch": 0.045739816927140034, "grad_norm": 232.7539825439453, "learning_rate": 1.9987021160155283e-06, "loss": 24.2031, "step": 4832 }, { "epoch": 0.04574928294885509, "grad_norm": 223.51902770996094, "learning_rate": 1.998700554050189e-06, "loss": 23.4688, "step": 4833 }, { "epoch": 0.04575874897057014, "grad_norm": 354.3069152832031, "learning_rate": 1.9986989911461363e-06, "loss": 33.6875, "step": 4834 }, { "epoch": 0.04576821499228519, "grad_norm": 253.83914184570312, "learning_rate": 1.9986974273033713e-06, "loss": 26.0781, "step": 4835 }, { "epoch": 0.045777681014000246, "grad_norm": 196.713623046875, "learning_rate": 1.9986958625218956e-06, "loss": 21.1406, "step": 4836 }, { "epoch": 0.0457871470357153, "grad_norm": 409.67352294921875, "learning_rate": 1.998694296801711e-06, "loss": 34.8047, "step": 4837 }, { "epoch": 0.04579661305743035, "grad_norm": 1933.611328125, "learning_rate": 1.9986927301428183e-06, "loss": 34.3906, "step": 4838 }, { "epoch": 0.045806079079145405, "grad_norm": 926.9720458984375, "learning_rate": 1.99869116254522e-06, "loss": 44.6406, "step": 4839 }, { "epoch": 0.04581554510086046, "grad_norm": 506.21929931640625, "learning_rate": 1.9986895940089165e-06, "loss": 48.3125, "step": 4840 }, { "epoch": 0.04582501112257552, "grad_norm": 901.0478515625, "learning_rate": 1.99868802453391e-06, "loss": 63.8281, "step": 4841 }, { "epoch": 0.04583447714429057, "grad_norm": 400.96240234375, "learning_rate": 1.9986864541202014e-06, "loss": 30.2109, "step": 4842 }, { "epoch": 0.045843943166005624, "grad_norm": 547.9796142578125, "learning_rate": 1.998684882767793e-06, "loss": 56.5469, "step": 4843 }, { "epoch": 0.04585340918772068, "grad_norm": 681.2125244140625, "learning_rate": 1.9986833104766854e-06, "loss": 55.5469, "step": 4844 }, { "epoch": 0.04586287520943573, "grad_norm": 328.3799133300781, "learning_rate": 1.9986817372468806e-06, "loss": 30.4297, "step": 4845 }, { "epoch": 0.04587234123115078, "grad_norm": 291.5192565917969, "learning_rate": 1.9986801630783804e-06, "loss": 14.3633, "step": 4846 }, { "epoch": 0.045881807252865836, "grad_norm": 376.7858581542969, "learning_rate": 1.998678587971185e-06, "loss": 16.7656, "step": 4847 }, { "epoch": 0.04589127327458089, "grad_norm": 438.61810302734375, "learning_rate": 1.9986770119252973e-06, "loss": 21.75, "step": 4848 }, { "epoch": 0.04590073929629595, "grad_norm": 3.145371913909912, "learning_rate": 1.998675434940718e-06, "loss": 0.7842, "step": 4849 }, { "epoch": 0.045910205318011, "grad_norm": 196.5342254638672, "learning_rate": 1.9986738570174484e-06, "loss": 24.6016, "step": 4850 }, { "epoch": 0.045919671339726055, "grad_norm": 409.3076171875, "learning_rate": 1.9986722781554907e-06, "loss": 40.2891, "step": 4851 }, { "epoch": 0.04592913736144111, "grad_norm": 482.9364318847656, "learning_rate": 1.998670698354846e-06, "loss": 23.875, "step": 4852 }, { "epoch": 0.04593860338315616, "grad_norm": 581.1954345703125, "learning_rate": 1.998669117615516e-06, "loss": 53.9062, "step": 4853 }, { "epoch": 0.045948069404871214, "grad_norm": 1145.2845458984375, "learning_rate": 1.9986675359375017e-06, "loss": 59.0234, "step": 4854 }, { "epoch": 0.04595753542658627, "grad_norm": 166.15089416503906, "learning_rate": 1.998665953320805e-06, "loss": 32.0156, "step": 4855 }, { "epoch": 0.04596700144830132, "grad_norm": 646.8658447265625, "learning_rate": 1.998664369765427e-06, "loss": 53.3594, "step": 4856 }, { "epoch": 0.04597646747001637, "grad_norm": 3.5358567237854004, "learning_rate": 1.9986627852713695e-06, "loss": 1.0269, "step": 4857 }, { "epoch": 0.04598593349173143, "grad_norm": 278.1242370605469, "learning_rate": 1.998661199838634e-06, "loss": 22.4141, "step": 4858 }, { "epoch": 0.045995399513446486, "grad_norm": 756.1561279296875, "learning_rate": 1.9986596134672225e-06, "loss": 37.6289, "step": 4859 }, { "epoch": 0.04600486553516154, "grad_norm": 212.04299926757812, "learning_rate": 1.9986580261571352e-06, "loss": 22.2656, "step": 4860 }, { "epoch": 0.04601433155687659, "grad_norm": 427.7985534667969, "learning_rate": 1.998656437908375e-06, "loss": 47.4062, "step": 4861 }, { "epoch": 0.046023797578591645, "grad_norm": 459.7383117675781, "learning_rate": 1.998654848720942e-06, "loss": 50.8906, "step": 4862 }, { "epoch": 0.0460332636003067, "grad_norm": 337.75225830078125, "learning_rate": 1.9986532585948387e-06, "loss": 31.8125, "step": 4863 }, { "epoch": 0.04604272962202175, "grad_norm": 280.203857421875, "learning_rate": 1.9986516675300666e-06, "loss": 21.4375, "step": 4864 }, { "epoch": 0.046052195643736804, "grad_norm": 168.9491424560547, "learning_rate": 1.9986500755266265e-06, "loss": 22.5156, "step": 4865 }, { "epoch": 0.04606166166545186, "grad_norm": 567.06396484375, "learning_rate": 1.9986484825845206e-06, "loss": 25.5234, "step": 4866 }, { "epoch": 0.04607112768716692, "grad_norm": 221.18203735351562, "learning_rate": 1.99864688870375e-06, "loss": 21.9375, "step": 4867 }, { "epoch": 0.04608059370888197, "grad_norm": 690.2825927734375, "learning_rate": 1.998645293884316e-06, "loss": 55.6094, "step": 4868 }, { "epoch": 0.04609005973059702, "grad_norm": 1215.45654296875, "learning_rate": 1.9986436981262207e-06, "loss": 78.7266, "step": 4869 }, { "epoch": 0.046099525752312076, "grad_norm": 329.5827941894531, "learning_rate": 1.9986421014294656e-06, "loss": 36.1719, "step": 4870 }, { "epoch": 0.04610899177402713, "grad_norm": 585.6376342773438, "learning_rate": 1.9986405037940513e-06, "loss": 28.7969, "step": 4871 }, { "epoch": 0.04611845779574218, "grad_norm": 236.2150115966797, "learning_rate": 1.9986389052199798e-06, "loss": 24.2422, "step": 4872 }, { "epoch": 0.046127923817457235, "grad_norm": 390.70440673828125, "learning_rate": 1.9986373057072534e-06, "loss": 24.1406, "step": 4873 }, { "epoch": 0.04613738983917229, "grad_norm": 260.5181579589844, "learning_rate": 1.998635705255872e-06, "loss": 22.4141, "step": 4874 }, { "epoch": 0.04614685586088735, "grad_norm": 287.16168212890625, "learning_rate": 1.9986341038658387e-06, "loss": 30.3281, "step": 4875 }, { "epoch": 0.0461563218826024, "grad_norm": 502.43182373046875, "learning_rate": 1.9986325015371543e-06, "loss": 26.1562, "step": 4876 }, { "epoch": 0.046165787904317454, "grad_norm": 442.6324462890625, "learning_rate": 1.99863089826982e-06, "loss": 22.9844, "step": 4877 }, { "epoch": 0.04617525392603251, "grad_norm": 677.4999389648438, "learning_rate": 1.998629294063838e-06, "loss": 40.125, "step": 4878 }, { "epoch": 0.04618471994774756, "grad_norm": 195.59942626953125, "learning_rate": 1.998627688919209e-06, "loss": 22.4531, "step": 4879 }, { "epoch": 0.04619418596946261, "grad_norm": 666.4011840820312, "learning_rate": 1.998626082835935e-06, "loss": 42.4219, "step": 4880 }, { "epoch": 0.046203651991177666, "grad_norm": 221.5003204345703, "learning_rate": 1.9986244758140175e-06, "loss": 22.2812, "step": 4881 }, { "epoch": 0.04621311801289272, "grad_norm": 538.1617431640625, "learning_rate": 1.998622867853458e-06, "loss": 40.2812, "step": 4882 }, { "epoch": 0.04622258403460777, "grad_norm": 307.6631164550781, "learning_rate": 1.998621258954258e-06, "loss": 25.9922, "step": 4883 }, { "epoch": 0.04623205005632283, "grad_norm": 416.6775207519531, "learning_rate": 1.998619649116419e-06, "loss": 46.0781, "step": 4884 }, { "epoch": 0.046241516078037885, "grad_norm": 517.3224487304688, "learning_rate": 1.998618038339942e-06, "loss": 19.7031, "step": 4885 }, { "epoch": 0.04625098209975294, "grad_norm": 489.37139892578125, "learning_rate": 1.99861642662483e-06, "loss": 31.9375, "step": 4886 }, { "epoch": 0.04626044812146799, "grad_norm": 1272.1761474609375, "learning_rate": 1.9986148139710827e-06, "loss": 28.5156, "step": 4887 }, { "epoch": 0.046269914143183044, "grad_norm": 228.46556091308594, "learning_rate": 1.9986132003787028e-06, "loss": 25.7734, "step": 4888 }, { "epoch": 0.0462793801648981, "grad_norm": 720.1585083007812, "learning_rate": 1.9986115858476913e-06, "loss": 35.9531, "step": 4889 }, { "epoch": 0.04628884618661315, "grad_norm": 372.1622314453125, "learning_rate": 1.99860997037805e-06, "loss": 24.6562, "step": 4890 }, { "epoch": 0.0462983122083282, "grad_norm": 169.2445526123047, "learning_rate": 1.9986083539697807e-06, "loss": 20.4062, "step": 4891 }, { "epoch": 0.04630777823004326, "grad_norm": 686.4298095703125, "learning_rate": 1.9986067366228836e-06, "loss": 58.3828, "step": 4892 }, { "epoch": 0.046317244251758316, "grad_norm": 194.55233764648438, "learning_rate": 1.9986051183373617e-06, "loss": 31.1406, "step": 4893 }, { "epoch": 0.04632671027347337, "grad_norm": 845.1546630859375, "learning_rate": 1.9986034991132156e-06, "loss": 30.3594, "step": 4894 }, { "epoch": 0.04633617629518842, "grad_norm": 475.3451232910156, "learning_rate": 1.9986018789504477e-06, "loss": 29.3359, "step": 4895 }, { "epoch": 0.046345642316903475, "grad_norm": 281.85797119140625, "learning_rate": 1.9986002578490584e-06, "loss": 39.3984, "step": 4896 }, { "epoch": 0.04635510833861853, "grad_norm": 3.2843613624572754, "learning_rate": 1.9985986358090503e-06, "loss": 0.7766, "step": 4897 }, { "epoch": 0.04636457436033358, "grad_norm": 3.4960224628448486, "learning_rate": 1.9985970128304243e-06, "loss": 0.9668, "step": 4898 }, { "epoch": 0.046374040382048634, "grad_norm": 916.4220581054688, "learning_rate": 1.998595388913182e-06, "loss": 59.375, "step": 4899 }, { "epoch": 0.04638350640376369, "grad_norm": 261.834716796875, "learning_rate": 1.998593764057325e-06, "loss": 43.7969, "step": 4900 }, { "epoch": 0.04639297242547875, "grad_norm": 278.4112854003906, "learning_rate": 1.998592138262855e-06, "loss": 33.25, "step": 4901 }, { "epoch": 0.0464024384471938, "grad_norm": 673.2586059570312, "learning_rate": 1.998590511529773e-06, "loss": 34.2344, "step": 4902 }, { "epoch": 0.04641190446890885, "grad_norm": 254.03070068359375, "learning_rate": 1.998588883858081e-06, "loss": 25.2969, "step": 4903 }, { "epoch": 0.046421370490623906, "grad_norm": 369.7664489746094, "learning_rate": 1.9985872552477807e-06, "loss": 26.5625, "step": 4904 }, { "epoch": 0.04643083651233896, "grad_norm": 577.559814453125, "learning_rate": 1.9985856256988734e-06, "loss": 44.2422, "step": 4905 }, { "epoch": 0.04644030253405401, "grad_norm": 3.018941640853882, "learning_rate": 1.99858399521136e-06, "loss": 0.8472, "step": 4906 }, { "epoch": 0.046449768555769065, "grad_norm": 3.3745815753936768, "learning_rate": 1.998582363785243e-06, "loss": 0.957, "step": 4907 }, { "epoch": 0.04645923457748412, "grad_norm": 530.7095336914062, "learning_rate": 1.998580731420524e-06, "loss": 53.9375, "step": 4908 }, { "epoch": 0.04646870059919917, "grad_norm": 458.37310791015625, "learning_rate": 1.998579098117203e-06, "loss": 47.5938, "step": 4909 }, { "epoch": 0.04647816662091423, "grad_norm": 806.6177368164062, "learning_rate": 1.998577463875283e-06, "loss": 56.0469, "step": 4910 }, { "epoch": 0.046487632642629284, "grad_norm": 263.3856201171875, "learning_rate": 1.9985758286947654e-06, "loss": 27.0938, "step": 4911 }, { "epoch": 0.04649709866434434, "grad_norm": 2.7006289958953857, "learning_rate": 1.9985741925756514e-06, "loss": 0.8325, "step": 4912 }, { "epoch": 0.04650656468605939, "grad_norm": 2365.465576171875, "learning_rate": 1.9985725555179427e-06, "loss": 64.0312, "step": 4913 }, { "epoch": 0.04651603070777444, "grad_norm": 647.1509399414062, "learning_rate": 1.9985709175216407e-06, "loss": 20.1172, "step": 4914 }, { "epoch": 0.046525496729489496, "grad_norm": 302.82196044921875, "learning_rate": 1.998569278586747e-06, "loss": 30.25, "step": 4915 }, { "epoch": 0.04653496275120455, "grad_norm": 3.0986053943634033, "learning_rate": 1.998567638713263e-06, "loss": 1.0054, "step": 4916 }, { "epoch": 0.0465444287729196, "grad_norm": 192.00648498535156, "learning_rate": 1.9985659979011906e-06, "loss": 23.9609, "step": 4917 }, { "epoch": 0.04655389479463466, "grad_norm": 3.328106164932251, "learning_rate": 1.998564356150531e-06, "loss": 0.9902, "step": 4918 }, { "epoch": 0.046563360816349715, "grad_norm": 474.21759033203125, "learning_rate": 1.9985627134612856e-06, "loss": 38.5312, "step": 4919 }, { "epoch": 0.04657282683806477, "grad_norm": 538.9546508789062, "learning_rate": 1.9985610698334566e-06, "loss": 49.3359, "step": 4920 }, { "epoch": 0.04658229285977982, "grad_norm": 269.71673583984375, "learning_rate": 1.9985594252670452e-06, "loss": 25.3359, "step": 4921 }, { "epoch": 0.046591758881494874, "grad_norm": 436.94061279296875, "learning_rate": 1.9985577797620527e-06, "loss": 49.0625, "step": 4922 }, { "epoch": 0.04660122490320993, "grad_norm": 644.2550048828125, "learning_rate": 1.998556133318481e-06, "loss": 48.9219, "step": 4923 }, { "epoch": 0.04661069092492498, "grad_norm": 233.2210693359375, "learning_rate": 1.998554485936331e-06, "loss": 25.4531, "step": 4924 }, { "epoch": 0.04662015694664003, "grad_norm": 201.16781616210938, "learning_rate": 1.9985528376156055e-06, "loss": 27.4375, "step": 4925 }, { "epoch": 0.046629622968355086, "grad_norm": 184.18301391601562, "learning_rate": 1.998551188356305e-06, "loss": 20.8281, "step": 4926 }, { "epoch": 0.046639088990070146, "grad_norm": 463.6669006347656, "learning_rate": 1.998549538158431e-06, "loss": 56.0781, "step": 4927 }, { "epoch": 0.0466485550117852, "grad_norm": 2.787112236022949, "learning_rate": 1.9985478870219858e-06, "loss": 1.0176, "step": 4928 }, { "epoch": 0.04665802103350025, "grad_norm": 410.17059326171875, "learning_rate": 1.9985462349469704e-06, "loss": 30.5625, "step": 4929 }, { "epoch": 0.046667487055215305, "grad_norm": 1069.431396484375, "learning_rate": 1.9985445819333865e-06, "loss": 42.9688, "step": 4930 }, { "epoch": 0.04667695307693036, "grad_norm": 945.3935546875, "learning_rate": 1.9985429279812356e-06, "loss": 31.4844, "step": 4931 }, { "epoch": 0.04668641909864541, "grad_norm": 612.7189331054688, "learning_rate": 1.998541273090519e-06, "loss": 56.5781, "step": 4932 }, { "epoch": 0.046695885120360464, "grad_norm": 658.8267211914062, "learning_rate": 1.9985396172612394e-06, "loss": 80.6562, "step": 4933 }, { "epoch": 0.04670535114207552, "grad_norm": 424.18438720703125, "learning_rate": 1.9985379604933968e-06, "loss": 25.4531, "step": 4934 }, { "epoch": 0.04671481716379058, "grad_norm": 277.6717224121094, "learning_rate": 1.9985363027869937e-06, "loss": 26.6875, "step": 4935 }, { "epoch": 0.04672428318550563, "grad_norm": 676.702392578125, "learning_rate": 1.9985346441420317e-06, "loss": 32.75, "step": 4936 }, { "epoch": 0.04673374920722068, "grad_norm": 395.3481750488281, "learning_rate": 1.9985329845585117e-06, "loss": 29.2578, "step": 4937 }, { "epoch": 0.046743215228935736, "grad_norm": 245.13829040527344, "learning_rate": 1.9985313240364357e-06, "loss": 25.7344, "step": 4938 }, { "epoch": 0.04675268125065079, "grad_norm": 344.7325439453125, "learning_rate": 1.9985296625758056e-06, "loss": 31.8281, "step": 4939 }, { "epoch": 0.04676214727236584, "grad_norm": 399.8819885253906, "learning_rate": 1.998528000176622e-06, "loss": 26.625, "step": 4940 }, { "epoch": 0.046771613294080895, "grad_norm": 406.46124267578125, "learning_rate": 1.9985263368388873e-06, "loss": 41.9531, "step": 4941 }, { "epoch": 0.04678107931579595, "grad_norm": 356.9564514160156, "learning_rate": 1.9985246725626032e-06, "loss": 28.5781, "step": 4942 }, { "epoch": 0.046790545337511, "grad_norm": 161.81138610839844, "learning_rate": 1.9985230073477703e-06, "loss": 25.7812, "step": 4943 }, { "epoch": 0.04680001135922606, "grad_norm": 2.849299669265747, "learning_rate": 1.998521341194391e-06, "loss": 0.7217, "step": 4944 }, { "epoch": 0.046809477380941114, "grad_norm": 252.18081665039062, "learning_rate": 1.9985196741024664e-06, "loss": 26.1562, "step": 4945 }, { "epoch": 0.04681894340265617, "grad_norm": 199.55926513671875, "learning_rate": 1.9985180060719985e-06, "loss": 24.5078, "step": 4946 }, { "epoch": 0.04682840942437122, "grad_norm": 345.8384094238281, "learning_rate": 1.9985163371029885e-06, "loss": 26.2812, "step": 4947 }, { "epoch": 0.04683787544608627, "grad_norm": 318.3001708984375, "learning_rate": 1.998514667195438e-06, "loss": 33.3125, "step": 4948 }, { "epoch": 0.046847341467801326, "grad_norm": 335.01953125, "learning_rate": 1.998512996349349e-06, "loss": 48.7031, "step": 4949 }, { "epoch": 0.04685680748951638, "grad_norm": 475.6780700683594, "learning_rate": 1.9985113245647224e-06, "loss": 52.9375, "step": 4950 }, { "epoch": 0.04686627351123143, "grad_norm": 443.92999267578125, "learning_rate": 1.9985096518415602e-06, "loss": 46.9688, "step": 4951 }, { "epoch": 0.046875739532946485, "grad_norm": 853.9534301757812, "learning_rate": 1.998507978179864e-06, "loss": 69.3828, "step": 4952 }, { "epoch": 0.046885205554661545, "grad_norm": 441.2580261230469, "learning_rate": 1.9985063035796353e-06, "loss": 24.1172, "step": 4953 }, { "epoch": 0.0468946715763766, "grad_norm": 538.1654663085938, "learning_rate": 1.9985046280408755e-06, "loss": 33.1797, "step": 4954 }, { "epoch": 0.04690413759809165, "grad_norm": 3.1755077838897705, "learning_rate": 1.9985029515635863e-06, "loss": 0.9346, "step": 4955 }, { "epoch": 0.046913603619806704, "grad_norm": 195.17095947265625, "learning_rate": 1.9985012741477693e-06, "loss": 23.6094, "step": 4956 }, { "epoch": 0.04692306964152176, "grad_norm": 535.0870361328125, "learning_rate": 1.9984995957934263e-06, "loss": 60.0938, "step": 4957 }, { "epoch": 0.04693253566323681, "grad_norm": 227.46014404296875, "learning_rate": 1.998497916500558e-06, "loss": 23.6641, "step": 4958 }, { "epoch": 0.04694200168495186, "grad_norm": 176.2168731689453, "learning_rate": 1.998496236269167e-06, "loss": 21.2031, "step": 4959 }, { "epoch": 0.046951467706666916, "grad_norm": 309.41607666015625, "learning_rate": 1.9984945550992547e-06, "loss": 31.9844, "step": 4960 }, { "epoch": 0.046960933728381976, "grad_norm": 2.762362241744995, "learning_rate": 1.9984928729908223e-06, "loss": 0.8433, "step": 4961 }, { "epoch": 0.04697039975009703, "grad_norm": 519.140380859375, "learning_rate": 1.998491189943872e-06, "loss": 22.1914, "step": 4962 }, { "epoch": 0.04697986577181208, "grad_norm": 600.2186279296875, "learning_rate": 1.998489505958404e-06, "loss": 55.8516, "step": 4963 }, { "epoch": 0.046989331793527135, "grad_norm": 339.1544494628906, "learning_rate": 1.9984878210344215e-06, "loss": 30.2969, "step": 4964 }, { "epoch": 0.04699879781524219, "grad_norm": 319.9688720703125, "learning_rate": 1.998486135171925e-06, "loss": 29.9688, "step": 4965 }, { "epoch": 0.04700826383695724, "grad_norm": 218.00880432128906, "learning_rate": 1.9984844483709165e-06, "loss": 24.8438, "step": 4966 }, { "epoch": 0.047017729858672294, "grad_norm": 234.2946319580078, "learning_rate": 1.998482760631398e-06, "loss": 20.7422, "step": 4967 }, { "epoch": 0.04702719588038735, "grad_norm": 1051.26904296875, "learning_rate": 1.9984810719533704e-06, "loss": 44.9219, "step": 4968 }, { "epoch": 0.0470366619021024, "grad_norm": 367.2789611816406, "learning_rate": 1.9984793823368357e-06, "loss": 12.3633, "step": 4969 }, { "epoch": 0.04704612792381746, "grad_norm": 484.18829345703125, "learning_rate": 1.998477691781795e-06, "loss": 36.8438, "step": 4970 }, { "epoch": 0.04705559394553251, "grad_norm": 188.30006408691406, "learning_rate": 1.9984760002882506e-06, "loss": 23.4219, "step": 4971 }, { "epoch": 0.047065059967247566, "grad_norm": 404.5779113769531, "learning_rate": 1.9984743078562034e-06, "loss": 42.9531, "step": 4972 }, { "epoch": 0.04707452598896262, "grad_norm": 447.71270751953125, "learning_rate": 1.9984726144856556e-06, "loss": 27.2188, "step": 4973 }, { "epoch": 0.04708399201067767, "grad_norm": 340.4766845703125, "learning_rate": 1.9984709201766085e-06, "loss": 15.0, "step": 4974 }, { "epoch": 0.047093458032392725, "grad_norm": 351.8328552246094, "learning_rate": 1.998469224929063e-06, "loss": 29.1172, "step": 4975 }, { "epoch": 0.04710292405410778, "grad_norm": 3.1212470531463623, "learning_rate": 1.9984675287430223e-06, "loss": 0.9121, "step": 4976 }, { "epoch": 0.04711239007582283, "grad_norm": 253.04283142089844, "learning_rate": 1.9984658316184868e-06, "loss": 23.7891, "step": 4977 }, { "epoch": 0.04712185609753789, "grad_norm": 267.0601806640625, "learning_rate": 1.998464133555458e-06, "loss": 24.4688, "step": 4978 }, { "epoch": 0.047131322119252944, "grad_norm": 312.1460876464844, "learning_rate": 1.9984624345539383e-06, "loss": 31.1562, "step": 4979 }, { "epoch": 0.047140788140968, "grad_norm": 343.7242126464844, "learning_rate": 1.9984607346139287e-06, "loss": 24.125, "step": 4980 }, { "epoch": 0.04715025416268305, "grad_norm": 501.09521484375, "learning_rate": 1.9984590337354312e-06, "loss": 35.2422, "step": 4981 }, { "epoch": 0.0471597201843981, "grad_norm": 636.5592651367188, "learning_rate": 1.9984573319184466e-06, "loss": 42.5781, "step": 4982 }, { "epoch": 0.047169186206113156, "grad_norm": 243.40309143066406, "learning_rate": 1.9984556291629775e-06, "loss": 22.6719, "step": 4983 }, { "epoch": 0.04717865222782821, "grad_norm": 616.3126220703125, "learning_rate": 1.998453925469025e-06, "loss": 29.0156, "step": 4984 }, { "epoch": 0.04718811824954326, "grad_norm": 279.968994140625, "learning_rate": 1.9984522208365906e-06, "loss": 29.2109, "step": 4985 }, { "epoch": 0.047197584271258315, "grad_norm": 711.9099731445312, "learning_rate": 1.9984505152656763e-06, "loss": 36.4062, "step": 4986 }, { "epoch": 0.047207050292973375, "grad_norm": 3.1869430541992188, "learning_rate": 1.9984488087562833e-06, "loss": 0.999, "step": 4987 }, { "epoch": 0.04721651631468843, "grad_norm": 355.35211181640625, "learning_rate": 1.9984471013084135e-06, "loss": 27.8438, "step": 4988 }, { "epoch": 0.04722598233640348, "grad_norm": 318.0705261230469, "learning_rate": 1.9984453929220683e-06, "loss": 28.5234, "step": 4989 }, { "epoch": 0.047235448358118534, "grad_norm": 367.4602355957031, "learning_rate": 1.9984436835972497e-06, "loss": 28.1875, "step": 4990 }, { "epoch": 0.04724491437983359, "grad_norm": 512.8790893554688, "learning_rate": 1.9984419733339587e-06, "loss": 20.7266, "step": 4991 }, { "epoch": 0.04725438040154864, "grad_norm": 3.0510165691375732, "learning_rate": 1.9984402621321972e-06, "loss": 0.9121, "step": 4992 }, { "epoch": 0.04726384642326369, "grad_norm": 377.33428955078125, "learning_rate": 1.998438549991967e-06, "loss": 24.1875, "step": 4993 }, { "epoch": 0.047273312444978746, "grad_norm": 472.1523132324219, "learning_rate": 1.998436836913269e-06, "loss": 37.9062, "step": 4994 }, { "epoch": 0.0472827784666938, "grad_norm": 620.4907836914062, "learning_rate": 1.9984351228961057e-06, "loss": 44.1875, "step": 4995 }, { "epoch": 0.04729224448840886, "grad_norm": 374.70501708984375, "learning_rate": 1.998433407940478e-06, "loss": 46.7188, "step": 4996 }, { "epoch": 0.04730171051012391, "grad_norm": 1979.1954345703125, "learning_rate": 1.998431692046388e-06, "loss": 46.4062, "step": 4997 }, { "epoch": 0.047311176531838965, "grad_norm": 237.84727478027344, "learning_rate": 1.9984299752138375e-06, "loss": 22.1016, "step": 4998 }, { "epoch": 0.04732064255355402, "grad_norm": 772.0655517578125, "learning_rate": 1.9984282574428277e-06, "loss": 84.1562, "step": 4999 }, { "epoch": 0.04733010857526907, "grad_norm": 544.4893188476562, "learning_rate": 1.99842653873336e-06, "loss": 48.0469, "step": 5000 }, { "epoch": 0.047339574596984124, "grad_norm": 417.3282165527344, "learning_rate": 1.9984248190854365e-06, "loss": 51.4844, "step": 5001 }, { "epoch": 0.04734904061869918, "grad_norm": 472.72613525390625, "learning_rate": 1.9984230984990586e-06, "loss": 54.9609, "step": 5002 }, { "epoch": 0.04735850664041423, "grad_norm": 179.93121337890625, "learning_rate": 1.998421376974228e-06, "loss": 22.7344, "step": 5003 }, { "epoch": 0.04736797266212929, "grad_norm": 330.06158447265625, "learning_rate": 1.998419654510946e-06, "loss": 24.3984, "step": 5004 }, { "epoch": 0.04737743868384434, "grad_norm": 847.3718872070312, "learning_rate": 1.998417931109215e-06, "loss": 46.1484, "step": 5005 }, { "epoch": 0.047386904705559396, "grad_norm": 603.53076171875, "learning_rate": 1.9984162067690355e-06, "loss": 49.4844, "step": 5006 }, { "epoch": 0.04739637072727445, "grad_norm": 778.5750732421875, "learning_rate": 1.99841448149041e-06, "loss": 61.2578, "step": 5007 }, { "epoch": 0.0474058367489895, "grad_norm": 265.6759948730469, "learning_rate": 1.9984127552733397e-06, "loss": 28.3438, "step": 5008 }, { "epoch": 0.047415302770704555, "grad_norm": 224.01828002929688, "learning_rate": 1.9984110281178265e-06, "loss": 23.6641, "step": 5009 }, { "epoch": 0.04742476879241961, "grad_norm": 419.162109375, "learning_rate": 1.998409300023872e-06, "loss": 59.2812, "step": 5010 }, { "epoch": 0.04743423481413466, "grad_norm": 591.21875, "learning_rate": 1.9984075709914774e-06, "loss": 30.9062, "step": 5011 }, { "epoch": 0.047443700835849714, "grad_norm": 200.12530517578125, "learning_rate": 1.998405841020645e-06, "loss": 23.9531, "step": 5012 }, { "epoch": 0.047453166857564774, "grad_norm": 406.57696533203125, "learning_rate": 1.9984041101113756e-06, "loss": 30.6719, "step": 5013 }, { "epoch": 0.04746263287927983, "grad_norm": 232.7919921875, "learning_rate": 1.9984023782636718e-06, "loss": 26.0625, "step": 5014 }, { "epoch": 0.04747209890099488, "grad_norm": 1282.079833984375, "learning_rate": 1.998400645477534e-06, "loss": 66.3906, "step": 5015 }, { "epoch": 0.04748156492270993, "grad_norm": 690.8966064453125, "learning_rate": 1.9983989117529655e-06, "loss": 48.4531, "step": 5016 }, { "epoch": 0.047491030944424986, "grad_norm": 2.6194405555725098, "learning_rate": 1.9983971770899664e-06, "loss": 0.8286, "step": 5017 }, { "epoch": 0.04750049696614004, "grad_norm": 296.7707214355469, "learning_rate": 1.998395441488539e-06, "loss": 21.1094, "step": 5018 }, { "epoch": 0.04750996298785509, "grad_norm": 326.5553894042969, "learning_rate": 1.9983937049486848e-06, "loss": 22.9219, "step": 5019 }, { "epoch": 0.047519429009570145, "grad_norm": 1084.9110107421875, "learning_rate": 1.9983919674704052e-06, "loss": 90.7266, "step": 5020 }, { "epoch": 0.047528895031285205, "grad_norm": 2.9455926418304443, "learning_rate": 1.9983902290537026e-06, "loss": 0.9492, "step": 5021 }, { "epoch": 0.04753836105300026, "grad_norm": 2.646332263946533, "learning_rate": 1.998388489698578e-06, "loss": 0.7515, "step": 5022 }, { "epoch": 0.04754782707471531, "grad_norm": 662.1357421875, "learning_rate": 1.998386749405033e-06, "loss": 58.8125, "step": 5023 }, { "epoch": 0.047557293096430364, "grad_norm": 376.13946533203125, "learning_rate": 1.9983850081730696e-06, "loss": 23.7891, "step": 5024 }, { "epoch": 0.04756675911814542, "grad_norm": 279.9977111816406, "learning_rate": 1.9983832660026888e-06, "loss": 28.125, "step": 5025 }, { "epoch": 0.04757622513986047, "grad_norm": 346.9885559082031, "learning_rate": 1.9983815228938932e-06, "loss": 35.0625, "step": 5026 }, { "epoch": 0.04758569116157552, "grad_norm": 383.2882995605469, "learning_rate": 1.9983797788466835e-06, "loss": 34.9375, "step": 5027 }, { "epoch": 0.047595157183290576, "grad_norm": 190.07388305664062, "learning_rate": 1.998378033861062e-06, "loss": 23.6562, "step": 5028 }, { "epoch": 0.04760462320500563, "grad_norm": 701.0388793945312, "learning_rate": 1.99837628793703e-06, "loss": 26.7734, "step": 5029 }, { "epoch": 0.04761408922672069, "grad_norm": 218.05516052246094, "learning_rate": 1.9983745410745894e-06, "loss": 29.0, "step": 5030 }, { "epoch": 0.04762355524843574, "grad_norm": 291.8755187988281, "learning_rate": 1.9983727932737417e-06, "loss": 27.7266, "step": 5031 }, { "epoch": 0.047633021270150795, "grad_norm": 751.7029418945312, "learning_rate": 1.9983710445344886e-06, "loss": 55.2227, "step": 5032 }, { "epoch": 0.04764248729186585, "grad_norm": 357.5185546875, "learning_rate": 1.998369294856831e-06, "loss": 52.7812, "step": 5033 }, { "epoch": 0.0476519533135809, "grad_norm": 312.5255126953125, "learning_rate": 1.998367544240772e-06, "loss": 30.2188, "step": 5034 }, { "epoch": 0.047661419335295954, "grad_norm": 235.6238250732422, "learning_rate": 1.998365792686312e-06, "loss": 26.4609, "step": 5035 }, { "epoch": 0.04767088535701101, "grad_norm": 349.7823181152344, "learning_rate": 1.9983640401934532e-06, "loss": 38.4219, "step": 5036 }, { "epoch": 0.04768035137872606, "grad_norm": 279.1169128417969, "learning_rate": 1.9983622867621975e-06, "loss": 22.2109, "step": 5037 }, { "epoch": 0.04768981740044111, "grad_norm": 567.4696655273438, "learning_rate": 1.9983605323925458e-06, "loss": 26.6953, "step": 5038 }, { "epoch": 0.04769928342215617, "grad_norm": 574.74169921875, "learning_rate": 1.9983587770845005e-06, "loss": 22.3047, "step": 5039 }, { "epoch": 0.047708749443871226, "grad_norm": 579.5692138671875, "learning_rate": 1.9983570208380626e-06, "loss": 25.0547, "step": 5040 }, { "epoch": 0.04771821546558628, "grad_norm": 203.68820190429688, "learning_rate": 1.9983552636532337e-06, "loss": 25.8516, "step": 5041 }, { "epoch": 0.04772768148730133, "grad_norm": 636.0509643554688, "learning_rate": 1.998353505530016e-06, "loss": 29.9219, "step": 5042 }, { "epoch": 0.047737147509016385, "grad_norm": 1583.306640625, "learning_rate": 1.9983517464684116e-06, "loss": 43.6406, "step": 5043 }, { "epoch": 0.04774661353073144, "grad_norm": 383.0928649902344, "learning_rate": 1.998349986468421e-06, "loss": 27.5625, "step": 5044 }, { "epoch": 0.04775607955244649, "grad_norm": 351.5042419433594, "learning_rate": 1.9983482255300464e-06, "loss": 52.875, "step": 5045 }, { "epoch": 0.047765545574161544, "grad_norm": 1133.0323486328125, "learning_rate": 1.9983464636532895e-06, "loss": 56.4297, "step": 5046 }, { "epoch": 0.047775011595876604, "grad_norm": 163.3997039794922, "learning_rate": 1.9983447008381517e-06, "loss": 22.4141, "step": 5047 }, { "epoch": 0.04778447761759166, "grad_norm": 368.10888671875, "learning_rate": 1.998342937084635e-06, "loss": 25.8203, "step": 5048 }, { "epoch": 0.04779394363930671, "grad_norm": 196.52105712890625, "learning_rate": 1.9983411723927406e-06, "loss": 23.0781, "step": 5049 }, { "epoch": 0.04780340966102176, "grad_norm": 451.6856384277344, "learning_rate": 1.9983394067624707e-06, "loss": 27.0703, "step": 5050 }, { "epoch": 0.047812875682736816, "grad_norm": 1076.276611328125, "learning_rate": 1.9983376401938267e-06, "loss": 48.5781, "step": 5051 }, { "epoch": 0.04782234170445187, "grad_norm": 368.2643127441406, "learning_rate": 1.9983358726868104e-06, "loss": 32.3516, "step": 5052 }, { "epoch": 0.04783180772616692, "grad_norm": 343.957275390625, "learning_rate": 1.998334104241423e-06, "loss": 39.9688, "step": 5053 }, { "epoch": 0.047841273747881975, "grad_norm": 222.6621551513672, "learning_rate": 1.9983323348576668e-06, "loss": 22.6094, "step": 5054 }, { "epoch": 0.04785073976959703, "grad_norm": 347.5470275878906, "learning_rate": 1.998330564535543e-06, "loss": 47.9844, "step": 5055 }, { "epoch": 0.04786020579131209, "grad_norm": 223.65870666503906, "learning_rate": 1.9983287932750533e-06, "loss": 22.2812, "step": 5056 }, { "epoch": 0.04786967181302714, "grad_norm": 716.675537109375, "learning_rate": 1.9983270210761996e-06, "loss": 45.875, "step": 5057 }, { "epoch": 0.047879137834742194, "grad_norm": 210.7848358154297, "learning_rate": 1.9983252479389832e-06, "loss": 20.8594, "step": 5058 }, { "epoch": 0.04788860385645725, "grad_norm": 2.929103374481201, "learning_rate": 1.9983234738634064e-06, "loss": 0.8599, "step": 5059 }, { "epoch": 0.0478980698781723, "grad_norm": 389.3626708984375, "learning_rate": 1.9983216988494704e-06, "loss": 52.375, "step": 5060 }, { "epoch": 0.04790753589988735, "grad_norm": 201.16757202148438, "learning_rate": 1.998319922897177e-06, "loss": 23.0938, "step": 5061 }, { "epoch": 0.047917001921602406, "grad_norm": 459.6940002441406, "learning_rate": 1.9983181460065276e-06, "loss": 17.5391, "step": 5062 }, { "epoch": 0.04792646794331746, "grad_norm": 460.77978515625, "learning_rate": 1.998316368177524e-06, "loss": 28.5078, "step": 5063 }, { "epoch": 0.04793593396503252, "grad_norm": 854.8928833007812, "learning_rate": 1.9983145894101684e-06, "loss": 67.9688, "step": 5064 }, { "epoch": 0.04794539998674757, "grad_norm": 558.1221313476562, "learning_rate": 1.9983128097044614e-06, "loss": 22.4844, "step": 5065 }, { "epoch": 0.047954866008462625, "grad_norm": 466.6468505859375, "learning_rate": 1.998311029060406e-06, "loss": 35.3594, "step": 5066 }, { "epoch": 0.04796433203017768, "grad_norm": 223.0441436767578, "learning_rate": 1.998309247478003e-06, "loss": 25.2031, "step": 5067 }, { "epoch": 0.04797379805189273, "grad_norm": 185.67276000976562, "learning_rate": 1.998307464957254e-06, "loss": 19.6328, "step": 5068 }, { "epoch": 0.047983264073607784, "grad_norm": 393.2005920410156, "learning_rate": 1.9983056814981606e-06, "loss": 51.625, "step": 5069 }, { "epoch": 0.04799273009532284, "grad_norm": 290.8404541015625, "learning_rate": 1.9983038971007254e-06, "loss": 36.25, "step": 5070 }, { "epoch": 0.04800219611703789, "grad_norm": 333.7303466796875, "learning_rate": 1.9983021117649497e-06, "loss": 23.8008, "step": 5071 }, { "epoch": 0.04801166213875294, "grad_norm": 249.88063049316406, "learning_rate": 1.9983003254908342e-06, "loss": 24.3672, "step": 5072 }, { "epoch": 0.048021128160468, "grad_norm": 440.74273681640625, "learning_rate": 1.9982985382783816e-06, "loss": 27.7969, "step": 5073 }, { "epoch": 0.048030594182183056, "grad_norm": 578.466064453125, "learning_rate": 1.9982967501275935e-06, "loss": 51.9062, "step": 5074 }, { "epoch": 0.04804006020389811, "grad_norm": 2.999251365661621, "learning_rate": 1.998294961038471e-06, "loss": 0.8257, "step": 5075 }, { "epoch": 0.04804952622561316, "grad_norm": 443.05322265625, "learning_rate": 1.998293171011017e-06, "loss": 22.0781, "step": 5076 }, { "epoch": 0.048058992247328215, "grad_norm": 1261.7855224609375, "learning_rate": 1.9982913800452316e-06, "loss": 92.4531, "step": 5077 }, { "epoch": 0.04806845826904327, "grad_norm": 381.1056823730469, "learning_rate": 1.9982895881411177e-06, "loss": 26.5938, "step": 5078 }, { "epoch": 0.04807792429075832, "grad_norm": 446.94903564453125, "learning_rate": 1.9982877952986764e-06, "loss": 33.3281, "step": 5079 }, { "epoch": 0.048087390312473374, "grad_norm": 404.22857666015625, "learning_rate": 1.9982860015179093e-06, "loss": 42.0391, "step": 5080 }, { "epoch": 0.04809685633418843, "grad_norm": 490.7121887207031, "learning_rate": 1.9982842067988182e-06, "loss": 38.4219, "step": 5081 }, { "epoch": 0.04810632235590349, "grad_norm": 239.42745971679688, "learning_rate": 1.998282411141405e-06, "loss": 19.25, "step": 5082 }, { "epoch": 0.04811578837761854, "grad_norm": 608.2982177734375, "learning_rate": 1.998280614545672e-06, "loss": 19.9219, "step": 5083 }, { "epoch": 0.04812525439933359, "grad_norm": 334.5447692871094, "learning_rate": 1.9982788170116197e-06, "loss": 32.2031, "step": 5084 }, { "epoch": 0.048134720421048646, "grad_norm": 730.7213745117188, "learning_rate": 1.99827701853925e-06, "loss": 48.4375, "step": 5085 }, { "epoch": 0.0481441864427637, "grad_norm": 196.80300903320312, "learning_rate": 1.998275219128565e-06, "loss": 21.9297, "step": 5086 }, { "epoch": 0.04815365246447875, "grad_norm": 699.2928466796875, "learning_rate": 1.9982734187795663e-06, "loss": 30.1484, "step": 5087 }, { "epoch": 0.048163118486193805, "grad_norm": 779.814208984375, "learning_rate": 1.998271617492255e-06, "loss": 65.125, "step": 5088 }, { "epoch": 0.04817258450790886, "grad_norm": 209.43508911132812, "learning_rate": 1.998269815266634e-06, "loss": 22.5938, "step": 5089 }, { "epoch": 0.04818205052962392, "grad_norm": 478.05975341796875, "learning_rate": 1.998268012102704e-06, "loss": 28.6562, "step": 5090 }, { "epoch": 0.04819151655133897, "grad_norm": 281.19329833984375, "learning_rate": 1.998266208000467e-06, "loss": 26.7656, "step": 5091 }, { "epoch": 0.048200982573054024, "grad_norm": 583.8187866210938, "learning_rate": 1.998264402959925e-06, "loss": 53.1094, "step": 5092 }, { "epoch": 0.04821044859476908, "grad_norm": 333.410400390625, "learning_rate": 1.998262596981079e-06, "loss": 36.3672, "step": 5093 }, { "epoch": 0.04821991461648413, "grad_norm": 198.68142700195312, "learning_rate": 1.9982607900639314e-06, "loss": 23.9922, "step": 5094 }, { "epoch": 0.04822938063819918, "grad_norm": 814.661376953125, "learning_rate": 1.9982589822084837e-06, "loss": 51.3125, "step": 5095 }, { "epoch": 0.048238846659914236, "grad_norm": 258.6363525390625, "learning_rate": 1.9982571734147374e-06, "loss": 26.9297, "step": 5096 }, { "epoch": 0.04824831268162929, "grad_norm": 956.1966552734375, "learning_rate": 1.998255363682694e-06, "loss": 49.9766, "step": 5097 }, { "epoch": 0.04825777870334434, "grad_norm": 336.2134094238281, "learning_rate": 1.9982535530123556e-06, "loss": 29.7656, "step": 5098 }, { "epoch": 0.0482672447250594, "grad_norm": 227.8562469482422, "learning_rate": 1.998251741403724e-06, "loss": 26.625, "step": 5099 }, { "epoch": 0.048276710746774455, "grad_norm": 268.8437805175781, "learning_rate": 1.9982499288568006e-06, "loss": 37.3594, "step": 5100 }, { "epoch": 0.04828617676848951, "grad_norm": 208.97576904296875, "learning_rate": 1.998248115371587e-06, "loss": 19.7188, "step": 5101 }, { "epoch": 0.04829564279020456, "grad_norm": 4144.763671875, "learning_rate": 1.9982463009480853e-06, "loss": 27.875, "step": 5102 }, { "epoch": 0.048305108811919614, "grad_norm": 407.7375183105469, "learning_rate": 1.9982444855862972e-06, "loss": 33.8125, "step": 5103 }, { "epoch": 0.04831457483363467, "grad_norm": 601.0133056640625, "learning_rate": 1.998242669286224e-06, "loss": 65.6875, "step": 5104 }, { "epoch": 0.04832404085534972, "grad_norm": 415.3020324707031, "learning_rate": 1.9982408520478677e-06, "loss": 67.7812, "step": 5105 }, { "epoch": 0.04833350687706477, "grad_norm": 231.29164123535156, "learning_rate": 1.99823903387123e-06, "loss": 24.6562, "step": 5106 }, { "epoch": 0.04834297289877983, "grad_norm": 204.03456115722656, "learning_rate": 1.9982372147563126e-06, "loss": 19.6562, "step": 5107 }, { "epoch": 0.048352438920494886, "grad_norm": 3.257467746734619, "learning_rate": 1.998235394703117e-06, "loss": 0.8982, "step": 5108 }, { "epoch": 0.04836190494220994, "grad_norm": 646.3291015625, "learning_rate": 1.9982335737116452e-06, "loss": 59.2031, "step": 5109 }, { "epoch": 0.04837137096392499, "grad_norm": 3.1203408241271973, "learning_rate": 1.9982317517818985e-06, "loss": 1.021, "step": 5110 }, { "epoch": 0.048380836985640045, "grad_norm": 2.680015802383423, "learning_rate": 1.998229928913879e-06, "loss": 0.9419, "step": 5111 }, { "epoch": 0.0483903030073551, "grad_norm": 251.99136352539062, "learning_rate": 1.9982281051075884e-06, "loss": 27.5, "step": 5112 }, { "epoch": 0.04839976902907015, "grad_norm": 334.4241027832031, "learning_rate": 1.9982262803630284e-06, "loss": 22.8359, "step": 5113 }, { "epoch": 0.048409235050785204, "grad_norm": 453.2379455566406, "learning_rate": 1.9982244546802007e-06, "loss": 36.0781, "step": 5114 }, { "epoch": 0.04841870107250026, "grad_norm": 207.59730529785156, "learning_rate": 1.9982226280591065e-06, "loss": 25.1797, "step": 5115 }, { "epoch": 0.04842816709421532, "grad_norm": 1027.565185546875, "learning_rate": 1.9982208004997484e-06, "loss": 68.1875, "step": 5116 }, { "epoch": 0.04843763311593037, "grad_norm": 310.982666015625, "learning_rate": 1.9982189720021277e-06, "loss": 27.8867, "step": 5117 }, { "epoch": 0.04844709913764542, "grad_norm": 213.55662536621094, "learning_rate": 1.9982171425662457e-06, "loss": 24.6719, "step": 5118 }, { "epoch": 0.048456565159360476, "grad_norm": 202.9219970703125, "learning_rate": 1.9982153121921052e-06, "loss": 25.9688, "step": 5119 }, { "epoch": 0.04846603118107553, "grad_norm": 365.7906799316406, "learning_rate": 1.9982134808797064e-06, "loss": 28.875, "step": 5120 }, { "epoch": 0.04847549720279058, "grad_norm": 336.6467590332031, "learning_rate": 1.9982116486290525e-06, "loss": 27.9844, "step": 5121 }, { "epoch": 0.048484963224505635, "grad_norm": 492.2422180175781, "learning_rate": 1.9982098154401445e-06, "loss": 25.3438, "step": 5122 }, { "epoch": 0.04849442924622069, "grad_norm": 266.2247619628906, "learning_rate": 1.998207981312984e-06, "loss": 24.6406, "step": 5123 }, { "epoch": 0.04850389526793574, "grad_norm": 519.4354248046875, "learning_rate": 1.9982061462475727e-06, "loss": 38.3438, "step": 5124 }, { "epoch": 0.0485133612896508, "grad_norm": 2.8549959659576416, "learning_rate": 1.998204310243913e-06, "loss": 0.8242, "step": 5125 }, { "epoch": 0.048522827311365854, "grad_norm": 678.2728881835938, "learning_rate": 1.998202473302006e-06, "loss": 35.8281, "step": 5126 }, { "epoch": 0.04853229333308091, "grad_norm": 223.77845764160156, "learning_rate": 1.998200635421854e-06, "loss": 24.6641, "step": 5127 }, { "epoch": 0.04854175935479596, "grad_norm": 405.7039794921875, "learning_rate": 1.9981987966034577e-06, "loss": 58.7188, "step": 5128 }, { "epoch": 0.04855122537651101, "grad_norm": 378.7366027832031, "learning_rate": 1.99819695684682e-06, "loss": 30.6484, "step": 5129 }, { "epoch": 0.048560691398226066, "grad_norm": 806.5079956054688, "learning_rate": 1.998195116151942e-06, "loss": 25.9688, "step": 5130 }, { "epoch": 0.04857015741994112, "grad_norm": 444.58526611328125, "learning_rate": 1.998193274518825e-06, "loss": 25.9688, "step": 5131 }, { "epoch": 0.04857962344165617, "grad_norm": 420.381103515625, "learning_rate": 1.9981914319474717e-06, "loss": 47.8203, "step": 5132 }, { "epoch": 0.04858908946337123, "grad_norm": 559.9047241210938, "learning_rate": 1.9981895884378835e-06, "loss": 54.0, "step": 5133 }, { "epoch": 0.048598555485086285, "grad_norm": 250.37710571289062, "learning_rate": 1.998187743990062e-06, "loss": 18.1289, "step": 5134 }, { "epoch": 0.04860802150680134, "grad_norm": 375.27105712890625, "learning_rate": 1.9981858986040087e-06, "loss": 22.9844, "step": 5135 }, { "epoch": 0.04861748752851639, "grad_norm": 424.8392333984375, "learning_rate": 1.998184052279726e-06, "loss": 41.4688, "step": 5136 }, { "epoch": 0.048626953550231444, "grad_norm": 810.2687377929688, "learning_rate": 1.9981822050172146e-06, "loss": 50.8203, "step": 5137 }, { "epoch": 0.0486364195719465, "grad_norm": 832.7623291015625, "learning_rate": 1.9981803568164776e-06, "loss": 48.4609, "step": 5138 }, { "epoch": 0.04864588559366155, "grad_norm": 3.305624008178711, "learning_rate": 1.9981785076775152e-06, "loss": 0.9136, "step": 5139 }, { "epoch": 0.0486553516153766, "grad_norm": 332.3863525390625, "learning_rate": 1.9981766576003305e-06, "loss": 25.6406, "step": 5140 }, { "epoch": 0.048664817637091656, "grad_norm": 1004.7781372070312, "learning_rate": 1.9981748065849246e-06, "loss": 33.3125, "step": 5141 }, { "epoch": 0.048674283658806716, "grad_norm": 577.759033203125, "learning_rate": 1.9981729546312993e-06, "loss": 35.7656, "step": 5142 }, { "epoch": 0.04868374968052177, "grad_norm": 245.04685974121094, "learning_rate": 1.9981711017394567e-06, "loss": 27.2969, "step": 5143 }, { "epoch": 0.04869321570223682, "grad_norm": 765.3406372070312, "learning_rate": 1.9981692479093976e-06, "loss": 67.8438, "step": 5144 }, { "epoch": 0.048702681723951875, "grad_norm": 481.549072265625, "learning_rate": 1.9981673931411246e-06, "loss": 22.957, "step": 5145 }, { "epoch": 0.04871214774566693, "grad_norm": 428.3376770019531, "learning_rate": 1.998165537434639e-06, "loss": 28.8906, "step": 5146 }, { "epoch": 0.04872161376738198, "grad_norm": 515.824951171875, "learning_rate": 1.998163680789943e-06, "loss": 25.4609, "step": 5147 }, { "epoch": 0.048731079789097034, "grad_norm": 447.50732421875, "learning_rate": 1.9981618232070376e-06, "loss": 50.4062, "step": 5148 }, { "epoch": 0.04874054581081209, "grad_norm": 357.427001953125, "learning_rate": 1.9981599646859254e-06, "loss": 25.8516, "step": 5149 }, { "epoch": 0.04875001183252715, "grad_norm": 531.6966552734375, "learning_rate": 1.9981581052266077e-06, "loss": 43.0, "step": 5150 }, { "epoch": 0.0487594778542422, "grad_norm": 335.1936950683594, "learning_rate": 1.9981562448290863e-06, "loss": 34.4531, "step": 5151 }, { "epoch": 0.04876894387595725, "grad_norm": 590.9857788085938, "learning_rate": 1.9981543834933632e-06, "loss": 31.9219, "step": 5152 }, { "epoch": 0.048778409897672306, "grad_norm": 385.0715026855469, "learning_rate": 1.99815252121944e-06, "loss": 26.5703, "step": 5153 }, { "epoch": 0.04878787591938736, "grad_norm": 573.6887817382812, "learning_rate": 1.9981506580073177e-06, "loss": 49.7656, "step": 5154 }, { "epoch": 0.04879734194110241, "grad_norm": 427.3918762207031, "learning_rate": 1.9981487938569994e-06, "loss": 29.9062, "step": 5155 }, { "epoch": 0.048806807962817465, "grad_norm": 458.49200439453125, "learning_rate": 1.998146928768486e-06, "loss": 42.1406, "step": 5156 }, { "epoch": 0.04881627398453252, "grad_norm": 278.4169921875, "learning_rate": 1.998145062741779e-06, "loss": 30.0469, "step": 5157 }, { "epoch": 0.04882574000624757, "grad_norm": 357.3337707519531, "learning_rate": 1.998143195776881e-06, "loss": 42.8281, "step": 5158 }, { "epoch": 0.04883520602796263, "grad_norm": 211.1962127685547, "learning_rate": 1.998141327873793e-06, "loss": 21.7031, "step": 5159 }, { "epoch": 0.048844672049677684, "grad_norm": 476.93682861328125, "learning_rate": 1.9981394590325172e-06, "loss": 38.8281, "step": 5160 }, { "epoch": 0.04885413807139274, "grad_norm": 808.9371337890625, "learning_rate": 1.9981375892530556e-06, "loss": 36.4297, "step": 5161 }, { "epoch": 0.04886360409310779, "grad_norm": 322.5860900878906, "learning_rate": 1.9981357185354093e-06, "loss": 19.1406, "step": 5162 }, { "epoch": 0.04887307011482284, "grad_norm": 517.3023681640625, "learning_rate": 1.9981338468795803e-06, "loss": 28.5859, "step": 5163 }, { "epoch": 0.048882536136537896, "grad_norm": 1200.0032958984375, "learning_rate": 1.9981319742855705e-06, "loss": 48.6406, "step": 5164 }, { "epoch": 0.04889200215825295, "grad_norm": 1156.256591796875, "learning_rate": 1.998130100753382e-06, "loss": 64.7734, "step": 5165 }, { "epoch": 0.048901468179968, "grad_norm": 347.3496398925781, "learning_rate": 1.9981282262830154e-06, "loss": 37.4531, "step": 5166 }, { "epoch": 0.048910934201683055, "grad_norm": 853.1897583007812, "learning_rate": 1.9981263508744735e-06, "loss": 28.125, "step": 5167 }, { "epoch": 0.048920400223398115, "grad_norm": 425.0667724609375, "learning_rate": 1.998124474527758e-06, "loss": 39.2891, "step": 5168 }, { "epoch": 0.04892986624511317, "grad_norm": 529.8400268554688, "learning_rate": 1.9981225972428703e-06, "loss": 47.3906, "step": 5169 }, { "epoch": 0.04893933226682822, "grad_norm": 526.0480346679688, "learning_rate": 1.998120719019812e-06, "loss": 30.625, "step": 5170 }, { "epoch": 0.048948798288543274, "grad_norm": 219.2708740234375, "learning_rate": 1.998118839858585e-06, "loss": 28.0469, "step": 5171 }, { "epoch": 0.04895826431025833, "grad_norm": 289.1834716796875, "learning_rate": 1.998116959759192e-06, "loss": 26.8438, "step": 5172 }, { "epoch": 0.04896773033197338, "grad_norm": 3.072312831878662, "learning_rate": 1.9981150787216334e-06, "loss": 0.7971, "step": 5173 }, { "epoch": 0.04897719635368843, "grad_norm": 691.4142456054688, "learning_rate": 1.9981131967459116e-06, "loss": 58.3906, "step": 5174 }, { "epoch": 0.048986662375403486, "grad_norm": 2.5461976528167725, "learning_rate": 1.9981113138320284e-06, "loss": 0.717, "step": 5175 }, { "epoch": 0.048996128397118546, "grad_norm": 5285.74267578125, "learning_rate": 1.9981094299799856e-06, "loss": 58.4688, "step": 5176 }, { "epoch": 0.0490055944188336, "grad_norm": 721.47314453125, "learning_rate": 1.998107545189785e-06, "loss": 55.4062, "step": 5177 }, { "epoch": 0.04901506044054865, "grad_norm": 779.8312377929688, "learning_rate": 1.998105659461428e-06, "loss": 22.3125, "step": 5178 }, { "epoch": 0.049024526462263705, "grad_norm": 596.8756103515625, "learning_rate": 1.9981037727949164e-06, "loss": 59.625, "step": 5179 }, { "epoch": 0.04903399248397876, "grad_norm": 321.33233642578125, "learning_rate": 1.998101885190253e-06, "loss": 24.6875, "step": 5180 }, { "epoch": 0.04904345850569381, "grad_norm": 422.54559326171875, "learning_rate": 1.998099996647438e-06, "loss": 38.5625, "step": 5181 }, { "epoch": 0.049052924527408864, "grad_norm": 644.8276977539062, "learning_rate": 1.998098107166474e-06, "loss": 29.6836, "step": 5182 }, { "epoch": 0.04906239054912392, "grad_norm": 236.41900634765625, "learning_rate": 1.998096216747363e-06, "loss": 25.1406, "step": 5183 }, { "epoch": 0.04907185657083897, "grad_norm": 1430.39306640625, "learning_rate": 1.9980943253901066e-06, "loss": 53.4062, "step": 5184 }, { "epoch": 0.04908132259255403, "grad_norm": 3.0889763832092285, "learning_rate": 1.998092433094706e-06, "loss": 0.9243, "step": 5185 }, { "epoch": 0.04909078861426908, "grad_norm": 475.5988464355469, "learning_rate": 1.9980905398611636e-06, "loss": 47.4531, "step": 5186 }, { "epoch": 0.049100254635984136, "grad_norm": 258.5972900390625, "learning_rate": 1.998088645689481e-06, "loss": 23.7891, "step": 5187 }, { "epoch": 0.04910972065769919, "grad_norm": 514.62939453125, "learning_rate": 1.9980867505796598e-06, "loss": 31.5, "step": 5188 }, { "epoch": 0.04911918667941424, "grad_norm": 380.1419372558594, "learning_rate": 1.998084854531703e-06, "loss": 20.8828, "step": 5189 }, { "epoch": 0.049128652701129295, "grad_norm": 215.22946166992188, "learning_rate": 1.99808295754561e-06, "loss": 21.2812, "step": 5190 }, { "epoch": 0.04913811872284435, "grad_norm": 517.4558715820312, "learning_rate": 1.9980810596213846e-06, "loss": 54.3516, "step": 5191 }, { "epoch": 0.0491475847445594, "grad_norm": 633.379150390625, "learning_rate": 1.998079160759028e-06, "loss": 31.9609, "step": 5192 }, { "epoch": 0.04915705076627446, "grad_norm": 354.2154235839844, "learning_rate": 1.9980772609585415e-06, "loss": 23.8906, "step": 5193 }, { "epoch": 0.049166516787989514, "grad_norm": 332.4269714355469, "learning_rate": 1.9980753602199283e-06, "loss": 21.0391, "step": 5194 }, { "epoch": 0.04917598280970457, "grad_norm": 525.1338500976562, "learning_rate": 1.9980734585431883e-06, "loss": 51.1172, "step": 5195 }, { "epoch": 0.04918544883141962, "grad_norm": 418.5876159667969, "learning_rate": 1.998071555928324e-06, "loss": 38.875, "step": 5196 }, { "epoch": 0.04919491485313467, "grad_norm": 333.3221435546875, "learning_rate": 1.998069652375338e-06, "loss": 25.3125, "step": 5197 }, { "epoch": 0.049204380874849726, "grad_norm": 492.2866516113281, "learning_rate": 1.9980677478842312e-06, "loss": 36.7969, "step": 5198 }, { "epoch": 0.04921384689656478, "grad_norm": 1126.69384765625, "learning_rate": 1.9980658424550057e-06, "loss": 41.9219, "step": 5199 }, { "epoch": 0.04922331291827983, "grad_norm": 265.4482727050781, "learning_rate": 1.9980639360876633e-06, "loss": 31.4531, "step": 5200 }, { "epoch": 0.049232778939994885, "grad_norm": 241.1100616455078, "learning_rate": 1.9980620287822056e-06, "loss": 24.7578, "step": 5201 }, { "epoch": 0.049242244961709945, "grad_norm": 680.2837524414062, "learning_rate": 1.9980601205386343e-06, "loss": 22.7188, "step": 5202 }, { "epoch": 0.049251710983425, "grad_norm": 1710.6566162109375, "learning_rate": 1.9980582113569517e-06, "loss": 45.1094, "step": 5203 }, { "epoch": 0.04926117700514005, "grad_norm": 261.5258483886719, "learning_rate": 1.9980563012371592e-06, "loss": 26.9531, "step": 5204 }, { "epoch": 0.049270643026855104, "grad_norm": 409.3571472167969, "learning_rate": 1.998054390179259e-06, "loss": 30.4609, "step": 5205 }, { "epoch": 0.04928010904857016, "grad_norm": 993.267822265625, "learning_rate": 1.9980524781832524e-06, "loss": 56.0312, "step": 5206 }, { "epoch": 0.04928957507028521, "grad_norm": 1005.1253051757812, "learning_rate": 1.9980505652491413e-06, "loss": 42.7969, "step": 5207 }, { "epoch": 0.04929904109200026, "grad_norm": 587.9796142578125, "learning_rate": 1.9980486513769277e-06, "loss": 51.0547, "step": 5208 }, { "epoch": 0.049308507113715316, "grad_norm": 554.8817749023438, "learning_rate": 1.9980467365666133e-06, "loss": 52.5156, "step": 5209 }, { "epoch": 0.04931797313543037, "grad_norm": 392.9414367675781, "learning_rate": 1.9980448208182e-06, "loss": 33.2891, "step": 5210 }, { "epoch": 0.04932743915714543, "grad_norm": 226.76649475097656, "learning_rate": 1.9980429041316894e-06, "loss": 20.5234, "step": 5211 }, { "epoch": 0.04933690517886048, "grad_norm": 989.1110229492188, "learning_rate": 1.998040986507083e-06, "loss": 36.8906, "step": 5212 }, { "epoch": 0.049346371200575535, "grad_norm": 609.2036743164062, "learning_rate": 1.998039067944384e-06, "loss": 51.8125, "step": 5213 }, { "epoch": 0.04935583722229059, "grad_norm": 2.7856369018554688, "learning_rate": 1.9980371484435923e-06, "loss": 0.8857, "step": 5214 }, { "epoch": 0.04936530324400564, "grad_norm": 390.9459228515625, "learning_rate": 1.998035228004711e-06, "loss": 32.4531, "step": 5215 }, { "epoch": 0.049374769265720694, "grad_norm": 2.906804084777832, "learning_rate": 1.998033306627741e-06, "loss": 0.7136, "step": 5216 }, { "epoch": 0.04938423528743575, "grad_norm": 598.6974487304688, "learning_rate": 1.998031384312685e-06, "loss": 34.375, "step": 5217 }, { "epoch": 0.0493937013091508, "grad_norm": 691.2288208007812, "learning_rate": 1.9980294610595445e-06, "loss": 56.3438, "step": 5218 }, { "epoch": 0.04940316733086586, "grad_norm": 579.7899780273438, "learning_rate": 1.998027536868321e-06, "loss": 20.2812, "step": 5219 }, { "epoch": 0.04941263335258091, "grad_norm": 375.57330322265625, "learning_rate": 1.9980256117390168e-06, "loss": 29.1562, "step": 5220 }, { "epoch": 0.049422099374295966, "grad_norm": 232.38436889648438, "learning_rate": 1.9980236856716336e-06, "loss": 24.3047, "step": 5221 }, { "epoch": 0.04943156539601102, "grad_norm": 3.0018670558929443, "learning_rate": 1.9980217586661725e-06, "loss": 1.02, "step": 5222 }, { "epoch": 0.04944103141772607, "grad_norm": 323.08966064453125, "learning_rate": 1.9980198307226363e-06, "loss": 26.5156, "step": 5223 }, { "epoch": 0.049450497439441125, "grad_norm": 634.68994140625, "learning_rate": 1.998017901841026e-06, "loss": 58.2031, "step": 5224 }, { "epoch": 0.04945996346115618, "grad_norm": 1589.779052734375, "learning_rate": 1.9980159720213445e-06, "loss": 48.8516, "step": 5225 }, { "epoch": 0.04946942948287123, "grad_norm": 580.04248046875, "learning_rate": 1.9980140412635926e-06, "loss": 45.0469, "step": 5226 }, { "epoch": 0.049478895504586284, "grad_norm": 552.8474731445312, "learning_rate": 1.998012109567772e-06, "loss": 38.8906, "step": 5227 }, { "epoch": 0.049488361526301344, "grad_norm": 493.69024658203125, "learning_rate": 1.998010176933885e-06, "loss": 30.8281, "step": 5228 }, { "epoch": 0.0494978275480164, "grad_norm": 314.4693603515625, "learning_rate": 1.998008243361934e-06, "loss": 33.3906, "step": 5229 }, { "epoch": 0.04950729356973145, "grad_norm": 642.1728515625, "learning_rate": 1.9980063088519197e-06, "loss": 24.1055, "step": 5230 }, { "epoch": 0.0495167595914465, "grad_norm": 434.8253173828125, "learning_rate": 1.9980043734038444e-06, "loss": 35.6719, "step": 5231 }, { "epoch": 0.049526225613161556, "grad_norm": 471.2936096191406, "learning_rate": 1.9980024370177098e-06, "loss": 36.125, "step": 5232 }, { "epoch": 0.04953569163487661, "grad_norm": 3.1551895141601562, "learning_rate": 1.9980004996935184e-06, "loss": 0.8281, "step": 5233 }, { "epoch": 0.04954515765659166, "grad_norm": 488.4020080566406, "learning_rate": 1.997998561431271e-06, "loss": 32.4219, "step": 5234 }, { "epoch": 0.049554623678306715, "grad_norm": 326.4213562011719, "learning_rate": 1.99799662223097e-06, "loss": 24.3906, "step": 5235 }, { "epoch": 0.04956408970002177, "grad_norm": 2.9514169692993164, "learning_rate": 1.997994682092617e-06, "loss": 0.8796, "step": 5236 }, { "epoch": 0.04957355572173683, "grad_norm": 916.8401489257812, "learning_rate": 1.997992741016214e-06, "loss": 40.2188, "step": 5237 }, { "epoch": 0.04958302174345188, "grad_norm": 151.92575073242188, "learning_rate": 1.9979907990017626e-06, "loss": 20.7266, "step": 5238 }, { "epoch": 0.049592487765166934, "grad_norm": 245.1997528076172, "learning_rate": 1.9979888560492647e-06, "loss": 28.4688, "step": 5239 }, { "epoch": 0.04960195378688199, "grad_norm": 743.9102172851562, "learning_rate": 1.9979869121587227e-06, "loss": 58.9219, "step": 5240 }, { "epoch": 0.04961141980859704, "grad_norm": 245.05882263183594, "learning_rate": 1.9979849673301374e-06, "loss": 21.1562, "step": 5241 }, { "epoch": 0.04962088583031209, "grad_norm": 367.3678894042969, "learning_rate": 1.9979830215635114e-06, "loss": 25.0469, "step": 5242 }, { "epoch": 0.049630351852027146, "grad_norm": 356.7420654296875, "learning_rate": 1.9979810748588464e-06, "loss": 21.7344, "step": 5243 }, { "epoch": 0.0496398178737422, "grad_norm": 400.4414978027344, "learning_rate": 1.9979791272161437e-06, "loss": 28.1562, "step": 5244 }, { "epoch": 0.04964928389545726, "grad_norm": 614.1138305664062, "learning_rate": 1.997977178635406e-06, "loss": 24.3125, "step": 5245 }, { "epoch": 0.04965874991717231, "grad_norm": 884.127685546875, "learning_rate": 1.9979752291166343e-06, "loss": 50.2969, "step": 5246 }, { "epoch": 0.049668215938887365, "grad_norm": 214.02169799804688, "learning_rate": 1.997973278659831e-06, "loss": 34.7812, "step": 5247 }, { "epoch": 0.04967768196060242, "grad_norm": 256.9007568359375, "learning_rate": 1.9979713272649977e-06, "loss": 23.6875, "step": 5248 }, { "epoch": 0.04968714798231747, "grad_norm": 700.6049194335938, "learning_rate": 1.9979693749321363e-06, "loss": 62.4375, "step": 5249 }, { "epoch": 0.049696614004032524, "grad_norm": 243.4370574951172, "learning_rate": 1.9979674216612486e-06, "loss": 23.5234, "step": 5250 }, { "epoch": 0.04970608002574758, "grad_norm": 721.7056274414062, "learning_rate": 1.9979654674523367e-06, "loss": 39.8125, "step": 5251 }, { "epoch": 0.04971554604746263, "grad_norm": 176.00653076171875, "learning_rate": 1.997963512305402e-06, "loss": 21.9609, "step": 5252 }, { "epoch": 0.04972501206917768, "grad_norm": 398.43841552734375, "learning_rate": 1.9979615562204463e-06, "loss": 34.8125, "step": 5253 }, { "epoch": 0.04973447809089274, "grad_norm": 3.1930084228515625, "learning_rate": 1.9979595991974716e-06, "loss": 0.8975, "step": 5254 }, { "epoch": 0.049743944112607796, "grad_norm": 361.5784912109375, "learning_rate": 1.9979576412364803e-06, "loss": 19.8125, "step": 5255 }, { "epoch": 0.04975341013432285, "grad_norm": 1539.3179931640625, "learning_rate": 1.9979556823374733e-06, "loss": 51.9062, "step": 5256 }, { "epoch": 0.0497628761560379, "grad_norm": 432.0285949707031, "learning_rate": 1.997953722500453e-06, "loss": 34.375, "step": 5257 }, { "epoch": 0.049772342177752955, "grad_norm": 608.7377319335938, "learning_rate": 1.9979517617254215e-06, "loss": 44.3906, "step": 5258 }, { "epoch": 0.04978180819946801, "grad_norm": 283.7597351074219, "learning_rate": 1.9979498000123796e-06, "loss": 34.3281, "step": 5259 }, { "epoch": 0.04979127422118306, "grad_norm": 1748.102783203125, "learning_rate": 1.9979478373613304e-06, "loss": 67.8984, "step": 5260 }, { "epoch": 0.049800740242898114, "grad_norm": 511.7937316894531, "learning_rate": 1.997945873772275e-06, "loss": 20.4844, "step": 5261 }, { "epoch": 0.049810206264613174, "grad_norm": 567.5361938476562, "learning_rate": 1.997943909245215e-06, "loss": 31.5234, "step": 5262 }, { "epoch": 0.04981967228632823, "grad_norm": 477.16253662109375, "learning_rate": 1.997941943780153e-06, "loss": 49.9688, "step": 5263 }, { "epoch": 0.04982913830804328, "grad_norm": 392.032958984375, "learning_rate": 1.99793997737709e-06, "loss": 25.3906, "step": 5264 }, { "epoch": 0.04983860432975833, "grad_norm": 412.4135437011719, "learning_rate": 1.9979380100360294e-06, "loss": 27.3672, "step": 5265 }, { "epoch": 0.049848070351473386, "grad_norm": 203.4495391845703, "learning_rate": 1.9979360417569715e-06, "loss": 26.4219, "step": 5266 }, { "epoch": 0.04985753637318844, "grad_norm": 331.6919860839844, "learning_rate": 1.997934072539918e-06, "loss": 28.4453, "step": 5267 }, { "epoch": 0.04986700239490349, "grad_norm": 682.936279296875, "learning_rate": 1.997932102384872e-06, "loss": 37.0781, "step": 5268 }, { "epoch": 0.049876468416618545, "grad_norm": 345.3226623535156, "learning_rate": 1.9979301312918347e-06, "loss": 32.3594, "step": 5269 }, { "epoch": 0.0498859344383336, "grad_norm": 168.68975830078125, "learning_rate": 1.997928159260808e-06, "loss": 23.6719, "step": 5270 }, { "epoch": 0.04989540046004866, "grad_norm": 440.9328918457031, "learning_rate": 1.9979261862917934e-06, "loss": 46.0312, "step": 5271 }, { "epoch": 0.04990486648176371, "grad_norm": 996.1240844726562, "learning_rate": 1.9979242123847936e-06, "loss": 22.6875, "step": 5272 }, { "epoch": 0.049914332503478764, "grad_norm": 1153.6104736328125, "learning_rate": 1.99792223753981e-06, "loss": 37.6484, "step": 5273 }, { "epoch": 0.04992379852519382, "grad_norm": 413.0576171875, "learning_rate": 1.997920261756844e-06, "loss": 35.9062, "step": 5274 }, { "epoch": 0.04993326454690887, "grad_norm": 552.7859497070312, "learning_rate": 1.997918285035898e-06, "loss": 52.625, "step": 5275 }, { "epoch": 0.04994273056862392, "grad_norm": 296.2982177734375, "learning_rate": 1.9979163073769736e-06, "loss": 27.5312, "step": 5276 }, { "epoch": 0.049952196590338976, "grad_norm": 561.613525390625, "learning_rate": 1.997914328780073e-06, "loss": 21.9648, "step": 5277 }, { "epoch": 0.04996166261205403, "grad_norm": 573.782958984375, "learning_rate": 1.997912349245198e-06, "loss": 43.625, "step": 5278 }, { "epoch": 0.04997112863376908, "grad_norm": 371.718505859375, "learning_rate": 1.99791036877235e-06, "loss": 27.0781, "step": 5279 }, { "epoch": 0.04998059465548414, "grad_norm": 309.628173828125, "learning_rate": 1.997908387361531e-06, "loss": 28.2734, "step": 5280 }, { "epoch": 0.049990060677199195, "grad_norm": 391.4361877441406, "learning_rate": 1.997906405012744e-06, "loss": 24.5938, "step": 5281 }, { "epoch": 0.04999952669891425, "grad_norm": 3.188462018966675, "learning_rate": 1.9979044217259888e-06, "loss": 0.8979, "step": 5282 }, { "epoch": 0.0500089927206293, "grad_norm": 245.1901397705078, "learning_rate": 1.997902437501269e-06, "loss": 26.4062, "step": 5283 }, { "epoch": 0.050018458742344354, "grad_norm": 1036.894775390625, "learning_rate": 1.9979004523385852e-06, "loss": 82.7344, "step": 5284 }, { "epoch": 0.05002792476405941, "grad_norm": 427.4485168457031, "learning_rate": 1.99789846623794e-06, "loss": 28.0977, "step": 5285 }, { "epoch": 0.05003739078577446, "grad_norm": 546.472412109375, "learning_rate": 1.997896479199336e-06, "loss": 36.3438, "step": 5286 }, { "epoch": 0.05004685680748951, "grad_norm": 416.75482177734375, "learning_rate": 1.9978944912227735e-06, "loss": 50.5625, "step": 5287 }, { "epoch": 0.05005632282920457, "grad_norm": 428.3577575683594, "learning_rate": 1.9978925023082553e-06, "loss": 58.8125, "step": 5288 }, { "epoch": 0.050065788850919626, "grad_norm": 621.1582641601562, "learning_rate": 1.997890512455783e-06, "loss": 27.5, "step": 5289 }, { "epoch": 0.05007525487263468, "grad_norm": 322.2375183105469, "learning_rate": 1.9978885216653585e-06, "loss": 23.1562, "step": 5290 }, { "epoch": 0.05008472089434973, "grad_norm": 461.04229736328125, "learning_rate": 1.997886529936984e-06, "loss": 37.6016, "step": 5291 }, { "epoch": 0.050094186916064785, "grad_norm": 312.9264831542969, "learning_rate": 1.9978845372706606e-06, "loss": 24.625, "step": 5292 }, { "epoch": 0.05010365293777984, "grad_norm": 544.4048461914062, "learning_rate": 1.997882543666391e-06, "loss": 29.0156, "step": 5293 }, { "epoch": 0.05011311895949489, "grad_norm": 797.8146362304688, "learning_rate": 1.9978805491241768e-06, "loss": 55.4219, "step": 5294 }, { "epoch": 0.050122584981209944, "grad_norm": 332.91827392578125, "learning_rate": 1.99787855364402e-06, "loss": 29.7266, "step": 5295 }, { "epoch": 0.050132051002925, "grad_norm": 416.5557861328125, "learning_rate": 1.9978765572259214e-06, "loss": 29.0781, "step": 5296 }, { "epoch": 0.05014151702464006, "grad_norm": 283.68731689453125, "learning_rate": 1.9978745598698843e-06, "loss": 23.0625, "step": 5297 }, { "epoch": 0.05015098304635511, "grad_norm": 753.0687255859375, "learning_rate": 1.9978725615759103e-06, "loss": 34.9375, "step": 5298 }, { "epoch": 0.05016044906807016, "grad_norm": 186.77285766601562, "learning_rate": 1.9978705623440005e-06, "loss": 26.7734, "step": 5299 }, { "epoch": 0.050169915089785216, "grad_norm": 432.09796142578125, "learning_rate": 1.9978685621741577e-06, "loss": 23.1797, "step": 5300 }, { "epoch": 0.05017938111150027, "grad_norm": 939.197998046875, "learning_rate": 1.9978665610663833e-06, "loss": 47.2969, "step": 5301 }, { "epoch": 0.05018884713321532, "grad_norm": 273.6300964355469, "learning_rate": 1.997864559020679e-06, "loss": 25.0625, "step": 5302 }, { "epoch": 0.050198313154930375, "grad_norm": 315.52984619140625, "learning_rate": 1.997862556037047e-06, "loss": 22.2656, "step": 5303 }, { "epoch": 0.05020777917664543, "grad_norm": 893.2252807617188, "learning_rate": 1.997860552115489e-06, "loss": 48.5312, "step": 5304 }, { "epoch": 0.05021724519836049, "grad_norm": 342.21270751953125, "learning_rate": 1.9978585472560073e-06, "loss": 23.2969, "step": 5305 }, { "epoch": 0.05022671122007554, "grad_norm": 471.5636901855469, "learning_rate": 1.9978565414586033e-06, "loss": 34.9062, "step": 5306 }, { "epoch": 0.050236177241790594, "grad_norm": 594.0134887695312, "learning_rate": 1.9978545347232793e-06, "loss": 35.5, "step": 5307 }, { "epoch": 0.05024564326350565, "grad_norm": 246.68966674804688, "learning_rate": 1.997852527050036e-06, "loss": 21.2969, "step": 5308 }, { "epoch": 0.0502551092852207, "grad_norm": 632.1905517578125, "learning_rate": 1.9978505184388777e-06, "loss": 27.6875, "step": 5309 }, { "epoch": 0.05026457530693575, "grad_norm": 299.65582275390625, "learning_rate": 1.997848508889804e-06, "loss": 37.8125, "step": 5310 }, { "epoch": 0.050274041328650806, "grad_norm": 255.455078125, "learning_rate": 1.9978464984028176e-06, "loss": 28.0234, "step": 5311 }, { "epoch": 0.05028350735036586, "grad_norm": 3.099949359893799, "learning_rate": 1.9978444869779207e-06, "loss": 0.9731, "step": 5312 }, { "epoch": 0.05029297337208091, "grad_norm": 193.1591033935547, "learning_rate": 1.9978424746151148e-06, "loss": 20.5156, "step": 5313 }, { "epoch": 0.05030243939379597, "grad_norm": 582.0731201171875, "learning_rate": 1.9978404613144016e-06, "loss": 31.4492, "step": 5314 }, { "epoch": 0.050311905415511025, "grad_norm": 392.0903625488281, "learning_rate": 1.9978384470757836e-06, "loss": 28.8594, "step": 5315 }, { "epoch": 0.05032137143722608, "grad_norm": 341.2526550292969, "learning_rate": 1.997836431899262e-06, "loss": 17.3164, "step": 5316 }, { "epoch": 0.05033083745894113, "grad_norm": 2.517918586730957, "learning_rate": 1.9978344157848393e-06, "loss": 0.8789, "step": 5317 }, { "epoch": 0.050340303480656184, "grad_norm": 3.3505351543426514, "learning_rate": 1.9978323987325172e-06, "loss": 1.0996, "step": 5318 }, { "epoch": 0.05034976950237124, "grad_norm": 492.8443298339844, "learning_rate": 1.997830380742298e-06, "loss": 56.125, "step": 5319 }, { "epoch": 0.05035923552408629, "grad_norm": 540.8740844726562, "learning_rate": 1.9978283618141825e-06, "loss": 32.625, "step": 5320 }, { "epoch": 0.05036870154580134, "grad_norm": 343.8959655761719, "learning_rate": 1.9978263419481737e-06, "loss": 40.7109, "step": 5321 }, { "epoch": 0.050378167567516396, "grad_norm": 337.7358703613281, "learning_rate": 1.9978243211442728e-06, "loss": 17.4922, "step": 5322 }, { "epoch": 0.050387633589231456, "grad_norm": 361.9353332519531, "learning_rate": 1.997822299402482e-06, "loss": 22.1328, "step": 5323 }, { "epoch": 0.05039709961094651, "grad_norm": 202.66880798339844, "learning_rate": 1.997820276722803e-06, "loss": 23.7891, "step": 5324 }, { "epoch": 0.05040656563266156, "grad_norm": 2.9936141967773438, "learning_rate": 1.997818253105238e-06, "loss": 0.9814, "step": 5325 }, { "epoch": 0.050416031654376615, "grad_norm": 548.0919189453125, "learning_rate": 1.9978162285497885e-06, "loss": 53.9375, "step": 5326 }, { "epoch": 0.05042549767609167, "grad_norm": 267.9744567871094, "learning_rate": 1.997814203056457e-06, "loss": 25.4375, "step": 5327 }, { "epoch": 0.05043496369780672, "grad_norm": 585.6610107421875, "learning_rate": 1.9978121766252453e-06, "loss": 29.1406, "step": 5328 }, { "epoch": 0.050444429719521774, "grad_norm": 233.8960418701172, "learning_rate": 1.9978101492561542e-06, "loss": 28.1094, "step": 5329 }, { "epoch": 0.05045389574123683, "grad_norm": 431.9930114746094, "learning_rate": 1.9978081209491872e-06, "loss": 29.5312, "step": 5330 }, { "epoch": 0.05046336176295189, "grad_norm": 528.4657592773438, "learning_rate": 1.9978060917043455e-06, "loss": 69.125, "step": 5331 }, { "epoch": 0.05047282778466694, "grad_norm": 353.7773742675781, "learning_rate": 1.9978040615216304e-06, "loss": 38.2812, "step": 5332 }, { "epoch": 0.05048229380638199, "grad_norm": 645.0875854492188, "learning_rate": 1.997802030401045e-06, "loss": 62.9844, "step": 5333 }, { "epoch": 0.050491759828097046, "grad_norm": 3.1926827430725098, "learning_rate": 1.9977999983425905e-06, "loss": 0.9255, "step": 5334 }, { "epoch": 0.0505012258498121, "grad_norm": 591.0059204101562, "learning_rate": 1.9977979653462686e-06, "loss": 31.9688, "step": 5335 }, { "epoch": 0.05051069187152715, "grad_norm": 433.75018310546875, "learning_rate": 1.9977959314120814e-06, "loss": 36.6953, "step": 5336 }, { "epoch": 0.050520157893242205, "grad_norm": 253.76461791992188, "learning_rate": 1.9977938965400314e-06, "loss": 20.4844, "step": 5337 }, { "epoch": 0.05052962391495726, "grad_norm": 694.1417846679688, "learning_rate": 1.99779186073012e-06, "loss": 46.0625, "step": 5338 }, { "epoch": 0.05053908993667231, "grad_norm": 172.93321228027344, "learning_rate": 1.9977898239823488e-06, "loss": 23.4766, "step": 5339 }, { "epoch": 0.05054855595838737, "grad_norm": 295.9559020996094, "learning_rate": 1.9977877862967204e-06, "loss": 23.0938, "step": 5340 }, { "epoch": 0.050558021980102424, "grad_norm": 442.8326721191406, "learning_rate": 1.9977857476732364e-06, "loss": 25.5156, "step": 5341 }, { "epoch": 0.05056748800181748, "grad_norm": 172.93882751464844, "learning_rate": 1.9977837081118985e-06, "loss": 24.8438, "step": 5342 }, { "epoch": 0.05057695402353253, "grad_norm": 170.6522216796875, "learning_rate": 1.9977816676127087e-06, "loss": 22.5469, "step": 5343 }, { "epoch": 0.05058642004524758, "grad_norm": 570.1143798828125, "learning_rate": 1.9977796261756693e-06, "loss": 25.375, "step": 5344 }, { "epoch": 0.050595886066962636, "grad_norm": 434.2060241699219, "learning_rate": 1.9977775838007823e-06, "loss": 10.5508, "step": 5345 }, { "epoch": 0.05060535208867769, "grad_norm": 382.55718994140625, "learning_rate": 1.997775540488049e-06, "loss": 31.7969, "step": 5346 }, { "epoch": 0.05061481811039274, "grad_norm": 429.2646179199219, "learning_rate": 1.9977734962374714e-06, "loss": 32.6406, "step": 5347 }, { "epoch": 0.0506242841321078, "grad_norm": 328.7024230957031, "learning_rate": 1.9977714510490517e-06, "loss": 42.25, "step": 5348 }, { "epoch": 0.050633750153822855, "grad_norm": 667.7344970703125, "learning_rate": 1.9977694049227917e-06, "loss": 33.2891, "step": 5349 }, { "epoch": 0.05064321617553791, "grad_norm": 428.8728332519531, "learning_rate": 1.9977673578586934e-06, "loss": 33.1562, "step": 5350 }, { "epoch": 0.05065268219725296, "grad_norm": 208.6680450439453, "learning_rate": 1.997765309856759e-06, "loss": 27.8906, "step": 5351 }, { "epoch": 0.050662148218968014, "grad_norm": 144.12509155273438, "learning_rate": 1.9977632609169897e-06, "loss": 22.3281, "step": 5352 }, { "epoch": 0.05067161424068307, "grad_norm": 3.376955986022949, "learning_rate": 1.997761211039388e-06, "loss": 0.8599, "step": 5353 }, { "epoch": 0.05068108026239812, "grad_norm": 407.7967834472656, "learning_rate": 1.9977591602239558e-06, "loss": 19.7344, "step": 5354 }, { "epoch": 0.050690546284113173, "grad_norm": 297.9488525390625, "learning_rate": 1.9977571084706945e-06, "loss": 31.75, "step": 5355 }, { "epoch": 0.050700012305828226, "grad_norm": 1532.1309814453125, "learning_rate": 1.997755055779607e-06, "loss": 37.0781, "step": 5356 }, { "epoch": 0.050709478327543286, "grad_norm": 624.8471069335938, "learning_rate": 1.997753002150694e-06, "loss": 24.9531, "step": 5357 }, { "epoch": 0.05071894434925834, "grad_norm": 415.9170227050781, "learning_rate": 1.9977509475839587e-06, "loss": 25.4219, "step": 5358 }, { "epoch": 0.05072841037097339, "grad_norm": 384.15435791015625, "learning_rate": 1.997748892079402e-06, "loss": 28.1523, "step": 5359 }, { "epoch": 0.050737876392688445, "grad_norm": 284.5003967285156, "learning_rate": 1.9977468356370265e-06, "loss": 20.1875, "step": 5360 }, { "epoch": 0.0507473424144035, "grad_norm": 239.40980529785156, "learning_rate": 1.997744778256834e-06, "loss": 23.5469, "step": 5361 }, { "epoch": 0.05075680843611855, "grad_norm": 679.1757202148438, "learning_rate": 1.997742719938826e-06, "loss": 26.7188, "step": 5362 }, { "epoch": 0.050766274457833604, "grad_norm": 313.8531188964844, "learning_rate": 1.997740660683005e-06, "loss": 23.0938, "step": 5363 }, { "epoch": 0.05077574047954866, "grad_norm": 466.4092102050781, "learning_rate": 1.9977386004893722e-06, "loss": 13.7969, "step": 5364 }, { "epoch": 0.05078520650126371, "grad_norm": 341.6548156738281, "learning_rate": 1.9977365393579303e-06, "loss": 26.75, "step": 5365 }, { "epoch": 0.05079467252297877, "grad_norm": 438.5145568847656, "learning_rate": 1.997734477288681e-06, "loss": 39.2344, "step": 5366 }, { "epoch": 0.05080413854469382, "grad_norm": 483.2239074707031, "learning_rate": 1.997732414281626e-06, "loss": 44.7266, "step": 5367 }, { "epoch": 0.050813604566408876, "grad_norm": 289.5129089355469, "learning_rate": 1.997730350336768e-06, "loss": 11.4648, "step": 5368 }, { "epoch": 0.05082307058812393, "grad_norm": 751.6818237304688, "learning_rate": 1.9977282854541083e-06, "loss": 77.3359, "step": 5369 }, { "epoch": 0.05083253660983898, "grad_norm": 310.1125183105469, "learning_rate": 1.9977262196336487e-06, "loss": 27.5, "step": 5370 }, { "epoch": 0.050842002631554035, "grad_norm": 437.0325012207031, "learning_rate": 1.997724152875391e-06, "loss": 25.3203, "step": 5371 }, { "epoch": 0.05085146865326909, "grad_norm": 266.029541015625, "learning_rate": 1.997722085179338e-06, "loss": 26.3125, "step": 5372 }, { "epoch": 0.05086093467498414, "grad_norm": 277.61279296875, "learning_rate": 1.9977200165454906e-06, "loss": 28.9531, "step": 5373 }, { "epoch": 0.0508704006966992, "grad_norm": 379.05401611328125, "learning_rate": 1.997717946973852e-06, "loss": 24.375, "step": 5374 }, { "epoch": 0.050879866718414254, "grad_norm": 471.94110107421875, "learning_rate": 1.997715876464423e-06, "loss": 28.5156, "step": 5375 }, { "epoch": 0.05088933274012931, "grad_norm": 288.6314697265625, "learning_rate": 1.997713805017206e-06, "loss": 23.3672, "step": 5376 }, { "epoch": 0.05089879876184436, "grad_norm": 230.88404846191406, "learning_rate": 1.997711732632203e-06, "loss": 27.4375, "step": 5377 }, { "epoch": 0.050908264783559413, "grad_norm": 408.583740234375, "learning_rate": 1.997709659309416e-06, "loss": 27.0078, "step": 5378 }, { "epoch": 0.050917730805274466, "grad_norm": 302.6225280761719, "learning_rate": 1.9977075850488465e-06, "loss": 26.6016, "step": 5379 }, { "epoch": 0.05092719682698952, "grad_norm": 247.5236358642578, "learning_rate": 1.9977055098504972e-06, "loss": 21.7812, "step": 5380 }, { "epoch": 0.05093666284870457, "grad_norm": 444.846435546875, "learning_rate": 1.9977034337143694e-06, "loss": 34.875, "step": 5381 }, { "epoch": 0.050946128870419626, "grad_norm": 250.2587432861328, "learning_rate": 1.997701356640465e-06, "loss": 25.3984, "step": 5382 }, { "epoch": 0.050955594892134685, "grad_norm": 169.0071563720703, "learning_rate": 1.997699278628787e-06, "loss": 17.0391, "step": 5383 }, { "epoch": 0.05096506091384974, "grad_norm": 192.25213623046875, "learning_rate": 1.9976971996793357e-06, "loss": 25.5859, "step": 5384 }, { "epoch": 0.05097452693556479, "grad_norm": 381.0699157714844, "learning_rate": 1.9976951197921144e-06, "loss": 20.6094, "step": 5385 }, { "epoch": 0.050983992957279844, "grad_norm": 898.5232543945312, "learning_rate": 1.9976930389671247e-06, "loss": 57.5156, "step": 5386 }, { "epoch": 0.0509934589789949, "grad_norm": 270.2722473144531, "learning_rate": 1.9976909572043684e-06, "loss": 26.6172, "step": 5387 }, { "epoch": 0.05100292500070995, "grad_norm": 782.7325439453125, "learning_rate": 1.997688874503847e-06, "loss": 53.0469, "step": 5388 }, { "epoch": 0.051012391022425004, "grad_norm": 376.7284851074219, "learning_rate": 1.9976867908655636e-06, "loss": 42.6719, "step": 5389 }, { "epoch": 0.051021857044140057, "grad_norm": 324.336181640625, "learning_rate": 1.9976847062895194e-06, "loss": 24.9062, "step": 5390 }, { "epoch": 0.051031323065855116, "grad_norm": 285.356689453125, "learning_rate": 1.997682620775716e-06, "loss": 27.2344, "step": 5391 }, { "epoch": 0.05104078908757017, "grad_norm": 146.22853088378906, "learning_rate": 1.9976805343241564e-06, "loss": 18.9531, "step": 5392 }, { "epoch": 0.05105025510928522, "grad_norm": 3.0180344581604004, "learning_rate": 1.997678446934842e-06, "loss": 0.8267, "step": 5393 }, { "epoch": 0.051059721131000275, "grad_norm": 195.66775512695312, "learning_rate": 1.9976763586077745e-06, "loss": 24.8203, "step": 5394 }, { "epoch": 0.05106918715271533, "grad_norm": 433.33331298828125, "learning_rate": 1.997674269342956e-06, "loss": 25.9766, "step": 5395 }, { "epoch": 0.05107865317443038, "grad_norm": 3.14933180809021, "learning_rate": 1.997672179140389e-06, "loss": 0.9961, "step": 5396 }, { "epoch": 0.051088119196145435, "grad_norm": 156.66604614257812, "learning_rate": 1.9976700880000744e-06, "loss": 21.9219, "step": 5397 }, { "epoch": 0.05109758521786049, "grad_norm": 300.00494384765625, "learning_rate": 1.9976679959220153e-06, "loss": 24.4766, "step": 5398 }, { "epoch": 0.05110705123957554, "grad_norm": 377.45318603515625, "learning_rate": 1.997665902906213e-06, "loss": 34.5391, "step": 5399 }, { "epoch": 0.0511165172612906, "grad_norm": 195.10887145996094, "learning_rate": 1.99766380895267e-06, "loss": 31.5781, "step": 5400 }, { "epoch": 0.051125983283005653, "grad_norm": 552.6152954101562, "learning_rate": 1.9976617140613876e-06, "loss": 56.5625, "step": 5401 }, { "epoch": 0.051135449304720706, "grad_norm": 241.9661102294922, "learning_rate": 1.9976596182323685e-06, "loss": 33.0469, "step": 5402 }, { "epoch": 0.05114491532643576, "grad_norm": 370.9960021972656, "learning_rate": 1.997657521465614e-06, "loss": 41.1875, "step": 5403 }, { "epoch": 0.05115438134815081, "grad_norm": 461.7136535644531, "learning_rate": 1.997655423761126e-06, "loss": 38.5781, "step": 5404 }, { "epoch": 0.051163847369865866, "grad_norm": 3.225895404815674, "learning_rate": 1.9976533251189073e-06, "loss": 0.8521, "step": 5405 }, { "epoch": 0.05117331339158092, "grad_norm": 501.816162109375, "learning_rate": 1.997651225538959e-06, "loss": 33.8984, "step": 5406 }, { "epoch": 0.05118277941329597, "grad_norm": 864.1810913085938, "learning_rate": 1.9976491250212833e-06, "loss": 94.5391, "step": 5407 }, { "epoch": 0.051192245435011025, "grad_norm": 397.95654296875, "learning_rate": 1.997647023565883e-06, "loss": 30.9844, "step": 5408 }, { "epoch": 0.051201711456726084, "grad_norm": 208.98770141601562, "learning_rate": 1.997644921172759e-06, "loss": 17.0547, "step": 5409 }, { "epoch": 0.05121117747844114, "grad_norm": 296.62060546875, "learning_rate": 1.9976428178419134e-06, "loss": 52.0469, "step": 5410 }, { "epoch": 0.05122064350015619, "grad_norm": 513.4114990234375, "learning_rate": 1.997640713573349e-06, "loss": 19.5117, "step": 5411 }, { "epoch": 0.051230109521871244, "grad_norm": 286.5925598144531, "learning_rate": 1.9976386083670673e-06, "loss": 22.375, "step": 5412 }, { "epoch": 0.051239575543586297, "grad_norm": 404.5957946777344, "learning_rate": 1.9976365022230697e-06, "loss": 49.3906, "step": 5413 }, { "epoch": 0.05124904156530135, "grad_norm": 468.12152099609375, "learning_rate": 1.997634395141359e-06, "loss": 45.375, "step": 5414 }, { "epoch": 0.0512585075870164, "grad_norm": 557.3435668945312, "learning_rate": 1.997632287121937e-06, "loss": 36.0625, "step": 5415 }, { "epoch": 0.051267973608731456, "grad_norm": 513.0791015625, "learning_rate": 1.997630178164805e-06, "loss": 25.875, "step": 5416 }, { "epoch": 0.051277439630446515, "grad_norm": 294.3284606933594, "learning_rate": 1.997628068269966e-06, "loss": 30.4062, "step": 5417 }, { "epoch": 0.05128690565216157, "grad_norm": 494.54364013671875, "learning_rate": 1.997625957437421e-06, "loss": 31.4766, "step": 5418 }, { "epoch": 0.05129637167387662, "grad_norm": 323.341064453125, "learning_rate": 1.997623845667173e-06, "loss": 23.9766, "step": 5419 }, { "epoch": 0.051305837695591675, "grad_norm": 488.78948974609375, "learning_rate": 1.9976217329592235e-06, "loss": 43.125, "step": 5420 }, { "epoch": 0.05131530371730673, "grad_norm": 2.6845717430114746, "learning_rate": 1.997619619313574e-06, "loss": 0.8047, "step": 5421 }, { "epoch": 0.05132476973902178, "grad_norm": 605.0252075195312, "learning_rate": 1.9976175047302272e-06, "loss": 33.1016, "step": 5422 }, { "epoch": 0.051334235760736834, "grad_norm": 2.4544153213500977, "learning_rate": 1.997615389209185e-06, "loss": 0.854, "step": 5423 }, { "epoch": 0.05134370178245189, "grad_norm": 462.503173828125, "learning_rate": 1.997613272750449e-06, "loss": 54.125, "step": 5424 }, { "epoch": 0.05135316780416694, "grad_norm": 750.3469848632812, "learning_rate": 1.9976111553540217e-06, "loss": 57.6719, "step": 5425 }, { "epoch": 0.051362633825882, "grad_norm": 237.41331481933594, "learning_rate": 1.9976090370199046e-06, "loss": 26.6641, "step": 5426 }, { "epoch": 0.05137209984759705, "grad_norm": 245.6708984375, "learning_rate": 1.9976069177481005e-06, "loss": 25.2188, "step": 5427 }, { "epoch": 0.051381565869312106, "grad_norm": 214.9398651123047, "learning_rate": 1.9976047975386097e-06, "loss": 24.1562, "step": 5428 }, { "epoch": 0.05139103189102716, "grad_norm": 269.2427673339844, "learning_rate": 1.997602676391436e-06, "loss": 30.0625, "step": 5429 }, { "epoch": 0.05140049791274221, "grad_norm": 675.1229858398438, "learning_rate": 1.997600554306581e-06, "loss": 27.6875, "step": 5430 }, { "epoch": 0.051409963934457265, "grad_norm": 218.0255889892578, "learning_rate": 1.997598431284046e-06, "loss": 24.1719, "step": 5431 }, { "epoch": 0.05141942995617232, "grad_norm": 601.4052734375, "learning_rate": 1.997596307323833e-06, "loss": 22.6172, "step": 5432 }, { "epoch": 0.05142889597788737, "grad_norm": 395.4801025390625, "learning_rate": 1.9975941824259445e-06, "loss": 48.9688, "step": 5433 }, { "epoch": 0.05143836199960243, "grad_norm": 278.6930847167969, "learning_rate": 1.9975920565903825e-06, "loss": 22.7188, "step": 5434 }, { "epoch": 0.051447828021317484, "grad_norm": 427.4809265136719, "learning_rate": 1.997589929817149e-06, "loss": 21.5703, "step": 5435 }, { "epoch": 0.051457294043032537, "grad_norm": 227.16278076171875, "learning_rate": 1.9975878021062456e-06, "loss": 23.5859, "step": 5436 }, { "epoch": 0.05146676006474759, "grad_norm": 664.8355102539062, "learning_rate": 1.997585673457675e-06, "loss": 37.4141, "step": 5437 }, { "epoch": 0.05147622608646264, "grad_norm": 562.3089599609375, "learning_rate": 1.9975835438714384e-06, "loss": 42.9141, "step": 5438 }, { "epoch": 0.051485692108177696, "grad_norm": 302.0076904296875, "learning_rate": 1.997581413347538e-06, "loss": 34.6953, "step": 5439 }, { "epoch": 0.05149515812989275, "grad_norm": 435.7312927246094, "learning_rate": 1.9975792818859764e-06, "loss": 24.0625, "step": 5440 }, { "epoch": 0.0515046241516078, "grad_norm": 499.3035888671875, "learning_rate": 1.997577149486755e-06, "loss": 15.5703, "step": 5441 }, { "epoch": 0.051514090173322855, "grad_norm": 228.8357696533203, "learning_rate": 1.9975750161498763e-06, "loss": 29.5469, "step": 5442 }, { "epoch": 0.051523556195037915, "grad_norm": 345.34661865234375, "learning_rate": 1.9975728818753416e-06, "loss": 26.125, "step": 5443 }, { "epoch": 0.05153302221675297, "grad_norm": 464.826171875, "learning_rate": 1.9975707466631532e-06, "loss": 44.9688, "step": 5444 }, { "epoch": 0.05154248823846802, "grad_norm": 652.1891479492188, "learning_rate": 1.9975686105133134e-06, "loss": 66.2812, "step": 5445 }, { "epoch": 0.051551954260183074, "grad_norm": 214.3760223388672, "learning_rate": 1.9975664734258238e-06, "loss": 23.25, "step": 5446 }, { "epoch": 0.05156142028189813, "grad_norm": 706.2904663085938, "learning_rate": 1.997564335400687e-06, "loss": 44.375, "step": 5447 }, { "epoch": 0.05157088630361318, "grad_norm": 677.34765625, "learning_rate": 1.997562196437904e-06, "loss": 44.2812, "step": 5448 }, { "epoch": 0.05158035232532823, "grad_norm": 1045.811767578125, "learning_rate": 1.997560056537478e-06, "loss": 26.6641, "step": 5449 }, { "epoch": 0.051589818347043286, "grad_norm": 280.4783020019531, "learning_rate": 1.9975579156994104e-06, "loss": 24.4062, "step": 5450 }, { "epoch": 0.05159928436875834, "grad_norm": 1245.4703369140625, "learning_rate": 1.997555773923703e-06, "loss": 89.5312, "step": 5451 }, { "epoch": 0.0516087503904734, "grad_norm": 768.4357299804688, "learning_rate": 1.997553631210358e-06, "loss": 64.875, "step": 5452 }, { "epoch": 0.05161821641218845, "grad_norm": 651.9760131835938, "learning_rate": 1.9975514875593776e-06, "loss": 51.4844, "step": 5453 }, { "epoch": 0.051627682433903505, "grad_norm": 310.3631896972656, "learning_rate": 1.997549342970764e-06, "loss": 24.0703, "step": 5454 }, { "epoch": 0.05163714845561856, "grad_norm": 357.3893127441406, "learning_rate": 1.9975471974445186e-06, "loss": 24.375, "step": 5455 }, { "epoch": 0.05164661447733361, "grad_norm": 341.1490478515625, "learning_rate": 1.9975450509806436e-06, "loss": 24.4688, "step": 5456 }, { "epoch": 0.051656080499048664, "grad_norm": 382.71124267578125, "learning_rate": 1.9975429035791412e-06, "loss": 51.0625, "step": 5457 }, { "epoch": 0.05166554652076372, "grad_norm": 166.29067993164062, "learning_rate": 1.997540755240014e-06, "loss": 22.875, "step": 5458 }, { "epoch": 0.05167501254247877, "grad_norm": 457.2251281738281, "learning_rate": 1.9975386059632625e-06, "loss": 50.2812, "step": 5459 }, { "epoch": 0.05168447856419383, "grad_norm": 559.280029296875, "learning_rate": 1.9975364557488903e-06, "loss": 49.6094, "step": 5460 }, { "epoch": 0.05169394458590888, "grad_norm": 726.8652954101562, "learning_rate": 1.997534304596898e-06, "loss": 46.2344, "step": 5461 }, { "epoch": 0.051703410607623936, "grad_norm": 772.1675415039062, "learning_rate": 1.997532152507289e-06, "loss": 15.832, "step": 5462 }, { "epoch": 0.05171287662933899, "grad_norm": 546.2420043945312, "learning_rate": 1.9975299994800644e-06, "loss": 21.8203, "step": 5463 }, { "epoch": 0.05172234265105404, "grad_norm": 317.6883239746094, "learning_rate": 1.9975278455152265e-06, "loss": 25.5, "step": 5464 }, { "epoch": 0.051731808672769095, "grad_norm": 686.9398803710938, "learning_rate": 1.9975256906127777e-06, "loss": 47.7031, "step": 5465 }, { "epoch": 0.05174127469448415, "grad_norm": 504.7706298828125, "learning_rate": 1.997523534772719e-06, "loss": 25.75, "step": 5466 }, { "epoch": 0.0517507407161992, "grad_norm": 469.89007568359375, "learning_rate": 1.997521377995053e-06, "loss": 24.1797, "step": 5467 }, { "epoch": 0.051760206737914254, "grad_norm": 362.7875671386719, "learning_rate": 1.9975192202797824e-06, "loss": 32.4609, "step": 5468 }, { "epoch": 0.051769672759629314, "grad_norm": 1310.3465576171875, "learning_rate": 1.9975170616269084e-06, "loss": 39.8438, "step": 5469 }, { "epoch": 0.05177913878134437, "grad_norm": 212.76202392578125, "learning_rate": 1.9975149020364334e-06, "loss": 21.3047, "step": 5470 }, { "epoch": 0.05178860480305942, "grad_norm": 478.8789367675781, "learning_rate": 1.997512741508359e-06, "loss": 51.0781, "step": 5471 }, { "epoch": 0.05179807082477447, "grad_norm": 215.00352478027344, "learning_rate": 1.9975105800426877e-06, "loss": 25.4062, "step": 5472 }, { "epoch": 0.051807536846489526, "grad_norm": 275.618896484375, "learning_rate": 1.9975084176394217e-06, "loss": 26.6328, "step": 5473 }, { "epoch": 0.05181700286820458, "grad_norm": 646.673583984375, "learning_rate": 1.9975062542985617e-06, "loss": 16.5703, "step": 5474 }, { "epoch": 0.05182646888991963, "grad_norm": 522.8369140625, "learning_rate": 1.9975040900201113e-06, "loss": 23.8438, "step": 5475 }, { "epoch": 0.051835934911634685, "grad_norm": 734.3524169921875, "learning_rate": 1.997501924804072e-06, "loss": 46.75, "step": 5476 }, { "epoch": 0.051845400933349745, "grad_norm": 746.54833984375, "learning_rate": 1.997499758650446e-06, "loss": 37.3594, "step": 5477 }, { "epoch": 0.0518548669550648, "grad_norm": 716.3026123046875, "learning_rate": 1.9974975915592346e-06, "loss": 41.0938, "step": 5478 }, { "epoch": 0.05186433297677985, "grad_norm": 639.9254150390625, "learning_rate": 1.9974954235304408e-06, "loss": 56.2031, "step": 5479 }, { "epoch": 0.051873798998494904, "grad_norm": 762.0792846679688, "learning_rate": 1.9974932545640657e-06, "loss": 66.375, "step": 5480 }, { "epoch": 0.05188326502020996, "grad_norm": 535.5723876953125, "learning_rate": 1.9974910846601125e-06, "loss": 35.2422, "step": 5481 }, { "epoch": 0.05189273104192501, "grad_norm": 2.8963236808776855, "learning_rate": 1.997488913818582e-06, "loss": 0.8838, "step": 5482 }, { "epoch": 0.05190219706364006, "grad_norm": 516.295166015625, "learning_rate": 1.997486742039477e-06, "loss": 69.6094, "step": 5483 }, { "epoch": 0.051911663085355116, "grad_norm": 224.4370880126953, "learning_rate": 1.9974845693227994e-06, "loss": 21.1172, "step": 5484 }, { "epoch": 0.05192112910707017, "grad_norm": 274.4236755371094, "learning_rate": 1.997482395668551e-06, "loss": 25.2031, "step": 5485 }, { "epoch": 0.05193059512878523, "grad_norm": 593.9751586914062, "learning_rate": 1.997480221076734e-06, "loss": 22.8516, "step": 5486 }, { "epoch": 0.05194006115050028, "grad_norm": 350.78460693359375, "learning_rate": 1.997478045547351e-06, "loss": 44.9375, "step": 5487 }, { "epoch": 0.051949527172215335, "grad_norm": 594.5233154296875, "learning_rate": 1.9974758690804033e-06, "loss": 36.9688, "step": 5488 }, { "epoch": 0.05195899319393039, "grad_norm": 426.91009521484375, "learning_rate": 1.997473691675893e-06, "loss": 43.125, "step": 5489 }, { "epoch": 0.05196845921564544, "grad_norm": 661.0267333984375, "learning_rate": 1.9974715133338223e-06, "loss": 32.9141, "step": 5490 }, { "epoch": 0.051977925237360494, "grad_norm": 375.51519775390625, "learning_rate": 1.9974693340541935e-06, "loss": 32.9375, "step": 5491 }, { "epoch": 0.05198739125907555, "grad_norm": 3.8487746715545654, "learning_rate": 1.997467153837008e-06, "loss": 0.7783, "step": 5492 }, { "epoch": 0.0519968572807906, "grad_norm": 293.6549072265625, "learning_rate": 1.9974649726822684e-06, "loss": 43.8398, "step": 5493 }, { "epoch": 0.05200632330250565, "grad_norm": 197.20999145507812, "learning_rate": 1.997462790589977e-06, "loss": 21.6172, "step": 5494 }, { "epoch": 0.05201578932422071, "grad_norm": 474.3670654296875, "learning_rate": 1.997460607560135e-06, "loss": 33.4766, "step": 5495 }, { "epoch": 0.052025255345935766, "grad_norm": 388.3454895019531, "learning_rate": 1.997458423592745e-06, "loss": 19.7031, "step": 5496 }, { "epoch": 0.05203472136765082, "grad_norm": 311.4346923828125, "learning_rate": 1.997456238687809e-06, "loss": 26.875, "step": 5497 }, { "epoch": 0.05204418738936587, "grad_norm": 163.6356658935547, "learning_rate": 1.9974540528453297e-06, "loss": 22.875, "step": 5498 }, { "epoch": 0.052053653411080925, "grad_norm": 453.92156982421875, "learning_rate": 1.9974518660653075e-06, "loss": 19.2656, "step": 5499 }, { "epoch": 0.05206311943279598, "grad_norm": 514.9556884765625, "learning_rate": 1.9974496783477457e-06, "loss": 36.3281, "step": 5500 }, { "epoch": 0.05207258545451103, "grad_norm": 183.66847229003906, "learning_rate": 1.9974474896926467e-06, "loss": 20.8906, "step": 5501 }, { "epoch": 0.052082051476226084, "grad_norm": 261.4858093261719, "learning_rate": 1.9974453001000114e-06, "loss": 23.8516, "step": 5502 }, { "epoch": 0.052091517497941144, "grad_norm": 328.94427490234375, "learning_rate": 1.9974431095698424e-06, "loss": 25.7812, "step": 5503 }, { "epoch": 0.0521009835196562, "grad_norm": 432.4238586425781, "learning_rate": 1.9974409181021417e-06, "loss": 26.7031, "step": 5504 }, { "epoch": 0.05211044954137125, "grad_norm": 671.0927734375, "learning_rate": 1.9974387256969115e-06, "loss": 48.0234, "step": 5505 }, { "epoch": 0.0521199155630863, "grad_norm": 506.3154602050781, "learning_rate": 1.997436532354154e-06, "loss": 25.6328, "step": 5506 }, { "epoch": 0.052129381584801356, "grad_norm": 293.83465576171875, "learning_rate": 1.997434338073871e-06, "loss": 25.3125, "step": 5507 }, { "epoch": 0.05213884760651641, "grad_norm": 649.0279541015625, "learning_rate": 1.9974321428560645e-06, "loss": 44.1875, "step": 5508 }, { "epoch": 0.05214831362823146, "grad_norm": 315.30029296875, "learning_rate": 1.9974299467007365e-06, "loss": 26.6797, "step": 5509 }, { "epoch": 0.052157779649946515, "grad_norm": 378.3651123046875, "learning_rate": 1.9974277496078896e-06, "loss": 30.6719, "step": 5510 }, { "epoch": 0.05216724567166157, "grad_norm": 2.7674808502197266, "learning_rate": 1.9974255515775255e-06, "loss": 0.939, "step": 5511 }, { "epoch": 0.05217671169337663, "grad_norm": 965.9395751953125, "learning_rate": 1.997423352609646e-06, "loss": 58.6875, "step": 5512 }, { "epoch": 0.05218617771509168, "grad_norm": 206.8462371826172, "learning_rate": 1.9974211527042535e-06, "loss": 24.1094, "step": 5513 }, { "epoch": 0.052195643736806734, "grad_norm": 480.78521728515625, "learning_rate": 1.99741895186135e-06, "loss": 47.4062, "step": 5514 }, { "epoch": 0.05220510975852179, "grad_norm": 787.5599975585938, "learning_rate": 1.9974167500809375e-06, "loss": 59.6094, "step": 5515 }, { "epoch": 0.05221457578023684, "grad_norm": 494.20697021484375, "learning_rate": 1.9974145473630185e-06, "loss": 26.5938, "step": 5516 }, { "epoch": 0.05222404180195189, "grad_norm": 639.6586303710938, "learning_rate": 1.9974123437075945e-06, "loss": 36.7422, "step": 5517 }, { "epoch": 0.052233507823666946, "grad_norm": 713.6619262695312, "learning_rate": 1.9974101391146677e-06, "loss": 54.0312, "step": 5518 }, { "epoch": 0.052242973845382, "grad_norm": 765.6390380859375, "learning_rate": 1.99740793358424e-06, "loss": 20.6797, "step": 5519 }, { "epoch": 0.05225243986709706, "grad_norm": 452.2580261230469, "learning_rate": 1.9974057271163144e-06, "loss": 27.4453, "step": 5520 }, { "epoch": 0.05226190588881211, "grad_norm": 483.1461486816406, "learning_rate": 1.9974035197108918e-06, "loss": 43.4844, "step": 5521 }, { "epoch": 0.052271371910527165, "grad_norm": 286.26123046875, "learning_rate": 1.9974013113679747e-06, "loss": 26.125, "step": 5522 }, { "epoch": 0.05228083793224222, "grad_norm": 706.8688354492188, "learning_rate": 1.9973991020875655e-06, "loss": 41.7344, "step": 5523 }, { "epoch": 0.05229030395395727, "grad_norm": 260.1729736328125, "learning_rate": 1.997396891869666e-06, "loss": 23.3906, "step": 5524 }, { "epoch": 0.052299769975672324, "grad_norm": 1195.005615234375, "learning_rate": 1.9973946807142786e-06, "loss": 22.6562, "step": 5525 }, { "epoch": 0.05230923599738738, "grad_norm": 245.28445434570312, "learning_rate": 1.9973924686214045e-06, "loss": 19.5312, "step": 5526 }, { "epoch": 0.05231870201910243, "grad_norm": 239.15138244628906, "learning_rate": 1.9973902555910465e-06, "loss": 21.5469, "step": 5527 }, { "epoch": 0.05232816804081748, "grad_norm": 201.17465209960938, "learning_rate": 1.997388041623207e-06, "loss": 27.5156, "step": 5528 }, { "epoch": 0.05233763406253254, "grad_norm": 396.5793762207031, "learning_rate": 1.9973858267178873e-06, "loss": 28.9531, "step": 5529 }, { "epoch": 0.052347100084247596, "grad_norm": 1195.634765625, "learning_rate": 1.99738361087509e-06, "loss": 22.4922, "step": 5530 }, { "epoch": 0.05235656610596265, "grad_norm": 215.78939819335938, "learning_rate": 1.9973813940948166e-06, "loss": 25.6562, "step": 5531 }, { "epoch": 0.0523660321276777, "grad_norm": 326.3107604980469, "learning_rate": 1.99737917637707e-06, "loss": 24.7344, "step": 5532 }, { "epoch": 0.052375498149392755, "grad_norm": 462.03851318359375, "learning_rate": 1.9973769577218515e-06, "loss": 50.3594, "step": 5533 }, { "epoch": 0.05238496417110781, "grad_norm": 1375.3004150390625, "learning_rate": 1.997374738129164e-06, "loss": 22.2969, "step": 5534 }, { "epoch": 0.05239443019282286, "grad_norm": 619.94970703125, "learning_rate": 1.9973725175990084e-06, "loss": 31.5469, "step": 5535 }, { "epoch": 0.052403896214537914, "grad_norm": 425.4481201171875, "learning_rate": 1.997370296131388e-06, "loss": 45.9531, "step": 5536 }, { "epoch": 0.05241336223625297, "grad_norm": 593.907958984375, "learning_rate": 1.9973680737263043e-06, "loss": 55.293, "step": 5537 }, { "epoch": 0.05242282825796803, "grad_norm": 2.9831011295318604, "learning_rate": 1.9973658503837594e-06, "loss": 0.8804, "step": 5538 }, { "epoch": 0.05243229427968308, "grad_norm": 565.964599609375, "learning_rate": 1.9973636261037557e-06, "loss": 47.0625, "step": 5539 }, { "epoch": 0.05244176030139813, "grad_norm": 360.53277587890625, "learning_rate": 1.997361400886295e-06, "loss": 32.5938, "step": 5540 }, { "epoch": 0.052451226323113186, "grad_norm": 233.06103515625, "learning_rate": 1.9973591747313796e-06, "loss": 23.9141, "step": 5541 }, { "epoch": 0.05246069234482824, "grad_norm": 216.13095092773438, "learning_rate": 1.997356947639011e-06, "loss": 22.5469, "step": 5542 }, { "epoch": 0.05247015836654329, "grad_norm": 287.75848388671875, "learning_rate": 1.9973547196091923e-06, "loss": 26.375, "step": 5543 }, { "epoch": 0.052479624388258345, "grad_norm": 399.1507263183594, "learning_rate": 1.9973524906419247e-06, "loss": 35.6406, "step": 5544 }, { "epoch": 0.0524890904099734, "grad_norm": 874.6475219726562, "learning_rate": 1.9973502607372106e-06, "loss": 52.3125, "step": 5545 }, { "epoch": 0.05249855643168846, "grad_norm": 222.24952697753906, "learning_rate": 1.9973480298950525e-06, "loss": 21.375, "step": 5546 }, { "epoch": 0.05250802245340351, "grad_norm": 384.72698974609375, "learning_rate": 1.997345798115452e-06, "loss": 25.6562, "step": 5547 }, { "epoch": 0.052517488475118564, "grad_norm": 390.6571044921875, "learning_rate": 1.997343565398411e-06, "loss": 27.0312, "step": 5548 }, { "epoch": 0.05252695449683362, "grad_norm": 262.771728515625, "learning_rate": 1.997341331743932e-06, "loss": 29.4375, "step": 5549 }, { "epoch": 0.05253642051854867, "grad_norm": 392.3255920410156, "learning_rate": 1.9973390971520176e-06, "loss": 30.375, "step": 5550 }, { "epoch": 0.05254588654026372, "grad_norm": 3.223918914794922, "learning_rate": 1.997336861622669e-06, "loss": 0.8367, "step": 5551 }, { "epoch": 0.052555352561978776, "grad_norm": 399.49517822265625, "learning_rate": 1.9973346251558884e-06, "loss": 50.9531, "step": 5552 }, { "epoch": 0.05256481858369383, "grad_norm": 459.0816345214844, "learning_rate": 1.9973323877516784e-06, "loss": 59.1016, "step": 5553 }, { "epoch": 0.05257428460540888, "grad_norm": 514.27783203125, "learning_rate": 1.9973301494100405e-06, "loss": 25.2578, "step": 5554 }, { "epoch": 0.05258375062712394, "grad_norm": 3.054666757583618, "learning_rate": 1.9973279101309773e-06, "loss": 0.8315, "step": 5555 }, { "epoch": 0.052593216648838995, "grad_norm": 281.6676940917969, "learning_rate": 1.997325669914491e-06, "loss": 24.3281, "step": 5556 }, { "epoch": 0.05260268267055405, "grad_norm": 500.47015380859375, "learning_rate": 1.9973234287605833e-06, "loss": 27.6562, "step": 5557 }, { "epoch": 0.0526121486922691, "grad_norm": 534.552001953125, "learning_rate": 1.9973211866692567e-06, "loss": 26.4922, "step": 5558 }, { "epoch": 0.052621614713984154, "grad_norm": 411.9308776855469, "learning_rate": 1.9973189436405125e-06, "loss": 20.7109, "step": 5559 }, { "epoch": 0.05263108073569921, "grad_norm": 757.9192504882812, "learning_rate": 1.997316699674354e-06, "loss": 55.5938, "step": 5560 }, { "epoch": 0.05264054675741426, "grad_norm": 447.719970703125, "learning_rate": 1.9973144547707822e-06, "loss": 35.6875, "step": 5561 }, { "epoch": 0.05265001277912931, "grad_norm": 428.537841796875, "learning_rate": 1.9973122089297997e-06, "loss": 31.4844, "step": 5562 }, { "epoch": 0.05265947880084437, "grad_norm": 448.26104736328125, "learning_rate": 1.9973099621514087e-06, "loss": 36.6719, "step": 5563 }, { "epoch": 0.052668944822559426, "grad_norm": 823.0311889648438, "learning_rate": 1.9973077144356114e-06, "loss": 48.7656, "step": 5564 }, { "epoch": 0.05267841084427448, "grad_norm": 163.4945526123047, "learning_rate": 1.9973054657824096e-06, "loss": 21.6484, "step": 5565 }, { "epoch": 0.05268787686598953, "grad_norm": 902.5891723632812, "learning_rate": 1.9973032161918057e-06, "loss": 55.25, "step": 5566 }, { "epoch": 0.052697342887704585, "grad_norm": 3.163367986679077, "learning_rate": 1.9973009656638014e-06, "loss": 0.9688, "step": 5567 }, { "epoch": 0.05270680890941964, "grad_norm": 374.59722900390625, "learning_rate": 1.997298714198399e-06, "loss": 27.9844, "step": 5568 }, { "epoch": 0.05271627493113469, "grad_norm": 583.3342895507812, "learning_rate": 1.9972964617956008e-06, "loss": 40.5781, "step": 5569 }, { "epoch": 0.052725740952849744, "grad_norm": 3.309891939163208, "learning_rate": 1.997294208455409e-06, "loss": 0.874, "step": 5570 }, { "epoch": 0.0527352069745648, "grad_norm": 435.5919494628906, "learning_rate": 1.9972919541778254e-06, "loss": 40.7812, "step": 5571 }, { "epoch": 0.05274467299627986, "grad_norm": 179.5215606689453, "learning_rate": 1.9972896989628524e-06, "loss": 20.0234, "step": 5572 }, { "epoch": 0.05275413901799491, "grad_norm": 336.3832092285156, "learning_rate": 1.9972874428104914e-06, "loss": 31.6797, "step": 5573 }, { "epoch": 0.05276360503970996, "grad_norm": 271.18096923828125, "learning_rate": 1.9972851857207457e-06, "loss": 20.9297, "step": 5574 }, { "epoch": 0.052773071061425016, "grad_norm": 702.805419921875, "learning_rate": 1.997282927693616e-06, "loss": 52.7656, "step": 5575 }, { "epoch": 0.05278253708314007, "grad_norm": 247.8122100830078, "learning_rate": 1.997280668729106e-06, "loss": 19.7734, "step": 5576 }, { "epoch": 0.05279200310485512, "grad_norm": 3.1653003692626953, "learning_rate": 1.997278408827217e-06, "loss": 1.082, "step": 5577 }, { "epoch": 0.052801469126570175, "grad_norm": 254.62632751464844, "learning_rate": 1.997276147987951e-06, "loss": 25.5156, "step": 5578 }, { "epoch": 0.05281093514828523, "grad_norm": 293.65850830078125, "learning_rate": 1.9972738862113104e-06, "loss": 25.0859, "step": 5579 }, { "epoch": 0.05282040117000028, "grad_norm": 319.0703430175781, "learning_rate": 1.997271623497297e-06, "loss": 12.3516, "step": 5580 }, { "epoch": 0.05282986719171534, "grad_norm": 3.65259051322937, "learning_rate": 1.9972693598459134e-06, "loss": 1.0288, "step": 5581 }, { "epoch": 0.052839333213430394, "grad_norm": 878.1048583984375, "learning_rate": 1.9972670952571614e-06, "loss": 49.8906, "step": 5582 }, { "epoch": 0.05284879923514545, "grad_norm": 3.148484230041504, "learning_rate": 1.9972648297310434e-06, "loss": 0.9395, "step": 5583 }, { "epoch": 0.0528582652568605, "grad_norm": 345.7774658203125, "learning_rate": 1.997262563267561e-06, "loss": 27.9219, "step": 5584 }, { "epoch": 0.05286773127857555, "grad_norm": 376.03863525390625, "learning_rate": 1.9972602958667167e-06, "loss": 50.4375, "step": 5585 }, { "epoch": 0.052877197300290606, "grad_norm": 167.8245391845703, "learning_rate": 1.997258027528513e-06, "loss": 24.25, "step": 5586 }, { "epoch": 0.05288666332200566, "grad_norm": 3.790109872817993, "learning_rate": 1.997255758252951e-06, "loss": 1.0063, "step": 5587 }, { "epoch": 0.05289612934372071, "grad_norm": 591.8297119140625, "learning_rate": 1.9972534880400335e-06, "loss": 28.4844, "step": 5588 }, { "epoch": 0.05290559536543577, "grad_norm": 495.22894287109375, "learning_rate": 1.9972512168897628e-06, "loss": 24.6406, "step": 5589 }, { "epoch": 0.052915061387150825, "grad_norm": 287.3546142578125, "learning_rate": 1.997248944802141e-06, "loss": 22.2344, "step": 5590 }, { "epoch": 0.05292452740886588, "grad_norm": 410.7136535644531, "learning_rate": 1.99724667177717e-06, "loss": 39.3594, "step": 5591 }, { "epoch": 0.05293399343058093, "grad_norm": 508.54766845703125, "learning_rate": 1.9972443978148518e-06, "loss": 24.2305, "step": 5592 }, { "epoch": 0.052943459452295984, "grad_norm": 262.8753967285156, "learning_rate": 1.997242122915189e-06, "loss": 22.8984, "step": 5593 }, { "epoch": 0.05295292547401104, "grad_norm": 175.68719482421875, "learning_rate": 1.9972398470781836e-06, "loss": 23.0469, "step": 5594 }, { "epoch": 0.05296239149572609, "grad_norm": 309.2794189453125, "learning_rate": 1.997237570303837e-06, "loss": 26.2188, "step": 5595 }, { "epoch": 0.05297185751744114, "grad_norm": 331.76654052734375, "learning_rate": 1.9972352925921525e-06, "loss": 32.4609, "step": 5596 }, { "epoch": 0.052981323539156196, "grad_norm": 492.26025390625, "learning_rate": 1.9972330139431317e-06, "loss": 34.75, "step": 5597 }, { "epoch": 0.052990789560871256, "grad_norm": 674.7601928710938, "learning_rate": 1.9972307343567766e-06, "loss": 24.9375, "step": 5598 }, { "epoch": 0.05300025558258631, "grad_norm": 188.34445190429688, "learning_rate": 1.9972284538330894e-06, "loss": 19.1172, "step": 5599 }, { "epoch": 0.05300972160430136, "grad_norm": 1034.451904296875, "learning_rate": 1.9972261723720725e-06, "loss": 46.8438, "step": 5600 }, { "epoch": 0.053019187626016415, "grad_norm": 500.3156433105469, "learning_rate": 1.9972238899737278e-06, "loss": 59.9062, "step": 5601 }, { "epoch": 0.05302865364773147, "grad_norm": 219.652099609375, "learning_rate": 1.9972216066380576e-06, "loss": 24.4375, "step": 5602 }, { "epoch": 0.05303811966944652, "grad_norm": 225.13917541503906, "learning_rate": 1.9972193223650637e-06, "loss": 26.9297, "step": 5603 }, { "epoch": 0.053047585691161574, "grad_norm": 281.7875061035156, "learning_rate": 1.9972170371547487e-06, "loss": 22.3203, "step": 5604 }, { "epoch": 0.05305705171287663, "grad_norm": 409.88055419921875, "learning_rate": 1.9972147510071146e-06, "loss": 30.3125, "step": 5605 }, { "epoch": 0.05306651773459168, "grad_norm": 333.03515625, "learning_rate": 1.9972124639221636e-06, "loss": 26.4219, "step": 5606 }, { "epoch": 0.05307598375630674, "grad_norm": 646.8379516601562, "learning_rate": 1.997210175899897e-06, "loss": 42.2852, "step": 5607 }, { "epoch": 0.05308544977802179, "grad_norm": 228.169921875, "learning_rate": 1.9972078869403185e-06, "loss": 21.9609, "step": 5608 }, { "epoch": 0.053094915799736846, "grad_norm": 270.39483642578125, "learning_rate": 1.9972055970434295e-06, "loss": 16.082, "step": 5609 }, { "epoch": 0.0531043818214519, "grad_norm": 3.187453508377075, "learning_rate": 1.9972033062092316e-06, "loss": 0.8818, "step": 5610 }, { "epoch": 0.05311384784316695, "grad_norm": 229.49549865722656, "learning_rate": 1.9972010144377278e-06, "loss": 25.5703, "step": 5611 }, { "epoch": 0.053123313864882005, "grad_norm": 168.6467742919922, "learning_rate": 1.9971987217289197e-06, "loss": 21.6719, "step": 5612 }, { "epoch": 0.05313277988659706, "grad_norm": 428.873779296875, "learning_rate": 1.99719642808281e-06, "loss": 23.2656, "step": 5613 }, { "epoch": 0.05314224590831211, "grad_norm": 1571.4398193359375, "learning_rate": 1.9971941334994004e-06, "loss": 59.2109, "step": 5614 }, { "epoch": 0.05315171193002717, "grad_norm": 302.4323425292969, "learning_rate": 1.9971918379786933e-06, "loss": 29.8203, "step": 5615 }, { "epoch": 0.053161177951742224, "grad_norm": 573.5867919921875, "learning_rate": 1.9971895415206904e-06, "loss": 20.6836, "step": 5616 }, { "epoch": 0.05317064397345728, "grad_norm": 434.8702697753906, "learning_rate": 1.9971872441253943e-06, "loss": 20.5625, "step": 5617 }, { "epoch": 0.05318010999517233, "grad_norm": 205.18508911132812, "learning_rate": 1.997184945792807e-06, "loss": 23.5391, "step": 5618 }, { "epoch": 0.05318957601688738, "grad_norm": 885.43212890625, "learning_rate": 1.997182646522931e-06, "loss": 28.1875, "step": 5619 }, { "epoch": 0.053199042038602436, "grad_norm": 500.51983642578125, "learning_rate": 1.997180346315768e-06, "loss": 57.1875, "step": 5620 }, { "epoch": 0.05320850806031749, "grad_norm": 2.845707416534424, "learning_rate": 1.997178045171321e-06, "loss": 0.9316, "step": 5621 }, { "epoch": 0.05321797408203254, "grad_norm": 398.90057373046875, "learning_rate": 1.9971757430895906e-06, "loss": 29.8281, "step": 5622 }, { "epoch": 0.053227440103747595, "grad_norm": 313.4096374511719, "learning_rate": 1.99717344007058e-06, "loss": 36.1875, "step": 5623 }, { "epoch": 0.053236906125462655, "grad_norm": 245.70513916015625, "learning_rate": 1.9971711361142915e-06, "loss": 23.7188, "step": 5624 }, { "epoch": 0.05324637214717771, "grad_norm": 777.8829345703125, "learning_rate": 1.9971688312207273e-06, "loss": 45.5312, "step": 5625 }, { "epoch": 0.05325583816889276, "grad_norm": 345.7402038574219, "learning_rate": 1.9971665253898886e-06, "loss": 19.3281, "step": 5626 }, { "epoch": 0.053265304190607814, "grad_norm": 194.63369750976562, "learning_rate": 1.9971642186217786e-06, "loss": 17.7578, "step": 5627 }, { "epoch": 0.05327477021232287, "grad_norm": 628.17431640625, "learning_rate": 1.997161910916399e-06, "loss": 40.9805, "step": 5628 }, { "epoch": 0.05328423623403792, "grad_norm": 551.1390991210938, "learning_rate": 1.997159602273752e-06, "loss": 29.3516, "step": 5629 }, { "epoch": 0.05329370225575297, "grad_norm": 328.1981201171875, "learning_rate": 1.99715729269384e-06, "loss": 22.2422, "step": 5630 }, { "epoch": 0.053303168277468026, "grad_norm": 485.4477233886719, "learning_rate": 1.997154982176665e-06, "loss": 36.4766, "step": 5631 }, { "epoch": 0.053312634299183086, "grad_norm": 312.98065185546875, "learning_rate": 1.997152670722229e-06, "loss": 24.2578, "step": 5632 }, { "epoch": 0.05332210032089814, "grad_norm": 913.0411987304688, "learning_rate": 1.9971503583305345e-06, "loss": 74.0078, "step": 5633 }, { "epoch": 0.05333156634261319, "grad_norm": 639.6564331054688, "learning_rate": 1.9971480450015835e-06, "loss": 24.7188, "step": 5634 }, { "epoch": 0.053341032364328245, "grad_norm": 3.2123684883117676, "learning_rate": 1.9971457307353783e-06, "loss": 0.918, "step": 5635 }, { "epoch": 0.0533504983860433, "grad_norm": 267.2029113769531, "learning_rate": 1.997143415531921e-06, "loss": 22.9297, "step": 5636 }, { "epoch": 0.05335996440775835, "grad_norm": 1565.9306640625, "learning_rate": 1.9971410993912136e-06, "loss": 50.75, "step": 5637 }, { "epoch": 0.053369430429473404, "grad_norm": 330.43438720703125, "learning_rate": 1.997138782313259e-06, "loss": 17.1953, "step": 5638 }, { "epoch": 0.05337889645118846, "grad_norm": 377.5650939941406, "learning_rate": 1.997136464298058e-06, "loss": 28.375, "step": 5639 }, { "epoch": 0.05338836247290351, "grad_norm": 370.7406311035156, "learning_rate": 1.997134145345614e-06, "loss": 25.1211, "step": 5640 }, { "epoch": 0.05339782849461857, "grad_norm": 862.9140625, "learning_rate": 1.9971318254559287e-06, "loss": 63.9531, "step": 5641 }, { "epoch": 0.05340729451633362, "grad_norm": 280.1409912109375, "learning_rate": 1.9971295046290043e-06, "loss": 28.7266, "step": 5642 }, { "epoch": 0.053416760538048676, "grad_norm": 371.5050964355469, "learning_rate": 1.997127182864843e-06, "loss": 19.4961, "step": 5643 }, { "epoch": 0.05342622655976373, "grad_norm": 201.5992889404297, "learning_rate": 1.997124860163447e-06, "loss": 24.3125, "step": 5644 }, { "epoch": 0.05343569258147878, "grad_norm": 353.76556396484375, "learning_rate": 1.9971225365248186e-06, "loss": 27.8906, "step": 5645 }, { "epoch": 0.053445158603193835, "grad_norm": 383.3140869140625, "learning_rate": 1.99712021194896e-06, "loss": 57.9844, "step": 5646 }, { "epoch": 0.05345462462490889, "grad_norm": 552.2037353515625, "learning_rate": 1.997117886435873e-06, "loss": 48.7656, "step": 5647 }, { "epoch": 0.05346409064662394, "grad_norm": 550.6431274414062, "learning_rate": 1.9971155599855606e-06, "loss": 48.5312, "step": 5648 }, { "epoch": 0.053473556668338994, "grad_norm": 2.5669212341308594, "learning_rate": 1.997113232598024e-06, "loss": 0.9653, "step": 5649 }, { "epoch": 0.053483022690054054, "grad_norm": 159.8703155517578, "learning_rate": 1.9971109042732656e-06, "loss": 22.2812, "step": 5650 }, { "epoch": 0.05349248871176911, "grad_norm": 303.66729736328125, "learning_rate": 1.997108575011288e-06, "loss": 21.1328, "step": 5651 }, { "epoch": 0.05350195473348416, "grad_norm": 576.9896850585938, "learning_rate": 1.9971062448120934e-06, "loss": 54.8984, "step": 5652 }, { "epoch": 0.05351142075519921, "grad_norm": 353.2442321777344, "learning_rate": 1.9971039136756837e-06, "loss": 23.5703, "step": 5653 }, { "epoch": 0.053520886776914266, "grad_norm": 175.97190856933594, "learning_rate": 1.9971015816020613e-06, "loss": 23.6875, "step": 5654 }, { "epoch": 0.05353035279862932, "grad_norm": 188.78054809570312, "learning_rate": 1.997099248591228e-06, "loss": 24.9375, "step": 5655 }, { "epoch": 0.05353981882034437, "grad_norm": 340.1147155761719, "learning_rate": 1.9970969146431864e-06, "loss": 28.7969, "step": 5656 }, { "epoch": 0.053549284842059425, "grad_norm": 388.2791748046875, "learning_rate": 1.9970945797579385e-06, "loss": 25.2344, "step": 5657 }, { "epoch": 0.053558750863774485, "grad_norm": 495.3157653808594, "learning_rate": 1.9970922439354867e-06, "loss": 40.9219, "step": 5658 }, { "epoch": 0.05356821688548954, "grad_norm": 394.5104064941406, "learning_rate": 1.9970899071758333e-06, "loss": 42.2812, "step": 5659 }, { "epoch": 0.05357768290720459, "grad_norm": 749.3380126953125, "learning_rate": 1.9970875694789798e-06, "loss": 25.5859, "step": 5660 }, { "epoch": 0.053587148928919644, "grad_norm": 388.4966125488281, "learning_rate": 1.9970852308449292e-06, "loss": 28.2422, "step": 5661 }, { "epoch": 0.0535966149506347, "grad_norm": 292.68475341796875, "learning_rate": 1.997082891273683e-06, "loss": 27.2656, "step": 5662 }, { "epoch": 0.05360608097234975, "grad_norm": 182.567138671875, "learning_rate": 1.997080550765244e-06, "loss": 21.375, "step": 5663 }, { "epoch": 0.0536155469940648, "grad_norm": 330.9601135253906, "learning_rate": 1.997078209319614e-06, "loss": 28.6406, "step": 5664 }, { "epoch": 0.053625013015779856, "grad_norm": 1000.6528930664062, "learning_rate": 1.9970758669367954e-06, "loss": 44.0938, "step": 5665 }, { "epoch": 0.05363447903749491, "grad_norm": 253.8422393798828, "learning_rate": 1.99707352361679e-06, "loss": 20.7227, "step": 5666 }, { "epoch": 0.05364394505920997, "grad_norm": 597.3182983398438, "learning_rate": 1.9970711793596007e-06, "loss": 37.4219, "step": 5667 }, { "epoch": 0.05365341108092502, "grad_norm": 234.28591918945312, "learning_rate": 1.9970688341652296e-06, "loss": 18.0234, "step": 5668 }, { "epoch": 0.053662877102640075, "grad_norm": 736.8815307617188, "learning_rate": 1.9970664880336783e-06, "loss": 48.9531, "step": 5669 }, { "epoch": 0.05367234312435513, "grad_norm": 724.9033813476562, "learning_rate": 1.9970641409649495e-06, "loss": 45.3281, "step": 5670 }, { "epoch": 0.05368180914607018, "grad_norm": 828.4583740234375, "learning_rate": 1.9970617929590452e-06, "loss": 51.8906, "step": 5671 }, { "epoch": 0.053691275167785234, "grad_norm": 584.1402587890625, "learning_rate": 1.997059444015968e-06, "loss": 66.6875, "step": 5672 }, { "epoch": 0.05370074118950029, "grad_norm": 367.2530517578125, "learning_rate": 1.9970570941357196e-06, "loss": 17.9531, "step": 5673 }, { "epoch": 0.05371020721121534, "grad_norm": 418.38555908203125, "learning_rate": 1.997054743318302e-06, "loss": 28.4375, "step": 5674 }, { "epoch": 0.0537196732329304, "grad_norm": 352.43731689453125, "learning_rate": 1.9970523915637183e-06, "loss": 42.0312, "step": 5675 }, { "epoch": 0.05372913925464545, "grad_norm": 719.1171264648438, "learning_rate": 1.9970500388719697e-06, "loss": 49.3125, "step": 5676 }, { "epoch": 0.053738605276360506, "grad_norm": 964.0706176757812, "learning_rate": 1.9970476852430593e-06, "loss": 28.6016, "step": 5677 }, { "epoch": 0.05374807129807556, "grad_norm": 500.1060791015625, "learning_rate": 1.9970453306769894e-06, "loss": 29.1562, "step": 5678 }, { "epoch": 0.05375753731979061, "grad_norm": 625.6259765625, "learning_rate": 1.9970429751737606e-06, "loss": 40.7266, "step": 5679 }, { "epoch": 0.053767003341505665, "grad_norm": 265.9289245605469, "learning_rate": 1.997040618733377e-06, "loss": 35.3125, "step": 5680 }, { "epoch": 0.05377646936322072, "grad_norm": 405.9252014160156, "learning_rate": 1.99703826135584e-06, "loss": 12.8594, "step": 5681 }, { "epoch": 0.05378593538493577, "grad_norm": 323.647705078125, "learning_rate": 1.9970359030411523e-06, "loss": 23.2891, "step": 5682 }, { "epoch": 0.053795401406650824, "grad_norm": 305.0298767089844, "learning_rate": 1.997033543789315e-06, "loss": 25.6406, "step": 5683 }, { "epoch": 0.053804867428365884, "grad_norm": 310.2297668457031, "learning_rate": 1.9970311836003315e-06, "loss": 41.7188, "step": 5684 }, { "epoch": 0.05381433345008094, "grad_norm": 398.707275390625, "learning_rate": 1.997028822474203e-06, "loss": 45.6875, "step": 5685 }, { "epoch": 0.05382379947179599, "grad_norm": 505.1233825683594, "learning_rate": 1.997026460410933e-06, "loss": 48.6406, "step": 5686 }, { "epoch": 0.05383326549351104, "grad_norm": 192.23684692382812, "learning_rate": 1.9970240974105223e-06, "loss": 21.5078, "step": 5687 }, { "epoch": 0.053842731515226096, "grad_norm": 295.61651611328125, "learning_rate": 1.9970217334729744e-06, "loss": 23.7578, "step": 5688 }, { "epoch": 0.05385219753694115, "grad_norm": 353.2547607421875, "learning_rate": 1.9970193685982908e-06, "loss": 32.2266, "step": 5689 }, { "epoch": 0.0538616635586562, "grad_norm": 422.10052490234375, "learning_rate": 1.9970170027864736e-06, "loss": 21.4297, "step": 5690 }, { "epoch": 0.053871129580371255, "grad_norm": 949.9984741210938, "learning_rate": 1.997014636037525e-06, "loss": 46.5938, "step": 5691 }, { "epoch": 0.05388059560208631, "grad_norm": 285.9170227050781, "learning_rate": 1.9970122683514485e-06, "loss": 20.9062, "step": 5692 }, { "epoch": 0.05389006162380137, "grad_norm": 241.88262939453125, "learning_rate": 1.9970098997282443e-06, "loss": 24.9453, "step": 5693 }, { "epoch": 0.05389952764551642, "grad_norm": 189.1852569580078, "learning_rate": 1.997007530167916e-06, "loss": 23.0156, "step": 5694 }, { "epoch": 0.053908993667231474, "grad_norm": 261.3092956542969, "learning_rate": 1.997005159670466e-06, "loss": 25.4766, "step": 5695 }, { "epoch": 0.05391845968894653, "grad_norm": 152.83155822753906, "learning_rate": 1.9970027882358953e-06, "loss": 17.8359, "step": 5696 }, { "epoch": 0.05392792571066158, "grad_norm": 5.164846897125244, "learning_rate": 1.9970004158642074e-06, "loss": 0.9551, "step": 5697 }, { "epoch": 0.05393739173237663, "grad_norm": 485.5975341796875, "learning_rate": 1.9969980425554037e-06, "loss": 23.2109, "step": 5698 }, { "epoch": 0.053946857754091686, "grad_norm": 405.067626953125, "learning_rate": 1.9969956683094864e-06, "loss": 27.4219, "step": 5699 }, { "epoch": 0.05395632377580674, "grad_norm": 689.7136840820312, "learning_rate": 1.9969932931264584e-06, "loss": 51.375, "step": 5700 }, { "epoch": 0.0539657897975218, "grad_norm": 396.62286376953125, "learning_rate": 1.9969909170063215e-06, "loss": 32.1562, "step": 5701 }, { "epoch": 0.05397525581923685, "grad_norm": 694.4052734375, "learning_rate": 1.9969885399490777e-06, "loss": 47.3594, "step": 5702 }, { "epoch": 0.053984721840951905, "grad_norm": 171.4988555908203, "learning_rate": 1.99698616195473e-06, "loss": 25.4844, "step": 5703 }, { "epoch": 0.05399418786266696, "grad_norm": 421.12994384765625, "learning_rate": 1.99698378302328e-06, "loss": 32.4141, "step": 5704 }, { "epoch": 0.05400365388438201, "grad_norm": 232.99755859375, "learning_rate": 1.99698140315473e-06, "loss": 23.8281, "step": 5705 }, { "epoch": 0.054013119906097064, "grad_norm": 239.55625915527344, "learning_rate": 1.9969790223490826e-06, "loss": 24.8438, "step": 5706 }, { "epoch": 0.05402258592781212, "grad_norm": 218.33018493652344, "learning_rate": 1.9969766406063397e-06, "loss": 24.375, "step": 5707 }, { "epoch": 0.05403205194952717, "grad_norm": 273.0238037109375, "learning_rate": 1.9969742579265035e-06, "loss": 22.9141, "step": 5708 }, { "epoch": 0.05404151797124222, "grad_norm": 334.6183166503906, "learning_rate": 1.996971874309576e-06, "loss": 34.0312, "step": 5709 }, { "epoch": 0.05405098399295728, "grad_norm": 217.93284606933594, "learning_rate": 1.99696948975556e-06, "loss": 25.8594, "step": 5710 }, { "epoch": 0.054060450014672336, "grad_norm": 593.750732421875, "learning_rate": 1.996967104264458e-06, "loss": 15.1914, "step": 5711 }, { "epoch": 0.05406991603638739, "grad_norm": 997.5277099609375, "learning_rate": 1.9969647178362713e-06, "loss": 84.4688, "step": 5712 }, { "epoch": 0.05407938205810244, "grad_norm": 661.0353393554688, "learning_rate": 1.996962330471003e-06, "loss": 50.25, "step": 5713 }, { "epoch": 0.054088848079817495, "grad_norm": 854.0291137695312, "learning_rate": 1.9969599421686547e-06, "loss": 39.8281, "step": 5714 }, { "epoch": 0.05409831410153255, "grad_norm": 424.1227722167969, "learning_rate": 1.996957552929229e-06, "loss": 23.7891, "step": 5715 }, { "epoch": 0.0541077801232476, "grad_norm": 779.1250610351562, "learning_rate": 1.996955162752728e-06, "loss": 28.5469, "step": 5716 }, { "epoch": 0.054117246144962654, "grad_norm": 159.21420288085938, "learning_rate": 1.996952771639154e-06, "loss": 21.7812, "step": 5717 }, { "epoch": 0.054126712166677714, "grad_norm": 205.27548217773438, "learning_rate": 1.9969503795885094e-06, "loss": 23.5078, "step": 5718 }, { "epoch": 0.05413617818839277, "grad_norm": 174.98684692382812, "learning_rate": 1.996947986600796e-06, "loss": 15.2812, "step": 5719 }, { "epoch": 0.05414564421010782, "grad_norm": 418.80621337890625, "learning_rate": 1.9969455926760167e-06, "loss": 35.1875, "step": 5720 }, { "epoch": 0.05415511023182287, "grad_norm": 417.1382141113281, "learning_rate": 1.996943197814173e-06, "loss": 17.5312, "step": 5721 }, { "epoch": 0.054164576253537926, "grad_norm": 464.9803466796875, "learning_rate": 1.9969408020152676e-06, "loss": 55.6562, "step": 5722 }, { "epoch": 0.05417404227525298, "grad_norm": 2.904372453689575, "learning_rate": 1.996938405279303e-06, "loss": 0.8608, "step": 5723 }, { "epoch": 0.05418350829696803, "grad_norm": 273.75244140625, "learning_rate": 1.996936007606281e-06, "loss": 21.4453, "step": 5724 }, { "epoch": 0.054192974318683085, "grad_norm": 552.9636840820312, "learning_rate": 1.996933608996204e-06, "loss": 52.4219, "step": 5725 }, { "epoch": 0.05420244034039814, "grad_norm": 216.53729248046875, "learning_rate": 1.9969312094490743e-06, "loss": 22.25, "step": 5726 }, { "epoch": 0.0542119063621132, "grad_norm": 969.7297973632812, "learning_rate": 1.9969288089648944e-06, "loss": 68.7188, "step": 5727 }, { "epoch": 0.05422137238382825, "grad_norm": 3.1679420471191406, "learning_rate": 1.996926407543666e-06, "loss": 0.9648, "step": 5728 }, { "epoch": 0.054230838405543304, "grad_norm": 836.8701782226562, "learning_rate": 1.9969240051853914e-06, "loss": 46.625, "step": 5729 }, { "epoch": 0.05424030442725836, "grad_norm": 3.532238245010376, "learning_rate": 1.9969216018900734e-06, "loss": 0.9048, "step": 5730 }, { "epoch": 0.05424977044897341, "grad_norm": 378.0259094238281, "learning_rate": 1.996919197657714e-06, "loss": 31.4922, "step": 5731 }, { "epoch": 0.05425923647068846, "grad_norm": 557.8311767578125, "learning_rate": 1.996916792488315e-06, "loss": 24.3906, "step": 5732 }, { "epoch": 0.054268702492403516, "grad_norm": 402.97491455078125, "learning_rate": 1.9969143863818795e-06, "loss": 26.1406, "step": 5733 }, { "epoch": 0.05427816851411857, "grad_norm": 677.6795043945312, "learning_rate": 1.9969119793384094e-06, "loss": 29.8828, "step": 5734 }, { "epoch": 0.05428763453583362, "grad_norm": 505.14886474609375, "learning_rate": 1.9969095713579066e-06, "loss": 34.5156, "step": 5735 }, { "epoch": 0.05429710055754868, "grad_norm": 659.0269165039062, "learning_rate": 1.9969071624403735e-06, "loss": 45.1094, "step": 5736 }, { "epoch": 0.054306566579263735, "grad_norm": 422.01129150390625, "learning_rate": 1.996904752585813e-06, "loss": 56.6797, "step": 5737 }, { "epoch": 0.05431603260097879, "grad_norm": 415.2609558105469, "learning_rate": 1.9969023417942266e-06, "loss": 47.625, "step": 5738 }, { "epoch": 0.05432549862269384, "grad_norm": 631.7333984375, "learning_rate": 1.996899930065617e-06, "loss": 31.1328, "step": 5739 }, { "epoch": 0.054334964644408894, "grad_norm": 346.9601135253906, "learning_rate": 1.996897517399986e-06, "loss": 24.4375, "step": 5740 }, { "epoch": 0.05434443066612395, "grad_norm": 528.3674926757812, "learning_rate": 1.9968951037973363e-06, "loss": 30.4844, "step": 5741 }, { "epoch": 0.054353896687839, "grad_norm": 657.2178955078125, "learning_rate": 1.99689268925767e-06, "loss": 43.0352, "step": 5742 }, { "epoch": 0.05436336270955405, "grad_norm": 586.3692626953125, "learning_rate": 1.9968902737809894e-06, "loss": 26.4844, "step": 5743 }, { "epoch": 0.05437282873126911, "grad_norm": 806.5737915039062, "learning_rate": 1.996887857367297e-06, "loss": 22.2969, "step": 5744 }, { "epoch": 0.054382294752984166, "grad_norm": 482.68804931640625, "learning_rate": 1.9968854400165947e-06, "loss": 51.0781, "step": 5745 }, { "epoch": 0.05439176077469922, "grad_norm": 613.7179565429688, "learning_rate": 1.9968830217288854e-06, "loss": 56.6875, "step": 5746 }, { "epoch": 0.05440122679641427, "grad_norm": 2.7517762184143066, "learning_rate": 1.9968806025041706e-06, "loss": 0.8398, "step": 5747 }, { "epoch": 0.054410692818129325, "grad_norm": 351.1712646484375, "learning_rate": 1.9968781823424526e-06, "loss": 31.1094, "step": 5748 }, { "epoch": 0.05442015883984438, "grad_norm": 597.9843139648438, "learning_rate": 1.9968757612437345e-06, "loss": 32.7969, "step": 5749 }, { "epoch": 0.05442962486155943, "grad_norm": 235.6949920654297, "learning_rate": 1.9968733392080176e-06, "loss": 24.4688, "step": 5750 }, { "epoch": 0.054439090883274484, "grad_norm": 234.38430786132812, "learning_rate": 1.9968709162353047e-06, "loss": 30.0625, "step": 5751 }, { "epoch": 0.05444855690498954, "grad_norm": 234.31752014160156, "learning_rate": 1.996868492325598e-06, "loss": 19.9141, "step": 5752 }, { "epoch": 0.0544580229267046, "grad_norm": 235.2005615234375, "learning_rate": 1.9968660674789e-06, "loss": 25.2188, "step": 5753 }, { "epoch": 0.05446748894841965, "grad_norm": 401.55242919921875, "learning_rate": 1.9968636416952125e-06, "loss": 47.2344, "step": 5754 }, { "epoch": 0.0544769549701347, "grad_norm": 837.7681274414062, "learning_rate": 1.996861214974538e-06, "loss": 43.4062, "step": 5755 }, { "epoch": 0.054486420991849756, "grad_norm": 204.93215942382812, "learning_rate": 1.9968587873168793e-06, "loss": 19.4609, "step": 5756 }, { "epoch": 0.05449588701356481, "grad_norm": 329.6005554199219, "learning_rate": 1.9968563587222376e-06, "loss": 22.125, "step": 5757 }, { "epoch": 0.05450535303527986, "grad_norm": 416.6439514160156, "learning_rate": 1.996853929190616e-06, "loss": 37.2969, "step": 5758 }, { "epoch": 0.054514819056994915, "grad_norm": 245.97755432128906, "learning_rate": 1.9968514987220163e-06, "loss": 21.8594, "step": 5759 }, { "epoch": 0.05452428507870997, "grad_norm": 420.1456298828125, "learning_rate": 1.9968490673164412e-06, "loss": 30.3984, "step": 5760 }, { "epoch": 0.05453375110042503, "grad_norm": 426.9433288574219, "learning_rate": 1.996846634973893e-06, "loss": 53.2656, "step": 5761 }, { "epoch": 0.05454321712214008, "grad_norm": 832.617431640625, "learning_rate": 1.9968442016943737e-06, "loss": 59.6875, "step": 5762 }, { "epoch": 0.054552683143855134, "grad_norm": 412.642822265625, "learning_rate": 1.9968417674778855e-06, "loss": 58.3125, "step": 5763 }, { "epoch": 0.05456214916557019, "grad_norm": 199.42921447753906, "learning_rate": 1.9968393323244315e-06, "loss": 22.1719, "step": 5764 }, { "epoch": 0.05457161518728524, "grad_norm": 3.442962884902954, "learning_rate": 1.996836896234013e-06, "loss": 0.9805, "step": 5765 }, { "epoch": 0.05458108120900029, "grad_norm": 219.7270965576172, "learning_rate": 1.9968344592066323e-06, "loss": 24.4844, "step": 5766 }, { "epoch": 0.054590547230715346, "grad_norm": 390.6729736328125, "learning_rate": 1.9968320212422926e-06, "loss": 50.2812, "step": 5767 }, { "epoch": 0.0546000132524304, "grad_norm": 411.1804504394531, "learning_rate": 1.996829582340995e-06, "loss": 60.8281, "step": 5768 }, { "epoch": 0.05460947927414545, "grad_norm": 260.2649230957031, "learning_rate": 1.9968271425027434e-06, "loss": 21.8203, "step": 5769 }, { "epoch": 0.05461894529586051, "grad_norm": 432.86309814453125, "learning_rate": 1.9968247017275384e-06, "loss": 47.9688, "step": 5770 }, { "epoch": 0.054628411317575565, "grad_norm": 438.3985290527344, "learning_rate": 1.9968222600153833e-06, "loss": 43.2734, "step": 5771 }, { "epoch": 0.05463787733929062, "grad_norm": 794.7569580078125, "learning_rate": 1.9968198173662802e-06, "loss": 80.0, "step": 5772 }, { "epoch": 0.05464734336100567, "grad_norm": 486.7784118652344, "learning_rate": 1.996817373780231e-06, "loss": 17.9297, "step": 5773 }, { "epoch": 0.054656809382720724, "grad_norm": 343.8138732910156, "learning_rate": 1.9968149292572383e-06, "loss": 32.9375, "step": 5774 }, { "epoch": 0.05466627540443578, "grad_norm": 3.06115460395813, "learning_rate": 1.9968124837973047e-06, "loss": 0.853, "step": 5775 }, { "epoch": 0.05467574142615083, "grad_norm": 607.7181396484375, "learning_rate": 1.9968100374004323e-06, "loss": 51.7344, "step": 5776 }, { "epoch": 0.05468520744786588, "grad_norm": 509.88555908203125, "learning_rate": 1.996807590066623e-06, "loss": 21.75, "step": 5777 }, { "epoch": 0.054694673469580936, "grad_norm": 925.7606201171875, "learning_rate": 1.9968051417958794e-06, "loss": 56.5391, "step": 5778 }, { "epoch": 0.054704139491295996, "grad_norm": 491.1330871582031, "learning_rate": 1.9968026925882044e-06, "loss": 55.375, "step": 5779 }, { "epoch": 0.05471360551301105, "grad_norm": 1092.1614990234375, "learning_rate": 1.9968002424435987e-06, "loss": 36.2344, "step": 5780 }, { "epoch": 0.0547230715347261, "grad_norm": 521.0477905273438, "learning_rate": 1.9967977913620666e-06, "loss": 50.0156, "step": 5781 }, { "epoch": 0.054732537556441155, "grad_norm": 251.18748474121094, "learning_rate": 1.996795339343609e-06, "loss": 20.2969, "step": 5782 }, { "epoch": 0.05474200357815621, "grad_norm": 267.9259338378906, "learning_rate": 1.9967928863882286e-06, "loss": 29.8125, "step": 5783 }, { "epoch": 0.05475146959987126, "grad_norm": 611.448486328125, "learning_rate": 1.9967904324959277e-06, "loss": 51.0, "step": 5784 }, { "epoch": 0.054760935621586314, "grad_norm": 172.14967346191406, "learning_rate": 1.9967879776667085e-06, "loss": 23.9844, "step": 5785 }, { "epoch": 0.05477040164330137, "grad_norm": 397.0157165527344, "learning_rate": 1.996785521900574e-06, "loss": 28.5, "step": 5786 }, { "epoch": 0.05477986766501643, "grad_norm": 280.12872314453125, "learning_rate": 1.9967830651975255e-06, "loss": 37.9062, "step": 5787 }, { "epoch": 0.05478933368673148, "grad_norm": 401.8898620605469, "learning_rate": 1.9967806075575655e-06, "loss": 24.2656, "step": 5788 }, { "epoch": 0.05479879970844653, "grad_norm": 815.7952270507812, "learning_rate": 1.9967781489806973e-06, "loss": 32.4766, "step": 5789 }, { "epoch": 0.054808265730161586, "grad_norm": 394.7861633300781, "learning_rate": 1.996775689466922e-06, "loss": 25.8047, "step": 5790 }, { "epoch": 0.05481773175187664, "grad_norm": 709.1638793945312, "learning_rate": 1.9967732290162426e-06, "loss": 43.9531, "step": 5791 }, { "epoch": 0.05482719777359169, "grad_norm": 1125.298828125, "learning_rate": 1.996770767628661e-06, "loss": 41.7188, "step": 5792 }, { "epoch": 0.054836663795306745, "grad_norm": 222.33689880371094, "learning_rate": 1.99676830530418e-06, "loss": 22.2266, "step": 5793 }, { "epoch": 0.0548461298170218, "grad_norm": 446.6675109863281, "learning_rate": 1.9967658420428015e-06, "loss": 31.1562, "step": 5794 }, { "epoch": 0.05485559583873685, "grad_norm": 594.694580078125, "learning_rate": 1.996763377844528e-06, "loss": 50.9219, "step": 5795 }, { "epoch": 0.05486506186045191, "grad_norm": 430.0958251953125, "learning_rate": 1.9967609127093615e-06, "loss": 28.0547, "step": 5796 }, { "epoch": 0.054874527882166964, "grad_norm": 678.9195556640625, "learning_rate": 1.996758446637305e-06, "loss": 48.125, "step": 5797 }, { "epoch": 0.05488399390388202, "grad_norm": 250.9822540283203, "learning_rate": 1.99675597962836e-06, "loss": 27.6562, "step": 5798 }, { "epoch": 0.05489345992559707, "grad_norm": 3.293403387069702, "learning_rate": 1.9967535116825295e-06, "loss": 1.0171, "step": 5799 }, { "epoch": 0.05490292594731212, "grad_norm": 217.90586853027344, "learning_rate": 1.9967510427998157e-06, "loss": 28.4375, "step": 5800 }, { "epoch": 0.054912391969027176, "grad_norm": 180.6394500732422, "learning_rate": 1.9967485729802203e-06, "loss": 22.0234, "step": 5801 }, { "epoch": 0.05492185799074223, "grad_norm": 510.5787048339844, "learning_rate": 1.9967461022237463e-06, "loss": 26.9609, "step": 5802 }, { "epoch": 0.05493132401245728, "grad_norm": 3.4474947452545166, "learning_rate": 1.996743630530396e-06, "loss": 0.8535, "step": 5803 }, { "epoch": 0.05494079003417234, "grad_norm": 294.74273681640625, "learning_rate": 1.996741157900171e-06, "loss": 29.0781, "step": 5804 }, { "epoch": 0.054950256055887395, "grad_norm": 380.8851318359375, "learning_rate": 1.9967386843330745e-06, "loss": 29.2812, "step": 5805 }, { "epoch": 0.05495972207760245, "grad_norm": 3.737377882003784, "learning_rate": 1.996736209829109e-06, "loss": 0.8579, "step": 5806 }, { "epoch": 0.0549691880993175, "grad_norm": 704.2554931640625, "learning_rate": 1.9967337343882757e-06, "loss": 31.3359, "step": 5807 }, { "epoch": 0.054978654121032554, "grad_norm": 692.1821899414062, "learning_rate": 1.9967312580105777e-06, "loss": 42.2344, "step": 5808 }, { "epoch": 0.05498812014274761, "grad_norm": 1068.155517578125, "learning_rate": 1.996728780696017e-06, "loss": 53.3594, "step": 5809 }, { "epoch": 0.05499758616446266, "grad_norm": 345.7562561035156, "learning_rate": 1.9967263024445964e-06, "loss": 19.3984, "step": 5810 }, { "epoch": 0.05500705218617771, "grad_norm": 946.2713623046875, "learning_rate": 1.9967238232563177e-06, "loss": 57.2422, "step": 5811 }, { "epoch": 0.055016518207892766, "grad_norm": 200.9973602294922, "learning_rate": 1.9967213431311833e-06, "loss": 18.4609, "step": 5812 }, { "epoch": 0.055025984229607826, "grad_norm": 894.8177490234375, "learning_rate": 1.9967188620691953e-06, "loss": 41.2812, "step": 5813 }, { "epoch": 0.05503545025132288, "grad_norm": 267.4609069824219, "learning_rate": 1.996716380070357e-06, "loss": 18.2578, "step": 5814 }, { "epoch": 0.05504491627303793, "grad_norm": 134.5675048828125, "learning_rate": 1.9967138971346704e-06, "loss": 21.1719, "step": 5815 }, { "epoch": 0.055054382294752985, "grad_norm": 716.7022094726562, "learning_rate": 1.9967114132621373e-06, "loss": 32.3359, "step": 5816 }, { "epoch": 0.05506384831646804, "grad_norm": 300.36126708984375, "learning_rate": 1.99670892845276e-06, "loss": 19.3203, "step": 5817 }, { "epoch": 0.05507331433818309, "grad_norm": 219.91807556152344, "learning_rate": 1.9967064427065417e-06, "loss": 25.2344, "step": 5818 }, { "epoch": 0.055082780359898144, "grad_norm": 252.606201171875, "learning_rate": 1.996703956023484e-06, "loss": 18.0078, "step": 5819 }, { "epoch": 0.0550922463816132, "grad_norm": 810.482666015625, "learning_rate": 1.996701468403589e-06, "loss": 55.1875, "step": 5820 }, { "epoch": 0.05510171240332825, "grad_norm": 404.83282470703125, "learning_rate": 1.99669897984686e-06, "loss": 60.0156, "step": 5821 }, { "epoch": 0.05511117842504331, "grad_norm": 834.7433471679688, "learning_rate": 1.9966964903532983e-06, "loss": 43.7969, "step": 5822 }, { "epoch": 0.05512064444675836, "grad_norm": 266.20184326171875, "learning_rate": 1.996693999922907e-06, "loss": 18.0703, "step": 5823 }, { "epoch": 0.055130110468473416, "grad_norm": 215.77999877929688, "learning_rate": 1.9966915085556883e-06, "loss": 26.6719, "step": 5824 }, { "epoch": 0.05513957649018847, "grad_norm": 760.3345336914062, "learning_rate": 1.9966890162516444e-06, "loss": 67.5625, "step": 5825 }, { "epoch": 0.05514904251190352, "grad_norm": 220.17596435546875, "learning_rate": 1.9966865230107774e-06, "loss": 32.5469, "step": 5826 }, { "epoch": 0.055158508533618575, "grad_norm": 490.73992919921875, "learning_rate": 1.9966840288330903e-06, "loss": 26.8672, "step": 5827 }, { "epoch": 0.05516797455533363, "grad_norm": 900.350830078125, "learning_rate": 1.9966815337185843e-06, "loss": 24.3711, "step": 5828 }, { "epoch": 0.05517744057704868, "grad_norm": 1273.1627197265625, "learning_rate": 1.9966790376672633e-06, "loss": 47.2188, "step": 5829 }, { "epoch": 0.05518690659876374, "grad_norm": 288.6801452636719, "learning_rate": 1.9966765406791285e-06, "loss": 21.5156, "step": 5830 }, { "epoch": 0.055196372620478794, "grad_norm": 246.97854614257812, "learning_rate": 1.996674042754183e-06, "loss": 18.7109, "step": 5831 }, { "epoch": 0.05520583864219385, "grad_norm": 377.6014709472656, "learning_rate": 1.996671543892428e-06, "loss": 19.2969, "step": 5832 }, { "epoch": 0.0552153046639089, "grad_norm": 617.341552734375, "learning_rate": 1.996669044093867e-06, "loss": 25.0859, "step": 5833 }, { "epoch": 0.05522477068562395, "grad_norm": 1084.240234375, "learning_rate": 1.9966665433585018e-06, "loss": 49.0781, "step": 5834 }, { "epoch": 0.055234236707339006, "grad_norm": 472.3929748535156, "learning_rate": 1.9966640416863353e-06, "loss": 50.4219, "step": 5835 }, { "epoch": 0.05524370272905406, "grad_norm": 284.5583801269531, "learning_rate": 1.996661539077369e-06, "loss": 27.4219, "step": 5836 }, { "epoch": 0.05525316875076911, "grad_norm": 233.41537475585938, "learning_rate": 1.996659035531606e-06, "loss": 19.4219, "step": 5837 }, { "epoch": 0.055262634772484165, "grad_norm": 604.974365234375, "learning_rate": 1.996656531049048e-06, "loss": 80.0312, "step": 5838 }, { "epoch": 0.055272100794199225, "grad_norm": 162.1331024169922, "learning_rate": 1.996654025629698e-06, "loss": 18.8359, "step": 5839 }, { "epoch": 0.05528156681591428, "grad_norm": 485.5374450683594, "learning_rate": 1.9966515192735576e-06, "loss": 37.875, "step": 5840 }, { "epoch": 0.05529103283762933, "grad_norm": 814.5239868164062, "learning_rate": 1.9966490119806305e-06, "loss": 31.5781, "step": 5841 }, { "epoch": 0.055300498859344384, "grad_norm": 309.14788818359375, "learning_rate": 1.9966465037509175e-06, "loss": 16.9023, "step": 5842 }, { "epoch": 0.05530996488105944, "grad_norm": 526.0236206054688, "learning_rate": 1.9966439945844216e-06, "loss": 50.9961, "step": 5843 }, { "epoch": 0.05531943090277449, "grad_norm": 519.3023071289062, "learning_rate": 1.9966414844811454e-06, "loss": 64.3594, "step": 5844 }, { "epoch": 0.05532889692448954, "grad_norm": 169.30276489257812, "learning_rate": 1.9966389734410906e-06, "loss": 26.9922, "step": 5845 }, { "epoch": 0.055338362946204596, "grad_norm": 464.257080078125, "learning_rate": 1.9966364614642605e-06, "loss": 20.1406, "step": 5846 }, { "epoch": 0.055347828967919656, "grad_norm": 859.1212158203125, "learning_rate": 1.996633948550657e-06, "loss": 37.0156, "step": 5847 }, { "epoch": 0.05535729498963471, "grad_norm": 606.3424682617188, "learning_rate": 1.9966314347002823e-06, "loss": 46.6719, "step": 5848 }, { "epoch": 0.05536676101134976, "grad_norm": 295.8800048828125, "learning_rate": 1.996628919913139e-06, "loss": 26.2266, "step": 5849 }, { "epoch": 0.055376227033064815, "grad_norm": 315.70037841796875, "learning_rate": 1.9966264041892286e-06, "loss": 32.25, "step": 5850 }, { "epoch": 0.05538569305477987, "grad_norm": 738.1337280273438, "learning_rate": 1.996623887528555e-06, "loss": 23.8984, "step": 5851 }, { "epoch": 0.05539515907649492, "grad_norm": 304.07891845703125, "learning_rate": 1.9966213699311196e-06, "loss": 29.1406, "step": 5852 }, { "epoch": 0.055404625098209974, "grad_norm": 550.8828735351562, "learning_rate": 1.9966188513969247e-06, "loss": 40.6172, "step": 5853 }, { "epoch": 0.05541409111992503, "grad_norm": 226.49148559570312, "learning_rate": 1.996616331925973e-06, "loss": 26.6562, "step": 5854 }, { "epoch": 0.05542355714164008, "grad_norm": 502.90802001953125, "learning_rate": 1.996613811518267e-06, "loss": 53.3125, "step": 5855 }, { "epoch": 0.05543302316335514, "grad_norm": 176.33937072753906, "learning_rate": 1.996611290173809e-06, "loss": 20.7969, "step": 5856 }, { "epoch": 0.05544248918507019, "grad_norm": 2.9278295040130615, "learning_rate": 1.996608767892601e-06, "loss": 0.9365, "step": 5857 }, { "epoch": 0.055451955206785246, "grad_norm": 224.92710876464844, "learning_rate": 1.9966062446746455e-06, "loss": 26.9766, "step": 5858 }, { "epoch": 0.0554614212285003, "grad_norm": 478.9147033691406, "learning_rate": 1.996603720519945e-06, "loss": 20.4648, "step": 5859 }, { "epoch": 0.05547088725021535, "grad_norm": 520.0282592773438, "learning_rate": 1.996601195428502e-06, "loss": 45.9062, "step": 5860 }, { "epoch": 0.055480353271930405, "grad_norm": 543.88525390625, "learning_rate": 1.996598669400318e-06, "loss": 38.1055, "step": 5861 }, { "epoch": 0.05548981929364546, "grad_norm": 313.1460266113281, "learning_rate": 1.996596142435397e-06, "loss": 34.0469, "step": 5862 }, { "epoch": 0.05549928531536051, "grad_norm": 301.33673095703125, "learning_rate": 1.99659361453374e-06, "loss": 15.0117, "step": 5863 }, { "epoch": 0.055508751337075564, "grad_norm": 388.2138977050781, "learning_rate": 1.9965910856953503e-06, "loss": 31.75, "step": 5864 }, { "epoch": 0.055518217358790624, "grad_norm": 379.0223083496094, "learning_rate": 1.9965885559202295e-06, "loss": 40.2812, "step": 5865 }, { "epoch": 0.05552768338050568, "grad_norm": 287.1973571777344, "learning_rate": 1.99658602520838e-06, "loss": 28.2188, "step": 5866 }, { "epoch": 0.05553714940222073, "grad_norm": 596.8421630859375, "learning_rate": 1.9965834935598047e-06, "loss": 37.8828, "step": 5867 }, { "epoch": 0.05554661542393578, "grad_norm": 674.7907104492188, "learning_rate": 1.996580960974506e-06, "loss": 63.4531, "step": 5868 }, { "epoch": 0.055556081445650836, "grad_norm": 334.4691467285156, "learning_rate": 1.9965784274524857e-06, "loss": 30.7031, "step": 5869 }, { "epoch": 0.05556554746736589, "grad_norm": 442.9364929199219, "learning_rate": 1.9965758929937465e-06, "loss": 37.7188, "step": 5870 }, { "epoch": 0.05557501348908094, "grad_norm": 192.75999450683594, "learning_rate": 1.996573357598291e-06, "loss": 23.2969, "step": 5871 }, { "epoch": 0.055584479510795995, "grad_norm": 442.0125732421875, "learning_rate": 1.9965708212661212e-06, "loss": 43.9297, "step": 5872 }, { "epoch": 0.055593945532511055, "grad_norm": 410.4358215332031, "learning_rate": 1.9965682839972397e-06, "loss": 27.6406, "step": 5873 }, { "epoch": 0.05560341155422611, "grad_norm": 695.2293701171875, "learning_rate": 1.996565745791649e-06, "loss": 43.4141, "step": 5874 }, { "epoch": 0.05561287757594116, "grad_norm": 238.68966674804688, "learning_rate": 1.9965632066493514e-06, "loss": 26.9375, "step": 5875 }, { "epoch": 0.055622343597656214, "grad_norm": 504.7596435546875, "learning_rate": 1.996560666570349e-06, "loss": 58.4375, "step": 5876 }, { "epoch": 0.05563180961937127, "grad_norm": 432.21221923828125, "learning_rate": 1.996558125554644e-06, "loss": 29.9219, "step": 5877 }, { "epoch": 0.05564127564108632, "grad_norm": 563.1358032226562, "learning_rate": 1.99655558360224e-06, "loss": 58.25, "step": 5878 }, { "epoch": 0.05565074166280137, "grad_norm": 464.103759765625, "learning_rate": 1.996553040713138e-06, "loss": 34.2695, "step": 5879 }, { "epoch": 0.055660207684516426, "grad_norm": 363.8839111328125, "learning_rate": 1.9965504968873416e-06, "loss": 27.9141, "step": 5880 }, { "epoch": 0.05566967370623148, "grad_norm": 710.8051147460938, "learning_rate": 1.996547952124852e-06, "loss": 50.7969, "step": 5881 }, { "epoch": 0.05567913972794654, "grad_norm": 298.399169921875, "learning_rate": 1.996545406425672e-06, "loss": 20.1016, "step": 5882 }, { "epoch": 0.05568860574966159, "grad_norm": 855.5828857421875, "learning_rate": 1.9965428597898044e-06, "loss": 39.25, "step": 5883 }, { "epoch": 0.055698071771376645, "grad_norm": 253.22251892089844, "learning_rate": 1.9965403122172516e-06, "loss": 21.0469, "step": 5884 }, { "epoch": 0.0557075377930917, "grad_norm": 249.9491424560547, "learning_rate": 1.9965377637080155e-06, "loss": 23.0, "step": 5885 }, { "epoch": 0.05571700381480675, "grad_norm": 398.1078796386719, "learning_rate": 1.996535214262099e-06, "loss": 44.6719, "step": 5886 }, { "epoch": 0.055726469836521804, "grad_norm": 298.50152587890625, "learning_rate": 1.996532663879504e-06, "loss": 33.0703, "step": 5887 }, { "epoch": 0.05573593585823686, "grad_norm": 598.7393798828125, "learning_rate": 1.996530112560233e-06, "loss": 56.3594, "step": 5888 }, { "epoch": 0.05574540187995191, "grad_norm": 623.9262084960938, "learning_rate": 1.9965275603042884e-06, "loss": 51.4375, "step": 5889 }, { "epoch": 0.05575486790166697, "grad_norm": 192.5454559326172, "learning_rate": 1.996525007111673e-06, "loss": 24.3984, "step": 5890 }, { "epoch": 0.05576433392338202, "grad_norm": 398.5856628417969, "learning_rate": 1.996522452982389e-06, "loss": 22.7734, "step": 5891 }, { "epoch": 0.055773799945097076, "grad_norm": 519.1632690429688, "learning_rate": 1.9965198979164388e-06, "loss": 20.8203, "step": 5892 }, { "epoch": 0.05578326596681213, "grad_norm": 428.55865478515625, "learning_rate": 1.996517341913824e-06, "loss": 27.0781, "step": 5893 }, { "epoch": 0.05579273198852718, "grad_norm": 447.1426086425781, "learning_rate": 1.9965147849745484e-06, "loss": 23.9062, "step": 5894 }, { "epoch": 0.055802198010242235, "grad_norm": 918.0801391601562, "learning_rate": 1.9965122270986135e-06, "loss": 64.7344, "step": 5895 }, { "epoch": 0.05581166403195729, "grad_norm": 787.7216186523438, "learning_rate": 1.996509668286022e-06, "loss": 46.25, "step": 5896 }, { "epoch": 0.05582113005367234, "grad_norm": 196.533447265625, "learning_rate": 1.9965071085367764e-06, "loss": 18.2969, "step": 5897 }, { "epoch": 0.055830596075387394, "grad_norm": 452.271484375, "learning_rate": 1.996504547850879e-06, "loss": 29.3438, "step": 5898 }, { "epoch": 0.055840062097102454, "grad_norm": 384.43310546875, "learning_rate": 1.9965019862283318e-06, "loss": 24.3125, "step": 5899 }, { "epoch": 0.05584952811881751, "grad_norm": 281.0685729980469, "learning_rate": 1.996499423669138e-06, "loss": 32.6406, "step": 5900 }, { "epoch": 0.05585899414053256, "grad_norm": 492.1231994628906, "learning_rate": 1.996496860173299e-06, "loss": 18.0781, "step": 5901 }, { "epoch": 0.05586846016224761, "grad_norm": 604.3898315429688, "learning_rate": 1.996494295740818e-06, "loss": 48.3281, "step": 5902 }, { "epoch": 0.055877926183962666, "grad_norm": 313.40020751953125, "learning_rate": 1.9964917303716973e-06, "loss": 24.0312, "step": 5903 }, { "epoch": 0.05588739220567772, "grad_norm": 239.508056640625, "learning_rate": 1.996489164065939e-06, "loss": 24.0156, "step": 5904 }, { "epoch": 0.05589685822739277, "grad_norm": 211.16099548339844, "learning_rate": 1.996486596823546e-06, "loss": 22.9531, "step": 5905 }, { "epoch": 0.055906324249107825, "grad_norm": 479.8813171386719, "learning_rate": 1.99648402864452e-06, "loss": 21.0195, "step": 5906 }, { "epoch": 0.05591579027082288, "grad_norm": 564.0578002929688, "learning_rate": 1.9964814595288645e-06, "loss": 37.0781, "step": 5907 }, { "epoch": 0.05592525629253794, "grad_norm": 173.801513671875, "learning_rate": 1.996478889476581e-06, "loss": 15.5625, "step": 5908 }, { "epoch": 0.05593472231425299, "grad_norm": 319.0535583496094, "learning_rate": 1.9964763184876717e-06, "loss": 42.5391, "step": 5909 }, { "epoch": 0.055944188335968044, "grad_norm": 209.28993225097656, "learning_rate": 1.99647374656214e-06, "loss": 25.7812, "step": 5910 }, { "epoch": 0.0559536543576831, "grad_norm": 357.8801574707031, "learning_rate": 1.9964711736999874e-06, "loss": 24.5312, "step": 5911 }, { "epoch": 0.05596312037939815, "grad_norm": 3.008938789367676, "learning_rate": 1.9964685999012168e-06, "loss": 0.9849, "step": 5912 }, { "epoch": 0.0559725864011132, "grad_norm": 311.1399841308594, "learning_rate": 1.996466025165831e-06, "loss": 30.5859, "step": 5913 }, { "epoch": 0.055982052422828256, "grad_norm": 172.6392059326172, "learning_rate": 1.9964634494938314e-06, "loss": 25.4219, "step": 5914 }, { "epoch": 0.05599151844454331, "grad_norm": 194.3601837158203, "learning_rate": 1.9964608728852212e-06, "loss": 22.0, "step": 5915 }, { "epoch": 0.05600098446625837, "grad_norm": 313.2875671386719, "learning_rate": 1.9964582953400027e-06, "loss": 36.25, "step": 5916 }, { "epoch": 0.05601045048797342, "grad_norm": 312.7739562988281, "learning_rate": 1.9964557168581784e-06, "loss": 20.4453, "step": 5917 }, { "epoch": 0.056019916509688475, "grad_norm": 3.2622361183166504, "learning_rate": 1.9964531374397503e-06, "loss": 0.9829, "step": 5918 }, { "epoch": 0.05602938253140353, "grad_norm": 343.8244934082031, "learning_rate": 1.996450557084721e-06, "loss": 32.4531, "step": 5919 }, { "epoch": 0.05603884855311858, "grad_norm": 242.9092254638672, "learning_rate": 1.996447975793093e-06, "loss": 28.8516, "step": 5920 }, { "epoch": 0.056048314574833634, "grad_norm": 321.96697998046875, "learning_rate": 1.9964453935648688e-06, "loss": 27.2578, "step": 5921 }, { "epoch": 0.05605778059654869, "grad_norm": 195.5787353515625, "learning_rate": 1.996442810400051e-06, "loss": 19.0781, "step": 5922 }, { "epoch": 0.05606724661826374, "grad_norm": 1015.7767944335938, "learning_rate": 1.9964402262986415e-06, "loss": 64.4219, "step": 5923 }, { "epoch": 0.05607671263997879, "grad_norm": 750.4161987304688, "learning_rate": 1.9964376412606428e-06, "loss": 37.8594, "step": 5924 }, { "epoch": 0.05608617866169385, "grad_norm": 1129.849853515625, "learning_rate": 1.9964350552860577e-06, "loss": 66.25, "step": 5925 }, { "epoch": 0.056095644683408906, "grad_norm": 427.5364074707031, "learning_rate": 1.996432468374889e-06, "loss": 31.2969, "step": 5926 }, { "epoch": 0.05610511070512396, "grad_norm": 406.7435607910156, "learning_rate": 1.996429880527138e-06, "loss": 26.2656, "step": 5927 }, { "epoch": 0.05611457672683901, "grad_norm": 396.15667724609375, "learning_rate": 1.9964272917428077e-06, "loss": 38.8984, "step": 5928 }, { "epoch": 0.056124042748554065, "grad_norm": 654.1126098632812, "learning_rate": 1.996424702021901e-06, "loss": 56.7344, "step": 5929 }, { "epoch": 0.05613350877026912, "grad_norm": 505.5486755371094, "learning_rate": 1.9964221113644197e-06, "loss": 27.1016, "step": 5930 }, { "epoch": 0.05614297479198417, "grad_norm": 242.86773681640625, "learning_rate": 1.996419519770366e-06, "loss": 19.3125, "step": 5931 }, { "epoch": 0.056152440813699224, "grad_norm": 275.6515808105469, "learning_rate": 1.9964169272397435e-06, "loss": 29.9375, "step": 5932 }, { "epoch": 0.05616190683541428, "grad_norm": 369.4269714355469, "learning_rate": 1.9964143337725536e-06, "loss": 46.4219, "step": 5933 }, { "epoch": 0.05617137285712934, "grad_norm": 3.4647533893585205, "learning_rate": 1.996411739368799e-06, "loss": 0.9634, "step": 5934 }, { "epoch": 0.05618083887884439, "grad_norm": 368.1000061035156, "learning_rate": 1.996409144028482e-06, "loss": 35.7344, "step": 5935 }, { "epoch": 0.05619030490055944, "grad_norm": 242.17689514160156, "learning_rate": 1.9964065477516056e-06, "loss": 21.1953, "step": 5936 }, { "epoch": 0.056199770922274496, "grad_norm": 484.1561279296875, "learning_rate": 1.996403950538172e-06, "loss": 26.5078, "step": 5937 }, { "epoch": 0.05620923694398955, "grad_norm": 778.7901000976562, "learning_rate": 1.9964013523881832e-06, "loss": 50.3906, "step": 5938 }, { "epoch": 0.0562187029657046, "grad_norm": 489.4440002441406, "learning_rate": 1.9963987533016417e-06, "loss": 34.3281, "step": 5939 }, { "epoch": 0.056228168987419655, "grad_norm": 554.1152954101562, "learning_rate": 1.9963961532785506e-06, "loss": 43.0312, "step": 5940 }, { "epoch": 0.05623763500913471, "grad_norm": 265.2211608886719, "learning_rate": 1.9963935523189117e-06, "loss": 21.5, "step": 5941 }, { "epoch": 0.05624710103084977, "grad_norm": 767.5594482421875, "learning_rate": 1.996390950422728e-06, "loss": 50.875, "step": 5942 }, { "epoch": 0.05625656705256482, "grad_norm": 1035.9884033203125, "learning_rate": 1.996388347590001e-06, "loss": 54.7969, "step": 5943 }, { "epoch": 0.056266033074279874, "grad_norm": 476.7633361816406, "learning_rate": 1.9963857438207344e-06, "loss": 53.5625, "step": 5944 }, { "epoch": 0.05627549909599493, "grad_norm": 462.16143798828125, "learning_rate": 1.9963831391149297e-06, "loss": 37.25, "step": 5945 }, { "epoch": 0.05628496511770998, "grad_norm": 222.39834594726562, "learning_rate": 1.99638053347259e-06, "loss": 27.9453, "step": 5946 }, { "epoch": 0.05629443113942503, "grad_norm": 270.9798889160156, "learning_rate": 1.996377926893717e-06, "loss": 21.6641, "step": 5947 }, { "epoch": 0.056303897161140086, "grad_norm": 842.5490112304688, "learning_rate": 1.996375319378314e-06, "loss": 31.7344, "step": 5948 }, { "epoch": 0.05631336318285514, "grad_norm": 233.71734619140625, "learning_rate": 1.9963727109263825e-06, "loss": 22.0664, "step": 5949 }, { "epoch": 0.05632282920457019, "grad_norm": 501.1098327636719, "learning_rate": 1.9963701015379256e-06, "loss": 40.5, "step": 5950 }, { "epoch": 0.05633229522628525, "grad_norm": 614.6891479492188, "learning_rate": 1.996367491212946e-06, "loss": 51.7734, "step": 5951 }, { "epoch": 0.056341761248000305, "grad_norm": 459.8998107910156, "learning_rate": 1.9963648799514453e-06, "loss": 22.1719, "step": 5952 }, { "epoch": 0.05635122726971536, "grad_norm": 452.5777282714844, "learning_rate": 1.996362267753427e-06, "loss": 40.875, "step": 5953 }, { "epoch": 0.05636069329143041, "grad_norm": 495.5680847167969, "learning_rate": 1.9963596546188923e-06, "loss": 51.7188, "step": 5954 }, { "epoch": 0.056370159313145464, "grad_norm": 188.32371520996094, "learning_rate": 1.996357040547845e-06, "loss": 25.6328, "step": 5955 }, { "epoch": 0.05637962533486052, "grad_norm": 588.9437255859375, "learning_rate": 1.9963544255402865e-06, "loss": 34.5781, "step": 5956 }, { "epoch": 0.05638909135657557, "grad_norm": 365.0994567871094, "learning_rate": 1.9963518095962198e-06, "loss": 20.6523, "step": 5957 }, { "epoch": 0.05639855737829062, "grad_norm": 263.25390625, "learning_rate": 1.996349192715647e-06, "loss": 30.7188, "step": 5958 }, { "epoch": 0.05640802340000568, "grad_norm": 689.253173828125, "learning_rate": 1.996346574898571e-06, "loss": 44.8125, "step": 5959 }, { "epoch": 0.056417489421720736, "grad_norm": 355.2746276855469, "learning_rate": 1.9963439561449943e-06, "loss": 27.0156, "step": 5960 }, { "epoch": 0.05642695544343579, "grad_norm": 1856.3203125, "learning_rate": 1.996341336454919e-06, "loss": 32.0703, "step": 5961 }, { "epoch": 0.05643642146515084, "grad_norm": 400.7957458496094, "learning_rate": 1.9963387158283468e-06, "loss": 46.543, "step": 5962 }, { "epoch": 0.056445887486865895, "grad_norm": 658.3197021484375, "learning_rate": 1.996336094265282e-06, "loss": 55.125, "step": 5963 }, { "epoch": 0.05645535350858095, "grad_norm": 737.3098754882812, "learning_rate": 1.9963334717657252e-06, "loss": 53.375, "step": 5964 }, { "epoch": 0.056464819530296, "grad_norm": 372.07244873046875, "learning_rate": 1.9963308483296804e-06, "loss": 26.8359, "step": 5965 }, { "epoch": 0.056474285552011054, "grad_norm": 184.05409240722656, "learning_rate": 1.9963282239571496e-06, "loss": 26.8281, "step": 5966 }, { "epoch": 0.05648375157372611, "grad_norm": 409.6349182128906, "learning_rate": 1.9963255986481345e-06, "loss": 46.2969, "step": 5967 }, { "epoch": 0.05649321759544117, "grad_norm": 275.34674072265625, "learning_rate": 1.9963229724026385e-06, "loss": 24.0234, "step": 5968 }, { "epoch": 0.05650268361715622, "grad_norm": 347.6881408691406, "learning_rate": 1.9963203452206638e-06, "loss": 20.8672, "step": 5969 }, { "epoch": 0.05651214963887127, "grad_norm": 409.62493896484375, "learning_rate": 1.996317717102213e-06, "loss": 22.3594, "step": 5970 }, { "epoch": 0.056521615660586326, "grad_norm": 191.59320068359375, "learning_rate": 1.9963150880472877e-06, "loss": 24.4375, "step": 5971 }, { "epoch": 0.05653108168230138, "grad_norm": 701.7164306640625, "learning_rate": 1.9963124580558914e-06, "loss": 55.1719, "step": 5972 }, { "epoch": 0.05654054770401643, "grad_norm": 233.51388549804688, "learning_rate": 1.996309827128026e-06, "loss": 34.1797, "step": 5973 }, { "epoch": 0.056550013725731485, "grad_norm": 385.7482604980469, "learning_rate": 1.9963071952636947e-06, "loss": 34.9062, "step": 5974 }, { "epoch": 0.05655947974744654, "grad_norm": 392.1423034667969, "learning_rate": 1.996304562462899e-06, "loss": 30.4375, "step": 5975 }, { "epoch": 0.05656894576916159, "grad_norm": 472.9474792480469, "learning_rate": 1.9963019287256418e-06, "loss": 51.0781, "step": 5976 }, { "epoch": 0.05657841179087665, "grad_norm": 603.6931762695312, "learning_rate": 1.9962992940519257e-06, "loss": 53.2812, "step": 5977 }, { "epoch": 0.056587877812591704, "grad_norm": 472.1724853515625, "learning_rate": 1.9962966584417533e-06, "loss": 27.0469, "step": 5978 }, { "epoch": 0.05659734383430676, "grad_norm": 146.69288635253906, "learning_rate": 1.9962940218951267e-06, "loss": 19.8047, "step": 5979 }, { "epoch": 0.05660680985602181, "grad_norm": 229.28378295898438, "learning_rate": 1.9962913844120484e-06, "loss": 20.0703, "step": 5980 }, { "epoch": 0.05661627587773686, "grad_norm": 599.288330078125, "learning_rate": 1.996288745992521e-06, "loss": 44.3438, "step": 5981 }, { "epoch": 0.056625741899451916, "grad_norm": 232.11016845703125, "learning_rate": 1.996286106636547e-06, "loss": 22.9922, "step": 5982 }, { "epoch": 0.05663520792116697, "grad_norm": 1020.9251098632812, "learning_rate": 1.996283466344129e-06, "loss": 27.0371, "step": 5983 }, { "epoch": 0.05664467394288202, "grad_norm": 807.2850952148438, "learning_rate": 1.9962808251152694e-06, "loss": 68.5469, "step": 5984 }, { "epoch": 0.05665413996459708, "grad_norm": 268.6888122558594, "learning_rate": 1.9962781829499704e-06, "loss": 24.8672, "step": 5985 }, { "epoch": 0.056663605986312135, "grad_norm": 270.078857421875, "learning_rate": 1.996275539848235e-06, "loss": 21.7656, "step": 5986 }, { "epoch": 0.05667307200802719, "grad_norm": 673.930419921875, "learning_rate": 1.9962728958100653e-06, "loss": 30.0625, "step": 5987 }, { "epoch": 0.05668253802974224, "grad_norm": 2.5740957260131836, "learning_rate": 1.996270250835464e-06, "loss": 0.835, "step": 5988 }, { "epoch": 0.056692004051457294, "grad_norm": 339.7935485839844, "learning_rate": 1.996267604924433e-06, "loss": 24.7188, "step": 5989 }, { "epoch": 0.05670147007317235, "grad_norm": 441.92999267578125, "learning_rate": 1.996264958076976e-06, "loss": 27.375, "step": 5990 }, { "epoch": 0.0567109360948874, "grad_norm": 1058.60888671875, "learning_rate": 1.996262310293094e-06, "loss": 37.0391, "step": 5991 }, { "epoch": 0.05672040211660245, "grad_norm": 616.6779174804688, "learning_rate": 1.9962596615727907e-06, "loss": 53.0625, "step": 5992 }, { "epoch": 0.056729868138317506, "grad_norm": 566.9111938476562, "learning_rate": 1.996257011916068e-06, "loss": 26.7656, "step": 5993 }, { "epoch": 0.056739334160032566, "grad_norm": 441.0768737792969, "learning_rate": 1.9962543613229285e-06, "loss": 41.7812, "step": 5994 }, { "epoch": 0.05674880018174762, "grad_norm": 577.2061767578125, "learning_rate": 1.9962517097933746e-06, "loss": 30.0859, "step": 5995 }, { "epoch": 0.05675826620346267, "grad_norm": 252.3997039794922, "learning_rate": 1.996249057327409e-06, "loss": 21.9258, "step": 5996 }, { "epoch": 0.056767732225177725, "grad_norm": 172.93797302246094, "learning_rate": 1.9962464039250344e-06, "loss": 24.6094, "step": 5997 }, { "epoch": 0.05677719824689278, "grad_norm": 197.77999877929688, "learning_rate": 1.996243749586253e-06, "loss": 19.6406, "step": 5998 }, { "epoch": 0.05678666426860783, "grad_norm": 1617.9158935546875, "learning_rate": 1.9962410943110667e-06, "loss": 40.1719, "step": 5999 }, { "epoch": 0.056796130290322884, "grad_norm": 543.5609741210938, "learning_rate": 1.996238438099479e-06, "loss": 55.8672, "step": 6000 }, { "epoch": 0.05680559631203794, "grad_norm": 662.2120361328125, "learning_rate": 1.996235780951492e-06, "loss": 25.1875, "step": 6001 }, { "epoch": 0.056815062333753, "grad_norm": 344.6439514160156, "learning_rate": 1.996233122867108e-06, "loss": 23.3984, "step": 6002 }, { "epoch": 0.05682452835546805, "grad_norm": 3.2073068618774414, "learning_rate": 1.9962304638463296e-06, "loss": 0.7883, "step": 6003 }, { "epoch": 0.0568339943771831, "grad_norm": 736.3690185546875, "learning_rate": 1.9962278038891595e-06, "loss": 40.7383, "step": 6004 }, { "epoch": 0.056843460398898156, "grad_norm": 448.8374938964844, "learning_rate": 1.9962251429956e-06, "loss": 39.8125, "step": 6005 }, { "epoch": 0.05685292642061321, "grad_norm": 375.8259582519531, "learning_rate": 1.996222481165654e-06, "loss": 13.5664, "step": 6006 }, { "epoch": 0.05686239244232826, "grad_norm": 480.1714172363281, "learning_rate": 1.9962198183993232e-06, "loss": 27.1484, "step": 6007 }, { "epoch": 0.056871858464043315, "grad_norm": 627.6149291992188, "learning_rate": 1.996217154696611e-06, "loss": 45.8906, "step": 6008 }, { "epoch": 0.05688132448575837, "grad_norm": 487.7811584472656, "learning_rate": 1.9962144900575193e-06, "loss": 26.3125, "step": 6009 }, { "epoch": 0.05689079050747342, "grad_norm": 415.16302490234375, "learning_rate": 1.996211824482051e-06, "loss": 49.7969, "step": 6010 }, { "epoch": 0.05690025652918848, "grad_norm": 207.4975128173828, "learning_rate": 1.9962091579702078e-06, "loss": 22.4375, "step": 6011 }, { "epoch": 0.056909722550903534, "grad_norm": 832.2964477539062, "learning_rate": 1.9962064905219935e-06, "loss": 67.5625, "step": 6012 }, { "epoch": 0.05691918857261859, "grad_norm": 260.93157958984375, "learning_rate": 1.9962038221374092e-06, "loss": 26.8438, "step": 6013 }, { "epoch": 0.05692865459433364, "grad_norm": 646.7507934570312, "learning_rate": 1.9962011528164584e-06, "loss": 59.375, "step": 6014 }, { "epoch": 0.05693812061604869, "grad_norm": 366.0468444824219, "learning_rate": 1.9961984825591436e-06, "loss": 22.2891, "step": 6015 }, { "epoch": 0.056947586637763746, "grad_norm": 2.9575798511505127, "learning_rate": 1.996195811365467e-06, "loss": 0.8589, "step": 6016 }, { "epoch": 0.0569570526594788, "grad_norm": 258.05963134765625, "learning_rate": 1.996193139235431e-06, "loss": 20.1953, "step": 6017 }, { "epoch": 0.05696651868119385, "grad_norm": 541.1207885742188, "learning_rate": 1.996190466169038e-06, "loss": 33.6289, "step": 6018 }, { "epoch": 0.056975984702908905, "grad_norm": 882.7634887695312, "learning_rate": 1.996187792166291e-06, "loss": 52.6719, "step": 6019 }, { "epoch": 0.056985450724623965, "grad_norm": 739.6248779296875, "learning_rate": 1.9961851172271926e-06, "loss": 45.6562, "step": 6020 }, { "epoch": 0.05699491674633902, "grad_norm": 598.9683837890625, "learning_rate": 1.9961824413517446e-06, "loss": 45.5312, "step": 6021 }, { "epoch": 0.05700438276805407, "grad_norm": 316.1513366699219, "learning_rate": 1.99617976453995e-06, "loss": 35.1875, "step": 6022 }, { "epoch": 0.057013848789769124, "grad_norm": 355.3245849609375, "learning_rate": 1.996177086791811e-06, "loss": 30.7812, "step": 6023 }, { "epoch": 0.05702331481148418, "grad_norm": 260.68841552734375, "learning_rate": 1.996174408107331e-06, "loss": 24.3281, "step": 6024 }, { "epoch": 0.05703278083319923, "grad_norm": 2.6083550453186035, "learning_rate": 1.9961717284865116e-06, "loss": 0.8057, "step": 6025 }, { "epoch": 0.05704224685491428, "grad_norm": 470.07440185546875, "learning_rate": 1.9961690479293553e-06, "loss": 45.9062, "step": 6026 }, { "epoch": 0.057051712876629336, "grad_norm": 410.11505126953125, "learning_rate": 1.9961663664358647e-06, "loss": 25.9688, "step": 6027 }, { "epoch": 0.057061178898344396, "grad_norm": 2.7941408157348633, "learning_rate": 1.9961636840060428e-06, "loss": 0.814, "step": 6028 }, { "epoch": 0.05707064492005945, "grad_norm": 445.6219482421875, "learning_rate": 1.996161000639892e-06, "loss": 28.4609, "step": 6029 }, { "epoch": 0.0570801109417745, "grad_norm": 477.0326843261719, "learning_rate": 1.9961583163374148e-06, "loss": 37.1562, "step": 6030 }, { "epoch": 0.057089576963489555, "grad_norm": 537.1514282226562, "learning_rate": 1.996155631098613e-06, "loss": 28.1797, "step": 6031 }, { "epoch": 0.05709904298520461, "grad_norm": 545.252197265625, "learning_rate": 1.99615294492349e-06, "loss": 46.6094, "step": 6032 }, { "epoch": 0.05710850900691966, "grad_norm": 274.0960693359375, "learning_rate": 1.9961502578120483e-06, "loss": 29.7969, "step": 6033 }, { "epoch": 0.057117975028634714, "grad_norm": 340.5997619628906, "learning_rate": 1.99614756976429e-06, "loss": 19.7344, "step": 6034 }, { "epoch": 0.05712744105034977, "grad_norm": 3.4164512157440186, "learning_rate": 1.9961448807802177e-06, "loss": 0.9756, "step": 6035 }, { "epoch": 0.05713690707206482, "grad_norm": 476.407958984375, "learning_rate": 1.996142190859834e-06, "loss": 60.5156, "step": 6036 }, { "epoch": 0.05714637309377988, "grad_norm": 504.6916809082031, "learning_rate": 1.996139500003141e-06, "loss": 53.9375, "step": 6037 }, { "epoch": 0.05715583911549493, "grad_norm": 365.3038024902344, "learning_rate": 1.9961368082101425e-06, "loss": 50.2188, "step": 6038 }, { "epoch": 0.057165305137209986, "grad_norm": 281.85589599609375, "learning_rate": 1.99613411548084e-06, "loss": 23.8828, "step": 6039 }, { "epoch": 0.05717477115892504, "grad_norm": 262.01751708984375, "learning_rate": 1.996131421815236e-06, "loss": 26.9766, "step": 6040 }, { "epoch": 0.05718423718064009, "grad_norm": 281.5522766113281, "learning_rate": 1.9961287272133337e-06, "loss": 22.8594, "step": 6041 }, { "epoch": 0.057193703202355145, "grad_norm": 2.9328384399414062, "learning_rate": 1.9961260316751347e-06, "loss": 0.8245, "step": 6042 }, { "epoch": 0.0572031692240702, "grad_norm": 424.8170166015625, "learning_rate": 1.996123335200642e-06, "loss": 30.2344, "step": 6043 }, { "epoch": 0.05721263524578525, "grad_norm": 320.5901184082031, "learning_rate": 1.9961206377898587e-06, "loss": 29.5078, "step": 6044 }, { "epoch": 0.05722210126750031, "grad_norm": 552.461181640625, "learning_rate": 1.9961179394427863e-06, "loss": 32.9297, "step": 6045 }, { "epoch": 0.057231567289215364, "grad_norm": 246.4752655029297, "learning_rate": 1.9961152401594277e-06, "loss": 19.0859, "step": 6046 }, { "epoch": 0.05724103331093042, "grad_norm": 574.8311767578125, "learning_rate": 1.996112539939786e-06, "loss": 57.9375, "step": 6047 }, { "epoch": 0.05725049933264547, "grad_norm": 466.3529968261719, "learning_rate": 1.9961098387838635e-06, "loss": 33.7656, "step": 6048 }, { "epoch": 0.05725996535436052, "grad_norm": 2.738769054412842, "learning_rate": 1.996107136691662e-06, "loss": 0.855, "step": 6049 }, { "epoch": 0.057269431376075576, "grad_norm": 629.6502075195312, "learning_rate": 1.996104433663185e-06, "loss": 71.4688, "step": 6050 }, { "epoch": 0.05727889739779063, "grad_norm": 205.0482177734375, "learning_rate": 1.996101729698434e-06, "loss": 25.0703, "step": 6051 }, { "epoch": 0.05728836341950568, "grad_norm": 581.24755859375, "learning_rate": 1.996099024797413e-06, "loss": 52.1719, "step": 6052 }, { "epoch": 0.057297829441220735, "grad_norm": 258.4392395019531, "learning_rate": 1.9960963189601235e-06, "loss": 22.8047, "step": 6053 }, { "epoch": 0.057307295462935795, "grad_norm": 2308.922119140625, "learning_rate": 1.9960936121865683e-06, "loss": 30.0625, "step": 6054 }, { "epoch": 0.05731676148465085, "grad_norm": 575.9912109375, "learning_rate": 1.9960909044767494e-06, "loss": 49.75, "step": 6055 }, { "epoch": 0.0573262275063659, "grad_norm": 560.9868774414062, "learning_rate": 1.9960881958306703e-06, "loss": 41.3281, "step": 6056 }, { "epoch": 0.057335693528080954, "grad_norm": 443.5106201171875, "learning_rate": 1.996085486248333e-06, "loss": 44.9688, "step": 6057 }, { "epoch": 0.05734515954979601, "grad_norm": 521.419189453125, "learning_rate": 1.99608277572974e-06, "loss": 42.1875, "step": 6058 }, { "epoch": 0.05735462557151106, "grad_norm": 324.8987121582031, "learning_rate": 1.9960800642748943e-06, "loss": 32.5625, "step": 6059 }, { "epoch": 0.05736409159322611, "grad_norm": 309.31011962890625, "learning_rate": 1.9960773518837975e-06, "loss": 29.3125, "step": 6060 }, { "epoch": 0.057373557614941166, "grad_norm": 487.1173400878906, "learning_rate": 1.9960746385564536e-06, "loss": 28.0391, "step": 6061 }, { "epoch": 0.05738302363665622, "grad_norm": 816.8126831054688, "learning_rate": 1.9960719242928642e-06, "loss": 42.6094, "step": 6062 }, { "epoch": 0.05739248965837128, "grad_norm": 359.6516418457031, "learning_rate": 1.9960692090930316e-06, "loss": 25.0312, "step": 6063 }, { "epoch": 0.05740195568008633, "grad_norm": 820.0055541992188, "learning_rate": 1.996066492956959e-06, "loss": 50.5781, "step": 6064 }, { "epoch": 0.057411421701801385, "grad_norm": 189.71844482421875, "learning_rate": 1.9960637758846482e-06, "loss": 19.375, "step": 6065 }, { "epoch": 0.05742088772351644, "grad_norm": 493.0027770996094, "learning_rate": 1.996061057876103e-06, "loss": 28.9844, "step": 6066 }, { "epoch": 0.05743035374523149, "grad_norm": 619.5789184570312, "learning_rate": 1.9960583389313247e-06, "loss": 28.3047, "step": 6067 }, { "epoch": 0.057439819766946544, "grad_norm": 570.5950927734375, "learning_rate": 1.9960556190503166e-06, "loss": 32.8672, "step": 6068 }, { "epoch": 0.0574492857886616, "grad_norm": 557.8634643554688, "learning_rate": 1.996052898233081e-06, "loss": 19.5078, "step": 6069 }, { "epoch": 0.05745875181037665, "grad_norm": 350.1029052734375, "learning_rate": 1.99605017647962e-06, "loss": 43.8125, "step": 6070 }, { "epoch": 0.05746821783209171, "grad_norm": 265.56988525390625, "learning_rate": 1.9960474537899375e-06, "loss": 23.25, "step": 6071 }, { "epoch": 0.05747768385380676, "grad_norm": 475.6175537109375, "learning_rate": 1.9960447301640346e-06, "loss": 69.1406, "step": 6072 }, { "epoch": 0.057487149875521816, "grad_norm": 233.93174743652344, "learning_rate": 1.9960420056019146e-06, "loss": 24.2266, "step": 6073 }, { "epoch": 0.05749661589723687, "grad_norm": 343.20147705078125, "learning_rate": 1.99603928010358e-06, "loss": 26.9531, "step": 6074 }, { "epoch": 0.05750608191895192, "grad_norm": 795.4180908203125, "learning_rate": 1.9960365536690327e-06, "loss": 35.2734, "step": 6075 }, { "epoch": 0.057515547940666975, "grad_norm": 472.4461364746094, "learning_rate": 1.9960338262982764e-06, "loss": 27.3203, "step": 6076 }, { "epoch": 0.05752501396238203, "grad_norm": 484.9478759765625, "learning_rate": 1.9960310979913132e-06, "loss": 21.5078, "step": 6077 }, { "epoch": 0.05753447998409708, "grad_norm": 149.27859497070312, "learning_rate": 1.9960283687481453e-06, "loss": 20.5312, "step": 6078 }, { "epoch": 0.057543946005812134, "grad_norm": 622.5048828125, "learning_rate": 1.9960256385687755e-06, "loss": 24.6211, "step": 6079 }, { "epoch": 0.057553412027527194, "grad_norm": 340.9183044433594, "learning_rate": 1.9960229074532065e-06, "loss": 25.0312, "step": 6080 }, { "epoch": 0.05756287804924225, "grad_norm": 494.12725830078125, "learning_rate": 1.996020175401441e-06, "loss": 41.375, "step": 6081 }, { "epoch": 0.0575723440709573, "grad_norm": 733.7509155273438, "learning_rate": 1.996017442413481e-06, "loss": 26.6406, "step": 6082 }, { "epoch": 0.05758181009267235, "grad_norm": 179.0654296875, "learning_rate": 1.9960147084893297e-06, "loss": 20.3203, "step": 6083 }, { "epoch": 0.057591276114387406, "grad_norm": 421.48834228515625, "learning_rate": 1.996011973628989e-06, "loss": 29.9844, "step": 6084 }, { "epoch": 0.05760074213610246, "grad_norm": 612.7059326171875, "learning_rate": 1.996009237832462e-06, "loss": 30.9922, "step": 6085 }, { "epoch": 0.05761020815781751, "grad_norm": 294.0478515625, "learning_rate": 1.996006501099751e-06, "loss": 19.2891, "step": 6086 }, { "epoch": 0.057619674179532565, "grad_norm": 296.74542236328125, "learning_rate": 1.996003763430859e-06, "loss": 25.2734, "step": 6087 }, { "epoch": 0.057629140201247625, "grad_norm": 305.5855407714844, "learning_rate": 1.9960010248257876e-06, "loss": 16.8672, "step": 6088 }, { "epoch": 0.05763860622296268, "grad_norm": 445.32537841796875, "learning_rate": 1.995998285284541e-06, "loss": 53.625, "step": 6089 }, { "epoch": 0.05764807224467773, "grad_norm": 356.7520446777344, "learning_rate": 1.99599554480712e-06, "loss": 23.1641, "step": 6090 }, { "epoch": 0.057657538266392784, "grad_norm": 463.4225158691406, "learning_rate": 1.9959928033935284e-06, "loss": 30.7188, "step": 6091 }, { "epoch": 0.05766700428810784, "grad_norm": 884.9656372070312, "learning_rate": 1.995990061043768e-06, "loss": 36.2266, "step": 6092 }, { "epoch": 0.05767647030982289, "grad_norm": 436.34881591796875, "learning_rate": 1.995987317757842e-06, "loss": 43.2031, "step": 6093 }, { "epoch": 0.05768593633153794, "grad_norm": 437.49847412109375, "learning_rate": 1.9959845735357524e-06, "loss": 32.5, "step": 6094 }, { "epoch": 0.057695402353252996, "grad_norm": 1481.4610595703125, "learning_rate": 1.9959818283775024e-06, "loss": 30.6875, "step": 6095 }, { "epoch": 0.05770486837496805, "grad_norm": 371.42626953125, "learning_rate": 1.9959790822830943e-06, "loss": 27.7188, "step": 6096 }, { "epoch": 0.05771433439668311, "grad_norm": 496.5184326171875, "learning_rate": 1.995976335252531e-06, "loss": 44.7188, "step": 6097 }, { "epoch": 0.05772380041839816, "grad_norm": 601.0010375976562, "learning_rate": 1.995973587285814e-06, "loss": 42.2969, "step": 6098 }, { "epoch": 0.057733266440113215, "grad_norm": 206.8031463623047, "learning_rate": 1.995970838382947e-06, "loss": 30.9141, "step": 6099 }, { "epoch": 0.05774273246182827, "grad_norm": 451.1141357421875, "learning_rate": 1.9959680885439323e-06, "loss": 21.1797, "step": 6100 }, { "epoch": 0.05775219848354332, "grad_norm": 214.62611389160156, "learning_rate": 1.995965337768772e-06, "loss": 18.0547, "step": 6101 }, { "epoch": 0.057761664505258374, "grad_norm": 253.42626953125, "learning_rate": 1.9959625860574695e-06, "loss": 32.4219, "step": 6102 }, { "epoch": 0.05777113052697343, "grad_norm": 422.7759704589844, "learning_rate": 1.9959598334100265e-06, "loss": 22.6328, "step": 6103 }, { "epoch": 0.05778059654868848, "grad_norm": 319.71099853515625, "learning_rate": 1.9959570798264465e-06, "loss": 24.125, "step": 6104 }, { "epoch": 0.05779006257040353, "grad_norm": 466.8223571777344, "learning_rate": 1.9959543253067316e-06, "loss": 21.2617, "step": 6105 }, { "epoch": 0.05779952859211859, "grad_norm": 437.90771484375, "learning_rate": 1.9959515698508844e-06, "loss": 45.4453, "step": 6106 }, { "epoch": 0.057808994613833646, "grad_norm": 734.5560913085938, "learning_rate": 1.995948813458907e-06, "loss": 75.7812, "step": 6107 }, { "epoch": 0.0578184606355487, "grad_norm": 521.664794921875, "learning_rate": 1.995946056130803e-06, "loss": 55.0312, "step": 6108 }, { "epoch": 0.05782792665726375, "grad_norm": 543.9450073242188, "learning_rate": 1.9959432978665745e-06, "loss": 40.8281, "step": 6109 }, { "epoch": 0.057837392678978805, "grad_norm": 294.8009338378906, "learning_rate": 1.995940538666224e-06, "loss": 14.8398, "step": 6110 }, { "epoch": 0.05784685870069386, "grad_norm": 629.453369140625, "learning_rate": 1.995937778529754e-06, "loss": 46.6484, "step": 6111 }, { "epoch": 0.05785632472240891, "grad_norm": 564.6889038085938, "learning_rate": 1.9959350174571675e-06, "loss": 54.5625, "step": 6112 }, { "epoch": 0.057865790744123964, "grad_norm": 1076.77197265625, "learning_rate": 1.995932255448467e-06, "loss": 66.2188, "step": 6113 }, { "epoch": 0.057875256765839024, "grad_norm": 152.33499145507812, "learning_rate": 1.9959294925036548e-06, "loss": 22.8438, "step": 6114 }, { "epoch": 0.05788472278755408, "grad_norm": 187.46165466308594, "learning_rate": 1.9959267286227333e-06, "loss": 20.25, "step": 6115 }, { "epoch": 0.05789418880926913, "grad_norm": 506.64862060546875, "learning_rate": 1.9959239638057062e-06, "loss": 16.2852, "step": 6116 }, { "epoch": 0.05790365483098418, "grad_norm": 597.6337280273438, "learning_rate": 1.995921198052575e-06, "loss": 51.2422, "step": 6117 }, { "epoch": 0.057913120852699236, "grad_norm": 452.3586730957031, "learning_rate": 1.9959184313633427e-06, "loss": 36.3125, "step": 6118 }, { "epoch": 0.05792258687441429, "grad_norm": 250.21087646484375, "learning_rate": 1.9959156637380117e-06, "loss": 22.5156, "step": 6119 }, { "epoch": 0.05793205289612934, "grad_norm": 619.9223022460938, "learning_rate": 1.995912895176585e-06, "loss": 47.6719, "step": 6120 }, { "epoch": 0.057941518917844395, "grad_norm": 155.6558380126953, "learning_rate": 1.9959101256790646e-06, "loss": 26.4531, "step": 6121 }, { "epoch": 0.05795098493955945, "grad_norm": 351.5205383300781, "learning_rate": 1.995907355245454e-06, "loss": 47.8594, "step": 6122 }, { "epoch": 0.05796045096127451, "grad_norm": 939.7988891601562, "learning_rate": 1.995904583875755e-06, "loss": 25.0977, "step": 6123 }, { "epoch": 0.05796991698298956, "grad_norm": 462.1754455566406, "learning_rate": 1.9959018115699703e-06, "loss": 32.9766, "step": 6124 }, { "epoch": 0.057979383004704614, "grad_norm": 3.2234838008880615, "learning_rate": 1.995899038328103e-06, "loss": 0.8818, "step": 6125 }, { "epoch": 0.05798884902641967, "grad_norm": 368.183837890625, "learning_rate": 1.995896264150155e-06, "loss": 27.5, "step": 6126 }, { "epoch": 0.05799831504813472, "grad_norm": 179.4762420654297, "learning_rate": 1.99589348903613e-06, "loss": 18.8984, "step": 6127 }, { "epoch": 0.05800778106984977, "grad_norm": 465.8351745605469, "learning_rate": 1.9958907129860294e-06, "loss": 44.625, "step": 6128 }, { "epoch": 0.058017247091564826, "grad_norm": 410.76513671875, "learning_rate": 1.995887935999856e-06, "loss": 36.0781, "step": 6129 }, { "epoch": 0.05802671311327988, "grad_norm": 502.5469970703125, "learning_rate": 1.995885158077613e-06, "loss": 41.0938, "step": 6130 }, { "epoch": 0.05803617913499494, "grad_norm": 701.6943969726562, "learning_rate": 1.995882379219303e-06, "loss": 33.6484, "step": 6131 }, { "epoch": 0.05804564515670999, "grad_norm": 300.2444152832031, "learning_rate": 1.995879599424928e-06, "loss": 28.6094, "step": 6132 }, { "epoch": 0.058055111178425045, "grad_norm": 454.6190490722656, "learning_rate": 1.9958768186944914e-06, "loss": 19.6562, "step": 6133 }, { "epoch": 0.0580645772001401, "grad_norm": 2.9063475131988525, "learning_rate": 1.9958740370279953e-06, "loss": 0.9331, "step": 6134 }, { "epoch": 0.05807404322185515, "grad_norm": 432.82568359375, "learning_rate": 1.995871254425442e-06, "loss": 43.2812, "step": 6135 }, { "epoch": 0.058083509243570204, "grad_norm": 327.11181640625, "learning_rate": 1.995868470886835e-06, "loss": 29.0781, "step": 6136 }, { "epoch": 0.05809297526528526, "grad_norm": 319.7499084472656, "learning_rate": 1.995865686412176e-06, "loss": 26.7656, "step": 6137 }, { "epoch": 0.05810244128700031, "grad_norm": 492.23907470703125, "learning_rate": 1.995862901001468e-06, "loss": 33.9961, "step": 6138 }, { "epoch": 0.05811190730871536, "grad_norm": 265.81634521484375, "learning_rate": 1.995860114654714e-06, "loss": 19.4375, "step": 6139 }, { "epoch": 0.05812137333043042, "grad_norm": 260.2234191894531, "learning_rate": 1.995857327371916e-06, "loss": 26.6484, "step": 6140 }, { "epoch": 0.058130839352145476, "grad_norm": 330.59149169921875, "learning_rate": 1.9958545391530772e-06, "loss": 49.7344, "step": 6141 }, { "epoch": 0.05814030537386053, "grad_norm": 163.14785766601562, "learning_rate": 1.9958517499981995e-06, "loss": 22.8594, "step": 6142 }, { "epoch": 0.05814977139557558, "grad_norm": 200.0049591064453, "learning_rate": 1.9958489599072865e-06, "loss": 24.4688, "step": 6143 }, { "epoch": 0.058159237417290635, "grad_norm": 704.4409790039062, "learning_rate": 1.9958461688803398e-06, "loss": 23.1094, "step": 6144 }, { "epoch": 0.05816870343900569, "grad_norm": 238.40333557128906, "learning_rate": 1.9958433769173628e-06, "loss": 18.1875, "step": 6145 }, { "epoch": 0.05817816946072074, "grad_norm": 2.998922348022461, "learning_rate": 1.9958405840183576e-06, "loss": 0.8958, "step": 6146 }, { "epoch": 0.058187635482435794, "grad_norm": 382.3394775390625, "learning_rate": 1.995837790183327e-06, "loss": 22.4609, "step": 6147 }, { "epoch": 0.05819710150415085, "grad_norm": 566.8423461914062, "learning_rate": 1.995834995412274e-06, "loss": 37.3867, "step": 6148 }, { "epoch": 0.05820656752586591, "grad_norm": 490.437744140625, "learning_rate": 1.9958321997052003e-06, "loss": 25.5625, "step": 6149 }, { "epoch": 0.05821603354758096, "grad_norm": 419.26678466796875, "learning_rate": 1.9958294030621096e-06, "loss": 15.8555, "step": 6150 }, { "epoch": 0.05822549956929601, "grad_norm": 765.2266845703125, "learning_rate": 1.995826605483004e-06, "loss": 35.7656, "step": 6151 }, { "epoch": 0.058234965591011066, "grad_norm": 576.5387573242188, "learning_rate": 1.9958238069678857e-06, "loss": 39.8203, "step": 6152 }, { "epoch": 0.05824443161272612, "grad_norm": 599.8582763671875, "learning_rate": 1.9958210075167584e-06, "loss": 33.5781, "step": 6153 }, { "epoch": 0.05825389763444117, "grad_norm": 308.6522216796875, "learning_rate": 1.995818207129624e-06, "loss": 25.2578, "step": 6154 }, { "epoch": 0.058263363656156225, "grad_norm": 718.4147338867188, "learning_rate": 1.995815405806485e-06, "loss": 48.1875, "step": 6155 }, { "epoch": 0.05827282967787128, "grad_norm": 178.73890686035156, "learning_rate": 1.9958126035473443e-06, "loss": 26.3203, "step": 6156 }, { "epoch": 0.05828229569958634, "grad_norm": 295.66571044921875, "learning_rate": 1.9958098003522048e-06, "loss": 42.0781, "step": 6157 }, { "epoch": 0.05829176172130139, "grad_norm": 385.95257568359375, "learning_rate": 1.995806996221069e-06, "loss": 43.5625, "step": 6158 }, { "epoch": 0.058301227743016444, "grad_norm": 1058.294677734375, "learning_rate": 1.995804191153939e-06, "loss": 66.7031, "step": 6159 }, { "epoch": 0.0583106937647315, "grad_norm": 411.1528625488281, "learning_rate": 1.995801385150818e-06, "loss": 24.8906, "step": 6160 }, { "epoch": 0.05832015978644655, "grad_norm": 534.4652099609375, "learning_rate": 1.9957985782117084e-06, "loss": 31.625, "step": 6161 }, { "epoch": 0.0583296258081616, "grad_norm": 444.7084045410156, "learning_rate": 1.995795770336613e-06, "loss": 24.3047, "step": 6162 }, { "epoch": 0.058339091829876656, "grad_norm": 195.21707153320312, "learning_rate": 1.9957929615255346e-06, "loss": 20.6797, "step": 6163 }, { "epoch": 0.05834855785159171, "grad_norm": 1075.02001953125, "learning_rate": 1.9957901517784753e-06, "loss": 45.4141, "step": 6164 }, { "epoch": 0.05835802387330676, "grad_norm": 3.8759095668792725, "learning_rate": 1.995787341095438e-06, "loss": 0.9312, "step": 6165 }, { "epoch": 0.05836748989502182, "grad_norm": 313.6600036621094, "learning_rate": 1.9957845294764257e-06, "loss": 27.8594, "step": 6166 }, { "epoch": 0.058376955916736875, "grad_norm": 686.0399780273438, "learning_rate": 1.9957817169214403e-06, "loss": 53.2188, "step": 6167 }, { "epoch": 0.05838642193845193, "grad_norm": 608.9328002929688, "learning_rate": 1.995778903430485e-06, "loss": 20.7031, "step": 6168 }, { "epoch": 0.05839588796016698, "grad_norm": 3.335322141647339, "learning_rate": 1.9957760890035626e-06, "loss": 0.9502, "step": 6169 }, { "epoch": 0.058405353981882034, "grad_norm": 413.2901916503906, "learning_rate": 1.9957732736406753e-06, "loss": 26.7656, "step": 6170 }, { "epoch": 0.05841482000359709, "grad_norm": 437.28759765625, "learning_rate": 1.9957704573418255e-06, "loss": 41.5625, "step": 6171 }, { "epoch": 0.05842428602531214, "grad_norm": 237.4875030517578, "learning_rate": 1.9957676401070166e-06, "loss": 22.9688, "step": 6172 }, { "epoch": 0.05843375204702719, "grad_norm": 1336.7020263671875, "learning_rate": 1.9957648219362507e-06, "loss": 85.8281, "step": 6173 }, { "epoch": 0.05844321806874225, "grad_norm": 490.8914794921875, "learning_rate": 1.995762002829531e-06, "loss": 43.1406, "step": 6174 }, { "epoch": 0.058452684090457306, "grad_norm": 266.9607849121094, "learning_rate": 1.9957591827868597e-06, "loss": 39.2812, "step": 6175 }, { "epoch": 0.05846215011217236, "grad_norm": 936.501220703125, "learning_rate": 1.9957563618082393e-06, "loss": 36.0703, "step": 6176 }, { "epoch": 0.05847161613388741, "grad_norm": 1127.9208984375, "learning_rate": 1.995753539893673e-06, "loss": 47.7969, "step": 6177 }, { "epoch": 0.058481082155602465, "grad_norm": 3.464064836502075, "learning_rate": 1.995750717043163e-06, "loss": 0.8115, "step": 6178 }, { "epoch": 0.05849054817731752, "grad_norm": 433.02545166015625, "learning_rate": 1.995747893256712e-06, "loss": 15.0234, "step": 6179 }, { "epoch": 0.05850001419903257, "grad_norm": 3.4729361534118652, "learning_rate": 1.995745068534323e-06, "loss": 0.9966, "step": 6180 }, { "epoch": 0.058509480220747624, "grad_norm": 308.822509765625, "learning_rate": 1.9957422428759984e-06, "loss": 35.5625, "step": 6181 }, { "epoch": 0.05851894624246268, "grad_norm": 670.1209106445312, "learning_rate": 1.9957394162817406e-06, "loss": 36.2344, "step": 6182 }, { "epoch": 0.05852841226417774, "grad_norm": 550.752685546875, "learning_rate": 1.995736588751553e-06, "loss": 43.6562, "step": 6183 }, { "epoch": 0.05853787828589279, "grad_norm": 175.62399291992188, "learning_rate": 1.9957337602854373e-06, "loss": 25.3672, "step": 6184 }, { "epoch": 0.05854734430760784, "grad_norm": 293.2277526855469, "learning_rate": 1.995730930883397e-06, "loss": 20.8047, "step": 6185 }, { "epoch": 0.058556810329322896, "grad_norm": 342.6553955078125, "learning_rate": 1.9957281005454342e-06, "loss": 19.4219, "step": 6186 }, { "epoch": 0.05856627635103795, "grad_norm": 273.6746520996094, "learning_rate": 1.9957252692715514e-06, "loss": 18.6758, "step": 6187 }, { "epoch": 0.058575742372753, "grad_norm": 265.65087890625, "learning_rate": 1.9957224370617523e-06, "loss": 23.5469, "step": 6188 }, { "epoch": 0.058585208394468055, "grad_norm": 206.0570068359375, "learning_rate": 1.9957196039160385e-06, "loss": 19.6719, "step": 6189 }, { "epoch": 0.05859467441618311, "grad_norm": 942.96484375, "learning_rate": 1.9957167698344134e-06, "loss": 54.6328, "step": 6190 }, { "epoch": 0.05860414043789816, "grad_norm": 253.86111450195312, "learning_rate": 1.9957139348168787e-06, "loss": 22.5234, "step": 6191 }, { "epoch": 0.05861360645961322, "grad_norm": 566.4148559570312, "learning_rate": 1.9957110988634383e-06, "loss": 55.9062, "step": 6192 }, { "epoch": 0.058623072481328274, "grad_norm": 555.9630737304688, "learning_rate": 1.995708261974094e-06, "loss": 36.6719, "step": 6193 }, { "epoch": 0.05863253850304333, "grad_norm": 479.6463623046875, "learning_rate": 1.9957054241488487e-06, "loss": 16.3438, "step": 6194 }, { "epoch": 0.05864200452475838, "grad_norm": 3.297760009765625, "learning_rate": 1.995702585387705e-06, "loss": 0.9038, "step": 6195 }, { "epoch": 0.05865147054647343, "grad_norm": 154.11647033691406, "learning_rate": 1.9956997456906656e-06, "loss": 23.1797, "step": 6196 }, { "epoch": 0.058660936568188486, "grad_norm": 397.64306640625, "learning_rate": 1.9956969050577332e-06, "loss": 23.3672, "step": 6197 }, { "epoch": 0.05867040258990354, "grad_norm": 281.5032653808594, "learning_rate": 1.9956940634889108e-06, "loss": 23.3789, "step": 6198 }, { "epoch": 0.05867986861161859, "grad_norm": 1884.361572265625, "learning_rate": 1.9956912209842003e-06, "loss": 73.7812, "step": 6199 }, { "epoch": 0.05868933463333365, "grad_norm": 429.50872802734375, "learning_rate": 1.995688377543605e-06, "loss": 49.5625, "step": 6200 }, { "epoch": 0.058698800655048705, "grad_norm": 187.98019409179688, "learning_rate": 1.995685533167128e-06, "loss": 18.0469, "step": 6201 }, { "epoch": 0.05870826667676376, "grad_norm": 578.8253784179688, "learning_rate": 1.9956826878547704e-06, "loss": 48.4375, "step": 6202 }, { "epoch": 0.05871773269847881, "grad_norm": 299.3757629394531, "learning_rate": 1.9956798416065364e-06, "loss": 26.7969, "step": 6203 }, { "epoch": 0.058727198720193864, "grad_norm": 368.16339111328125, "learning_rate": 1.995676994422428e-06, "loss": 31.7891, "step": 6204 }, { "epoch": 0.05873666474190892, "grad_norm": 460.57525634765625, "learning_rate": 1.995674146302448e-06, "loss": 30.8047, "step": 6205 }, { "epoch": 0.05874613076362397, "grad_norm": 531.7388916015625, "learning_rate": 1.995671297246599e-06, "loss": 34.0, "step": 6206 }, { "epoch": 0.05875559678533902, "grad_norm": 311.6280517578125, "learning_rate": 1.995668447254884e-06, "loss": 22.1562, "step": 6207 }, { "epoch": 0.058765062807054076, "grad_norm": 473.8367004394531, "learning_rate": 1.9956655963273053e-06, "loss": 53.8594, "step": 6208 }, { "epoch": 0.058774528828769136, "grad_norm": 231.59571838378906, "learning_rate": 1.9956627444638655e-06, "loss": 19.8438, "step": 6209 }, { "epoch": 0.05878399485048419, "grad_norm": 377.1076965332031, "learning_rate": 1.9956598916645677e-06, "loss": 42.5312, "step": 6210 }, { "epoch": 0.05879346087219924, "grad_norm": 637.999755859375, "learning_rate": 1.9956570379294142e-06, "loss": 17.6016, "step": 6211 }, { "epoch": 0.058802926893914295, "grad_norm": 298.9466857910156, "learning_rate": 1.9956541832584083e-06, "loss": 30.6016, "step": 6212 }, { "epoch": 0.05881239291562935, "grad_norm": 537.0502319335938, "learning_rate": 1.9956513276515517e-06, "loss": 23.9531, "step": 6213 }, { "epoch": 0.0588218589373444, "grad_norm": 2.71209454536438, "learning_rate": 1.9956484711088477e-06, "loss": 0.8457, "step": 6214 }, { "epoch": 0.058831324959059454, "grad_norm": 291.3915100097656, "learning_rate": 1.9956456136302994e-06, "loss": 19.0, "step": 6215 }, { "epoch": 0.05884079098077451, "grad_norm": 567.547607421875, "learning_rate": 1.995642755215909e-06, "loss": 23.2891, "step": 6216 }, { "epoch": 0.05885025700248957, "grad_norm": 2.9538023471832275, "learning_rate": 1.9956398958656787e-06, "loss": 0.6895, "step": 6217 }, { "epoch": 0.05885972302420462, "grad_norm": 695.7349243164062, "learning_rate": 1.9956370355796117e-06, "loss": 32.5312, "step": 6218 }, { "epoch": 0.05886918904591967, "grad_norm": 229.80381774902344, "learning_rate": 1.9956341743577106e-06, "loss": 30.2812, "step": 6219 }, { "epoch": 0.058878655067634726, "grad_norm": 359.1683654785156, "learning_rate": 1.9956313121999786e-06, "loss": 25.2656, "step": 6220 }, { "epoch": 0.05888812108934978, "grad_norm": 334.2339782714844, "learning_rate": 1.9956284491064176e-06, "loss": 24.0234, "step": 6221 }, { "epoch": 0.05889758711106483, "grad_norm": 436.1961669921875, "learning_rate": 1.995625585077031e-06, "loss": 44.0781, "step": 6222 }, { "epoch": 0.058907053132779885, "grad_norm": 316.7265930175781, "learning_rate": 1.9956227201118205e-06, "loss": 19.1016, "step": 6223 }, { "epoch": 0.05891651915449494, "grad_norm": 518.1422729492188, "learning_rate": 1.9956198542107895e-06, "loss": 19.7891, "step": 6224 }, { "epoch": 0.05892598517620999, "grad_norm": 436.12835693359375, "learning_rate": 1.9956169873739412e-06, "loss": 24.8438, "step": 6225 }, { "epoch": 0.05893545119792505, "grad_norm": 300.46630859375, "learning_rate": 1.995614119601277e-06, "loss": 19.7734, "step": 6226 }, { "epoch": 0.058944917219640104, "grad_norm": 263.4805908203125, "learning_rate": 1.995611250892801e-06, "loss": 31.4141, "step": 6227 }, { "epoch": 0.05895438324135516, "grad_norm": 937.9735107421875, "learning_rate": 1.9956083812485146e-06, "loss": 37.6328, "step": 6228 }, { "epoch": 0.05896384926307021, "grad_norm": 626.0648193359375, "learning_rate": 1.995605510668421e-06, "loss": 28.375, "step": 6229 }, { "epoch": 0.05897331528478526, "grad_norm": 461.75750732421875, "learning_rate": 1.9956026391525235e-06, "loss": 45.9141, "step": 6230 }, { "epoch": 0.058982781306500316, "grad_norm": 3.1103546619415283, "learning_rate": 1.9955997667008243e-06, "loss": 0.9331, "step": 6231 }, { "epoch": 0.05899224732821537, "grad_norm": 460.9446716308594, "learning_rate": 1.995596893313326e-06, "loss": 46.8906, "step": 6232 }, { "epoch": 0.05900171334993042, "grad_norm": 238.0462188720703, "learning_rate": 1.9955940189900313e-06, "loss": 28.0625, "step": 6233 }, { "epoch": 0.059011179371645475, "grad_norm": 363.671142578125, "learning_rate": 1.9955911437309426e-06, "loss": 49.0312, "step": 6234 }, { "epoch": 0.059020645393360535, "grad_norm": 797.9710693359375, "learning_rate": 1.9955882675360638e-06, "loss": 28.4844, "step": 6235 }, { "epoch": 0.05903011141507559, "grad_norm": 361.6568908691406, "learning_rate": 1.995585390405396e-06, "loss": 58.375, "step": 6236 }, { "epoch": 0.05903957743679064, "grad_norm": 883.0831909179688, "learning_rate": 1.995582512338943e-06, "loss": 72.3438, "step": 6237 }, { "epoch": 0.059049043458505694, "grad_norm": 353.953125, "learning_rate": 1.995579633336707e-06, "loss": 52.4688, "step": 6238 }, { "epoch": 0.05905850948022075, "grad_norm": 293.3305358886719, "learning_rate": 1.9955767533986915e-06, "loss": 28.9062, "step": 6239 }, { "epoch": 0.0590679755019358, "grad_norm": 435.7214660644531, "learning_rate": 1.9955738725248984e-06, "loss": 17.8828, "step": 6240 }, { "epoch": 0.05907744152365085, "grad_norm": 193.99864196777344, "learning_rate": 1.9955709907153303e-06, "loss": 21.6484, "step": 6241 }, { "epoch": 0.059086907545365906, "grad_norm": 244.4300079345703, "learning_rate": 1.9955681079699904e-06, "loss": 30.7969, "step": 6242 }, { "epoch": 0.059096373567080966, "grad_norm": 186.37278747558594, "learning_rate": 1.995565224288881e-06, "loss": 18.5, "step": 6243 }, { "epoch": 0.05910583958879602, "grad_norm": 513.6492919921875, "learning_rate": 1.9955623396720057e-06, "loss": 29.6953, "step": 6244 }, { "epoch": 0.05911530561051107, "grad_norm": 335.8236083984375, "learning_rate": 1.995559454119366e-06, "loss": 54.875, "step": 6245 }, { "epoch": 0.059124771632226125, "grad_norm": 286.6195983886719, "learning_rate": 1.9955565676309655e-06, "loss": 20.25, "step": 6246 }, { "epoch": 0.05913423765394118, "grad_norm": 676.4180297851562, "learning_rate": 1.9955536802068066e-06, "loss": 49.4219, "step": 6247 }, { "epoch": 0.05914370367565623, "grad_norm": 241.76443481445312, "learning_rate": 1.995550791846892e-06, "loss": 27.8281, "step": 6248 }, { "epoch": 0.059153169697371284, "grad_norm": 308.77996826171875, "learning_rate": 1.995547902551224e-06, "loss": 32.875, "step": 6249 }, { "epoch": 0.05916263571908634, "grad_norm": 519.9591064453125, "learning_rate": 1.995545012319806e-06, "loss": 48.2812, "step": 6250 }, { "epoch": 0.05917210174080139, "grad_norm": 372.7762451171875, "learning_rate": 1.995542121152641e-06, "loss": 25.2969, "step": 6251 }, { "epoch": 0.05918156776251645, "grad_norm": 216.79339599609375, "learning_rate": 1.9955392290497303e-06, "loss": 22.8906, "step": 6252 }, { "epoch": 0.0591910337842315, "grad_norm": 715.3034057617188, "learning_rate": 1.995536336011078e-06, "loss": 72.3125, "step": 6253 }, { "epoch": 0.059200499805946556, "grad_norm": 381.89251708984375, "learning_rate": 1.995533442036686e-06, "loss": 22.2422, "step": 6254 }, { "epoch": 0.05920996582766161, "grad_norm": 616.2096557617188, "learning_rate": 1.9955305471265575e-06, "loss": 47.2031, "step": 6255 }, { "epoch": 0.05921943184937666, "grad_norm": 855.9620971679688, "learning_rate": 1.995527651280695e-06, "loss": 64.3281, "step": 6256 }, { "epoch": 0.059228897871091715, "grad_norm": 3.5289416313171387, "learning_rate": 1.9955247544991015e-06, "loss": 0.8943, "step": 6257 }, { "epoch": 0.05923836389280677, "grad_norm": 368.30712890625, "learning_rate": 1.995521856781779e-06, "loss": 37.0469, "step": 6258 }, { "epoch": 0.05924782991452182, "grad_norm": 461.75677490234375, "learning_rate": 1.995518958128731e-06, "loss": 41.7812, "step": 6259 }, { "epoch": 0.05925729593623688, "grad_norm": 674.7532958984375, "learning_rate": 1.99551605853996e-06, "loss": 54.5234, "step": 6260 }, { "epoch": 0.059266761957951934, "grad_norm": 456.91595458984375, "learning_rate": 1.995513158015469e-06, "loss": 53.2656, "step": 6261 }, { "epoch": 0.05927622797966699, "grad_norm": 217.6236572265625, "learning_rate": 1.99551025655526e-06, "loss": 11.8125, "step": 6262 }, { "epoch": 0.05928569400138204, "grad_norm": 459.2733459472656, "learning_rate": 1.9955073541593358e-06, "loss": 50.2188, "step": 6263 }, { "epoch": 0.05929516002309709, "grad_norm": 302.5071716308594, "learning_rate": 1.9955044508277e-06, "loss": 23.5703, "step": 6264 }, { "epoch": 0.059304626044812146, "grad_norm": 358.3125, "learning_rate": 1.9955015465603543e-06, "loss": 52.6562, "step": 6265 }, { "epoch": 0.0593140920665272, "grad_norm": 1009.1815185546875, "learning_rate": 1.9954986413573025e-06, "loss": 43.1719, "step": 6266 }, { "epoch": 0.05932355808824225, "grad_norm": 243.70458984375, "learning_rate": 1.995495735218546e-06, "loss": 18.9062, "step": 6267 }, { "epoch": 0.059333024109957305, "grad_norm": 160.16183471679688, "learning_rate": 1.995492828144089e-06, "loss": 21.0, "step": 6268 }, { "epoch": 0.059342490131672365, "grad_norm": 164.35496520996094, "learning_rate": 1.995489920133933e-06, "loss": 19.6406, "step": 6269 }, { "epoch": 0.05935195615338742, "grad_norm": 220.97389221191406, "learning_rate": 1.9954870111880813e-06, "loss": 27.5781, "step": 6270 }, { "epoch": 0.05936142217510247, "grad_norm": 386.0002746582031, "learning_rate": 1.995484101306537e-06, "loss": 29.4766, "step": 6271 }, { "epoch": 0.059370888196817524, "grad_norm": 328.4129333496094, "learning_rate": 1.995481190489302e-06, "loss": 22.625, "step": 6272 }, { "epoch": 0.05938035421853258, "grad_norm": 313.8253173828125, "learning_rate": 1.9954782787363795e-06, "loss": 21.3906, "step": 6273 }, { "epoch": 0.05938982024024763, "grad_norm": 471.78759765625, "learning_rate": 1.9954753660477722e-06, "loss": 20.6719, "step": 6274 }, { "epoch": 0.05939928626196268, "grad_norm": 631.31396484375, "learning_rate": 1.995472452423483e-06, "loss": 40.8125, "step": 6275 }, { "epoch": 0.059408752283677736, "grad_norm": 273.0135803222656, "learning_rate": 1.9954695378635142e-06, "loss": 21.4766, "step": 6276 }, { "epoch": 0.05941821830539279, "grad_norm": 385.3695373535156, "learning_rate": 1.995466622367869e-06, "loss": 20.5117, "step": 6277 }, { "epoch": 0.05942768432710785, "grad_norm": 352.4319152832031, "learning_rate": 1.9954637059365497e-06, "loss": 34.0, "step": 6278 }, { "epoch": 0.0594371503488229, "grad_norm": 633.7155151367188, "learning_rate": 1.995460788569559e-06, "loss": 21.0078, "step": 6279 }, { "epoch": 0.059446616370537955, "grad_norm": 313.2594909667969, "learning_rate": 1.9954578702669006e-06, "loss": 23.2266, "step": 6280 }, { "epoch": 0.05945608239225301, "grad_norm": 324.1105041503906, "learning_rate": 1.9954549510285763e-06, "loss": 24.0, "step": 6281 }, { "epoch": 0.05946554841396806, "grad_norm": 3.2286155223846436, "learning_rate": 1.9954520308545894e-06, "loss": 0.9233, "step": 6282 }, { "epoch": 0.059475014435683114, "grad_norm": 636.1705932617188, "learning_rate": 1.995449109744942e-06, "loss": 62.5625, "step": 6283 }, { "epoch": 0.05948448045739817, "grad_norm": 289.4077453613281, "learning_rate": 1.995446187699637e-06, "loss": 24.0781, "step": 6284 }, { "epoch": 0.05949394647911322, "grad_norm": 258.0048522949219, "learning_rate": 1.9954432647186776e-06, "loss": 18.6484, "step": 6285 }, { "epoch": 0.05950341250082828, "grad_norm": 2.6121153831481934, "learning_rate": 1.9954403408020663e-06, "loss": 0.8503, "step": 6286 }, { "epoch": 0.05951287852254333, "grad_norm": 609.2626953125, "learning_rate": 1.995437415949806e-06, "loss": 67.1172, "step": 6287 }, { "epoch": 0.059522344544258386, "grad_norm": 457.699951171875, "learning_rate": 1.995434490161899e-06, "loss": 43.9531, "step": 6288 }, { "epoch": 0.05953181056597344, "grad_norm": 259.5176086425781, "learning_rate": 1.9954315634383483e-06, "loss": 23.6484, "step": 6289 }, { "epoch": 0.05954127658768849, "grad_norm": 430.31390380859375, "learning_rate": 1.9954286357791566e-06, "loss": 33.5312, "step": 6290 }, { "epoch": 0.059550742609403545, "grad_norm": 423.831787109375, "learning_rate": 1.995425707184327e-06, "loss": 25.9922, "step": 6291 }, { "epoch": 0.0595602086311186, "grad_norm": 154.39988708496094, "learning_rate": 1.9954227776538623e-06, "loss": 21.3828, "step": 6292 }, { "epoch": 0.05956967465283365, "grad_norm": 796.2849731445312, "learning_rate": 1.9954198471877644e-06, "loss": 51.125, "step": 6293 }, { "epoch": 0.059579140674548704, "grad_norm": 414.5036926269531, "learning_rate": 1.995416915786037e-06, "loss": 21.4609, "step": 6294 }, { "epoch": 0.059588606696263764, "grad_norm": 2.8346567153930664, "learning_rate": 1.995413983448682e-06, "loss": 0.8853, "step": 6295 }, { "epoch": 0.05959807271797882, "grad_norm": 263.94537353515625, "learning_rate": 1.9954110501757026e-06, "loss": 22.4062, "step": 6296 }, { "epoch": 0.05960753873969387, "grad_norm": 637.360107421875, "learning_rate": 1.995408115967102e-06, "loss": 57.2344, "step": 6297 }, { "epoch": 0.05961700476140892, "grad_norm": 291.7852783203125, "learning_rate": 1.995405180822882e-06, "loss": 30.6562, "step": 6298 }, { "epoch": 0.059626470783123976, "grad_norm": 494.1409606933594, "learning_rate": 1.9954022447430466e-06, "loss": 44.1172, "step": 6299 }, { "epoch": 0.05963593680483903, "grad_norm": 762.5299072265625, "learning_rate": 1.9953993077275976e-06, "loss": 45.0938, "step": 6300 }, { "epoch": 0.05964540282655408, "grad_norm": 373.4777526855469, "learning_rate": 1.9953963697765376e-06, "loss": 26.1719, "step": 6301 }, { "epoch": 0.059654868848269135, "grad_norm": 316.7348937988281, "learning_rate": 1.99539343088987e-06, "loss": 25.0859, "step": 6302 }, { "epoch": 0.05966433486998419, "grad_norm": 905.0464477539062, "learning_rate": 1.9953904910675974e-06, "loss": 31.2969, "step": 6303 }, { "epoch": 0.05967380089169925, "grad_norm": 444.50921630859375, "learning_rate": 1.9953875503097226e-06, "loss": 25.1719, "step": 6304 }, { "epoch": 0.0596832669134143, "grad_norm": 313.89306640625, "learning_rate": 1.995384608616248e-06, "loss": 23.6719, "step": 6305 }, { "epoch": 0.059692732935129354, "grad_norm": 286.1103210449219, "learning_rate": 1.9953816659871765e-06, "loss": 33.0312, "step": 6306 }, { "epoch": 0.05970219895684441, "grad_norm": 276.51068115234375, "learning_rate": 1.9953787224225116e-06, "loss": 21.3359, "step": 6307 }, { "epoch": 0.05971166497855946, "grad_norm": 3.033092498779297, "learning_rate": 1.995375777922255e-06, "loss": 0.8501, "step": 6308 }, { "epoch": 0.05972113100027451, "grad_norm": 732.479248046875, "learning_rate": 1.99537283248641e-06, "loss": 53.4375, "step": 6309 }, { "epoch": 0.059730597021989566, "grad_norm": 757.01025390625, "learning_rate": 1.9953698861149794e-06, "loss": 50.7188, "step": 6310 }, { "epoch": 0.05974006304370462, "grad_norm": 260.27593994140625, "learning_rate": 1.995366938807966e-06, "loss": 23.9922, "step": 6311 }, { "epoch": 0.05974952906541968, "grad_norm": 271.4865417480469, "learning_rate": 1.995363990565372e-06, "loss": 20.3828, "step": 6312 }, { "epoch": 0.05975899508713473, "grad_norm": 812.9490966796875, "learning_rate": 1.995361041387201e-06, "loss": 19.8438, "step": 6313 }, { "epoch": 0.059768461108849785, "grad_norm": 4.351327896118164, "learning_rate": 1.995358091273455e-06, "loss": 0.8496, "step": 6314 }, { "epoch": 0.05977792713056484, "grad_norm": 485.9508972167969, "learning_rate": 1.995355140224137e-06, "loss": 24.8828, "step": 6315 }, { "epoch": 0.05978739315227989, "grad_norm": 561.2383422851562, "learning_rate": 1.99535218823925e-06, "loss": 42.7578, "step": 6316 }, { "epoch": 0.059796859173994944, "grad_norm": 2.6409003734588623, "learning_rate": 1.995349235318797e-06, "loss": 0.7812, "step": 6317 }, { "epoch": 0.05980632519571, "grad_norm": 297.78875732421875, "learning_rate": 1.9953462814627805e-06, "loss": 27.1328, "step": 6318 }, { "epoch": 0.05981579121742505, "grad_norm": 2.9294204711914062, "learning_rate": 1.9953433266712033e-06, "loss": 1.0103, "step": 6319 }, { "epoch": 0.0598252572391401, "grad_norm": 955.739990234375, "learning_rate": 1.995340370944068e-06, "loss": 20.8828, "step": 6320 }, { "epoch": 0.05983472326085516, "grad_norm": 201.77471923828125, "learning_rate": 1.9953374142813773e-06, "loss": 22.5, "step": 6321 }, { "epoch": 0.059844189282570216, "grad_norm": 819.0091552734375, "learning_rate": 1.9953344566831344e-06, "loss": 38.4766, "step": 6322 }, { "epoch": 0.05985365530428527, "grad_norm": 346.6968994140625, "learning_rate": 1.995331498149342e-06, "loss": 51.3281, "step": 6323 }, { "epoch": 0.05986312132600032, "grad_norm": 222.225830078125, "learning_rate": 1.9953285386800024e-06, "loss": 20.6406, "step": 6324 }, { "epoch": 0.059872587347715375, "grad_norm": 288.9809265136719, "learning_rate": 1.995325578275119e-06, "loss": 22.7578, "step": 6325 }, { "epoch": 0.05988205336943043, "grad_norm": 267.2344665527344, "learning_rate": 1.995322616934694e-06, "loss": 21.0156, "step": 6326 }, { "epoch": 0.05989151939114548, "grad_norm": 336.25592041015625, "learning_rate": 1.995319654658731e-06, "loss": 23.5469, "step": 6327 }, { "epoch": 0.059900985412860534, "grad_norm": 706.9424438476562, "learning_rate": 1.9953166914472316e-06, "loss": 53.2344, "step": 6328 }, { "epoch": 0.059910451434575594, "grad_norm": 174.7903289794922, "learning_rate": 1.9953137273002e-06, "loss": 19.1094, "step": 6329 }, { "epoch": 0.05991991745629065, "grad_norm": 303.8824157714844, "learning_rate": 1.9953107622176375e-06, "loss": 30.5312, "step": 6330 }, { "epoch": 0.0599293834780057, "grad_norm": 305.72406005859375, "learning_rate": 1.9953077961995484e-06, "loss": 25.75, "step": 6331 }, { "epoch": 0.05993884949972075, "grad_norm": 462.84222412109375, "learning_rate": 1.995304829245934e-06, "loss": 20.0508, "step": 6332 }, { "epoch": 0.059948315521435806, "grad_norm": 353.6441650390625, "learning_rate": 1.9953018613567983e-06, "loss": 27.5312, "step": 6333 }, { "epoch": 0.05995778154315086, "grad_norm": 2.6646080017089844, "learning_rate": 1.9952988925321434e-06, "loss": 0.8789, "step": 6334 }, { "epoch": 0.05996724756486591, "grad_norm": 912.95458984375, "learning_rate": 1.9952959227719723e-06, "loss": 93.0859, "step": 6335 }, { "epoch": 0.059976713586580965, "grad_norm": 246.05361938476562, "learning_rate": 1.9952929520762876e-06, "loss": 38.8672, "step": 6336 }, { "epoch": 0.05998617960829602, "grad_norm": 253.16567993164062, "learning_rate": 1.9952899804450928e-06, "loss": 20.0781, "step": 6337 }, { "epoch": 0.05999564563001108, "grad_norm": 399.10382080078125, "learning_rate": 1.99528700787839e-06, "loss": 25.1484, "step": 6338 }, { "epoch": 0.06000511165172613, "grad_norm": 366.8915100097656, "learning_rate": 1.995284034376182e-06, "loss": 19.8633, "step": 6339 }, { "epoch": 0.060014577673441184, "grad_norm": 332.15069580078125, "learning_rate": 1.995281059938471e-06, "loss": 17.7266, "step": 6340 }, { "epoch": 0.06002404369515624, "grad_norm": 1136.94677734375, "learning_rate": 1.995278084565262e-06, "loss": 38.5312, "step": 6341 }, { "epoch": 0.06003350971687129, "grad_norm": 532.398681640625, "learning_rate": 1.995275108256555e-06, "loss": 47.9844, "step": 6342 }, { "epoch": 0.06004297573858634, "grad_norm": 155.5275421142578, "learning_rate": 1.995272131012355e-06, "loss": 24.9375, "step": 6343 }, { "epoch": 0.060052441760301396, "grad_norm": 423.2911682128906, "learning_rate": 1.9952691528326636e-06, "loss": 40.3906, "step": 6344 }, { "epoch": 0.06006190778201645, "grad_norm": 818.2544555664062, "learning_rate": 1.995266173717484e-06, "loss": 28.0547, "step": 6345 }, { "epoch": 0.0600713738037315, "grad_norm": 401.2441711425781, "learning_rate": 1.9952631936668195e-06, "loss": 24.9766, "step": 6346 }, { "epoch": 0.06008083982544656, "grad_norm": 785.6786499023438, "learning_rate": 1.9952602126806717e-06, "loss": 66.3594, "step": 6347 }, { "epoch": 0.060090305847161615, "grad_norm": 839.36767578125, "learning_rate": 1.9952572307590443e-06, "loss": 41.1406, "step": 6348 }, { "epoch": 0.06009977186887667, "grad_norm": 383.2018737792969, "learning_rate": 1.9952542479019392e-06, "loss": 15.168, "step": 6349 }, { "epoch": 0.06010923789059172, "grad_norm": 214.7924346923828, "learning_rate": 1.9952512641093604e-06, "loss": 24.4766, "step": 6350 }, { "epoch": 0.060118703912306774, "grad_norm": 351.66668701171875, "learning_rate": 1.9952482793813104e-06, "loss": 26.0938, "step": 6351 }, { "epoch": 0.06012816993402183, "grad_norm": 542.2760620117188, "learning_rate": 1.9952452937177917e-06, "loss": 48.1328, "step": 6352 }, { "epoch": 0.06013763595573688, "grad_norm": 207.5268096923828, "learning_rate": 1.9952423071188068e-06, "loss": 26.4297, "step": 6353 }, { "epoch": 0.06014710197745193, "grad_norm": 589.1365966796875, "learning_rate": 1.995239319584359e-06, "loss": 50.0938, "step": 6354 }, { "epoch": 0.06015656799916699, "grad_norm": 229.51954650878906, "learning_rate": 1.9952363311144512e-06, "loss": 26.9375, "step": 6355 }, { "epoch": 0.060166034020882046, "grad_norm": 310.8518371582031, "learning_rate": 1.995233341709086e-06, "loss": 15.6406, "step": 6356 }, { "epoch": 0.0601755000425971, "grad_norm": 673.6285400390625, "learning_rate": 1.995230351368266e-06, "loss": 50.8516, "step": 6357 }, { "epoch": 0.06018496606431215, "grad_norm": 574.564208984375, "learning_rate": 1.9952273600919942e-06, "loss": 48.4805, "step": 6358 }, { "epoch": 0.060194432086027205, "grad_norm": 200.95123291015625, "learning_rate": 1.9952243678802734e-06, "loss": 16.8672, "step": 6359 }, { "epoch": 0.06020389810774226, "grad_norm": 147.7655029296875, "learning_rate": 1.9952213747331064e-06, "loss": 23.8594, "step": 6360 }, { "epoch": 0.06021336412945731, "grad_norm": 536.0313110351562, "learning_rate": 1.9952183806504965e-06, "loss": 44.8984, "step": 6361 }, { "epoch": 0.060222830151172364, "grad_norm": 389.5287780761719, "learning_rate": 1.995215385632446e-06, "loss": 20.1641, "step": 6362 }, { "epoch": 0.06023229617288742, "grad_norm": 191.95663452148438, "learning_rate": 1.9952123896789574e-06, "loss": 23.2031, "step": 6363 }, { "epoch": 0.06024176219460248, "grad_norm": 202.5122528076172, "learning_rate": 1.9952093927900337e-06, "loss": 26.8516, "step": 6364 }, { "epoch": 0.06025122821631753, "grad_norm": 317.7578125, "learning_rate": 1.995206394965678e-06, "loss": 22.6406, "step": 6365 }, { "epoch": 0.06026069423803258, "grad_norm": 414.0502624511719, "learning_rate": 1.995203396205894e-06, "loss": 31.375, "step": 6366 }, { "epoch": 0.060270160259747636, "grad_norm": 421.12939453125, "learning_rate": 1.9952003965106827e-06, "loss": 43.3906, "step": 6367 }, { "epoch": 0.06027962628146269, "grad_norm": 239.49139404296875, "learning_rate": 1.9951973958800477e-06, "loss": 22.5469, "step": 6368 }, { "epoch": 0.06028909230317774, "grad_norm": 213.24456787109375, "learning_rate": 1.9951943943139923e-06, "loss": 23.2344, "step": 6369 }, { "epoch": 0.060298558324892795, "grad_norm": 614.9152221679688, "learning_rate": 1.9951913918125187e-06, "loss": 44.2969, "step": 6370 }, { "epoch": 0.06030802434660785, "grad_norm": 614.640869140625, "learning_rate": 1.9951883883756297e-06, "loss": 63.1562, "step": 6371 }, { "epoch": 0.06031749036832291, "grad_norm": 327.3179016113281, "learning_rate": 1.9951853840033287e-06, "loss": 17.6797, "step": 6372 }, { "epoch": 0.06032695639003796, "grad_norm": 299.9538879394531, "learning_rate": 1.995182378695618e-06, "loss": 25.8594, "step": 6373 }, { "epoch": 0.060336422411753014, "grad_norm": 484.2112121582031, "learning_rate": 1.9951793724525004e-06, "loss": 57.9531, "step": 6374 }, { "epoch": 0.06034588843346807, "grad_norm": 447.0586242675781, "learning_rate": 1.9951763652739793e-06, "loss": 54.5781, "step": 6375 }, { "epoch": 0.06035535445518312, "grad_norm": 1075.6336669921875, "learning_rate": 1.9951733571600572e-06, "loss": 47.2031, "step": 6376 }, { "epoch": 0.06036482047689817, "grad_norm": 189.13645935058594, "learning_rate": 1.995170348110737e-06, "loss": 27.0781, "step": 6377 }, { "epoch": 0.060374286498613226, "grad_norm": 264.81591796875, "learning_rate": 1.9951673381260206e-06, "loss": 27.9297, "step": 6378 }, { "epoch": 0.06038375252032828, "grad_norm": 1292.2562255859375, "learning_rate": 1.9951643272059123e-06, "loss": 36.6875, "step": 6379 }, { "epoch": 0.06039321854204333, "grad_norm": 433.38092041015625, "learning_rate": 1.9951613153504145e-06, "loss": 40.2188, "step": 6380 }, { "epoch": 0.06040268456375839, "grad_norm": 190.37326049804688, "learning_rate": 1.9951583025595293e-06, "loss": 29.6562, "step": 6381 }, { "epoch": 0.060412150585473445, "grad_norm": 593.2374877929688, "learning_rate": 1.99515528883326e-06, "loss": 38.1328, "step": 6382 }, { "epoch": 0.0604216166071885, "grad_norm": 261.79833984375, "learning_rate": 1.99515227417161e-06, "loss": 23.3438, "step": 6383 }, { "epoch": 0.06043108262890355, "grad_norm": 391.11749267578125, "learning_rate": 1.9951492585745813e-06, "loss": 30.5391, "step": 6384 }, { "epoch": 0.060440548650618604, "grad_norm": 309.815185546875, "learning_rate": 1.995146242042177e-06, "loss": 29.7031, "step": 6385 }, { "epoch": 0.06045001467233366, "grad_norm": 1234.6427001953125, "learning_rate": 1.9951432245743996e-06, "loss": 31.4141, "step": 6386 }, { "epoch": 0.06045948069404871, "grad_norm": 614.6873779296875, "learning_rate": 1.995140206171253e-06, "loss": 47.1328, "step": 6387 }, { "epoch": 0.06046894671576376, "grad_norm": 353.2185363769531, "learning_rate": 1.995137186832739e-06, "loss": 20.8828, "step": 6388 }, { "epoch": 0.060478412737478816, "grad_norm": 2.6680829524993896, "learning_rate": 1.9951341665588607e-06, "loss": 0.9287, "step": 6389 }, { "epoch": 0.060487878759193876, "grad_norm": 392.48675537109375, "learning_rate": 1.9951311453496213e-06, "loss": 50.7188, "step": 6390 }, { "epoch": 0.06049734478090893, "grad_norm": 313.3567810058594, "learning_rate": 1.995128123205023e-06, "loss": 21.3906, "step": 6391 }, { "epoch": 0.06050681080262398, "grad_norm": 310.4812316894531, "learning_rate": 1.9951251001250692e-06, "loss": 20.8359, "step": 6392 }, { "epoch": 0.060516276824339035, "grad_norm": 3.3456785678863525, "learning_rate": 1.9951220761097626e-06, "loss": 1.0337, "step": 6393 }, { "epoch": 0.06052574284605409, "grad_norm": 215.39596557617188, "learning_rate": 1.995119051159106e-06, "loss": 23.2969, "step": 6394 }, { "epoch": 0.06053520886776914, "grad_norm": 659.859375, "learning_rate": 1.9951160252731022e-06, "loss": 59.0156, "step": 6395 }, { "epoch": 0.060544674889484194, "grad_norm": 408.6947326660156, "learning_rate": 1.9951129984517537e-06, "loss": 50.2188, "step": 6396 }, { "epoch": 0.06055414091119925, "grad_norm": 313.92816162109375, "learning_rate": 1.9951099706950644e-06, "loss": 26.2344, "step": 6397 }, { "epoch": 0.06056360693291431, "grad_norm": 702.7360229492188, "learning_rate": 1.995106942003036e-06, "loss": 32.8672, "step": 6398 }, { "epoch": 0.06057307295462936, "grad_norm": 3.30771803855896, "learning_rate": 1.9951039123756718e-06, "loss": 0.9116, "step": 6399 }, { "epoch": 0.06058253897634441, "grad_norm": 251.19798278808594, "learning_rate": 1.995100881812975e-06, "loss": 19.3359, "step": 6400 }, { "epoch": 0.060592004998059466, "grad_norm": 383.645263671875, "learning_rate": 1.9950978503149474e-06, "loss": 29.4062, "step": 6401 }, { "epoch": 0.06060147101977452, "grad_norm": 743.726318359375, "learning_rate": 1.9950948178815935e-06, "loss": 30.5273, "step": 6402 }, { "epoch": 0.06061093704148957, "grad_norm": 586.4777221679688, "learning_rate": 1.9950917845129145e-06, "loss": 56.1406, "step": 6403 }, { "epoch": 0.060620403063204625, "grad_norm": 387.6602783203125, "learning_rate": 1.9950887502089146e-06, "loss": 20.9844, "step": 6404 }, { "epoch": 0.06062986908491968, "grad_norm": 384.4565124511719, "learning_rate": 1.9950857149695955e-06, "loss": 42.2812, "step": 6405 }, { "epoch": 0.06063933510663473, "grad_norm": 314.5341796875, "learning_rate": 1.9950826787949606e-06, "loss": 41.7969, "step": 6406 }, { "epoch": 0.06064880112834979, "grad_norm": 219.42608642578125, "learning_rate": 1.995079641685013e-06, "loss": 19.9219, "step": 6407 }, { "epoch": 0.060658267150064844, "grad_norm": 381.53900146484375, "learning_rate": 1.9950766036397548e-06, "loss": 35.8438, "step": 6408 }, { "epoch": 0.0606677331717799, "grad_norm": 221.1123809814453, "learning_rate": 1.99507356465919e-06, "loss": 19.8203, "step": 6409 }, { "epoch": 0.06067719919349495, "grad_norm": 133.74465942382812, "learning_rate": 1.99507052474332e-06, "loss": 20.6562, "step": 6410 }, { "epoch": 0.06068666521521, "grad_norm": 261.4531555175781, "learning_rate": 1.9950674838921485e-06, "loss": 21.0156, "step": 6411 }, { "epoch": 0.060696131236925056, "grad_norm": 3.1523284912109375, "learning_rate": 1.995064442105679e-06, "loss": 0.8457, "step": 6412 }, { "epoch": 0.06070559725864011, "grad_norm": 444.3761291503906, "learning_rate": 1.995061399383913e-06, "loss": 27.6328, "step": 6413 }, { "epoch": 0.06071506328035516, "grad_norm": 180.69541931152344, "learning_rate": 1.995058355726854e-06, "loss": 20.0547, "step": 6414 }, { "epoch": 0.06072452930207022, "grad_norm": 250.25909423828125, "learning_rate": 1.9950553111345054e-06, "loss": 27.1484, "step": 6415 }, { "epoch": 0.060733995323785275, "grad_norm": 669.0018310546875, "learning_rate": 1.9950522656068696e-06, "loss": 54.3984, "step": 6416 }, { "epoch": 0.06074346134550033, "grad_norm": 311.34735107421875, "learning_rate": 1.995049219143949e-06, "loss": 24.875, "step": 6417 }, { "epoch": 0.06075292736721538, "grad_norm": 381.7377014160156, "learning_rate": 1.995046171745747e-06, "loss": 27.3438, "step": 6418 }, { "epoch": 0.060762393388930434, "grad_norm": 457.1264953613281, "learning_rate": 1.9950431234122664e-06, "loss": 40.25, "step": 6419 }, { "epoch": 0.06077185941064549, "grad_norm": 300.4454345703125, "learning_rate": 1.9950400741435097e-06, "loss": 20.0703, "step": 6420 }, { "epoch": 0.06078132543236054, "grad_norm": 697.9312133789062, "learning_rate": 1.9950370239394803e-06, "loss": 24.4844, "step": 6421 }, { "epoch": 0.06079079145407559, "grad_norm": 381.72711181640625, "learning_rate": 1.995033972800181e-06, "loss": 21.7812, "step": 6422 }, { "epoch": 0.060800257475790646, "grad_norm": 192.69778442382812, "learning_rate": 1.995030920725614e-06, "loss": 23.125, "step": 6423 }, { "epoch": 0.060809723497505706, "grad_norm": 681.3165283203125, "learning_rate": 1.995027867715783e-06, "loss": 80.25, "step": 6424 }, { "epoch": 0.06081918951922076, "grad_norm": 498.2503967285156, "learning_rate": 1.995024813770691e-06, "loss": 44.4844, "step": 6425 }, { "epoch": 0.06082865554093581, "grad_norm": 319.45953369140625, "learning_rate": 1.995021758890339e-06, "loss": 20.5, "step": 6426 }, { "epoch": 0.060838121562650865, "grad_norm": 393.1156311035156, "learning_rate": 1.9950187030747326e-06, "loss": 30.1328, "step": 6427 }, { "epoch": 0.06084758758436592, "grad_norm": 230.73439025878906, "learning_rate": 1.995015646323873e-06, "loss": 23.4219, "step": 6428 }, { "epoch": 0.06085705360608097, "grad_norm": 455.84832763671875, "learning_rate": 1.995012588637763e-06, "loss": 26.6094, "step": 6429 }, { "epoch": 0.060866519627796024, "grad_norm": 373.9718017578125, "learning_rate": 1.9950095300164066e-06, "loss": 25.0078, "step": 6430 }, { "epoch": 0.06087598564951108, "grad_norm": 502.9765625, "learning_rate": 1.9950064704598054e-06, "loss": 21.1016, "step": 6431 }, { "epoch": 0.06088545167122613, "grad_norm": 819.8850708007812, "learning_rate": 1.995003409967963e-06, "loss": 60.4922, "step": 6432 }, { "epoch": 0.06089491769294119, "grad_norm": 3.2293081283569336, "learning_rate": 1.9950003485408826e-06, "loss": 0.8218, "step": 6433 }, { "epoch": 0.06090438371465624, "grad_norm": 536.1895141601562, "learning_rate": 1.9949972861785656e-06, "loss": 26.5938, "step": 6434 }, { "epoch": 0.060913849736371296, "grad_norm": 666.0015258789062, "learning_rate": 1.9949942228810167e-06, "loss": 27.0938, "step": 6435 }, { "epoch": 0.06092331575808635, "grad_norm": 498.92669677734375, "learning_rate": 1.9949911586482376e-06, "loss": 39.25, "step": 6436 }, { "epoch": 0.0609327817798014, "grad_norm": 157.0077667236328, "learning_rate": 1.994988093480232e-06, "loss": 18.0391, "step": 6437 }, { "epoch": 0.060942247801516455, "grad_norm": 354.0731201171875, "learning_rate": 1.9949850273770016e-06, "loss": 27.9062, "step": 6438 }, { "epoch": 0.06095171382323151, "grad_norm": 3.666430950164795, "learning_rate": 1.9949819603385503e-06, "loss": 0.7993, "step": 6439 }, { "epoch": 0.06096117984494656, "grad_norm": 401.12969970703125, "learning_rate": 1.994978892364881e-06, "loss": 47.5938, "step": 6440 }, { "epoch": 0.06097064586666162, "grad_norm": 897.57421875, "learning_rate": 1.994975823455996e-06, "loss": 26.1406, "step": 6441 }, { "epoch": 0.060980111888376674, "grad_norm": 226.2285919189453, "learning_rate": 1.994972753611899e-06, "loss": 25.1484, "step": 6442 }, { "epoch": 0.06098957791009173, "grad_norm": 190.5459747314453, "learning_rate": 1.9949696828325913e-06, "loss": 22.0312, "step": 6443 }, { "epoch": 0.06099904393180678, "grad_norm": 1363.632080078125, "learning_rate": 1.9949666111180777e-06, "loss": 77.1641, "step": 6444 }, { "epoch": 0.06100850995352183, "grad_norm": 359.1639709472656, "learning_rate": 1.9949635384683597e-06, "loss": 23.6875, "step": 6445 }, { "epoch": 0.061017975975236886, "grad_norm": 797.7765502929688, "learning_rate": 1.9949604648834407e-06, "loss": 44.3516, "step": 6446 }, { "epoch": 0.06102744199695194, "grad_norm": 308.7940368652344, "learning_rate": 1.994957390363324e-06, "loss": 37.3281, "step": 6447 }, { "epoch": 0.06103690801866699, "grad_norm": 918.5250854492188, "learning_rate": 1.9949543149080117e-06, "loss": 69.1641, "step": 6448 }, { "epoch": 0.061046374040382045, "grad_norm": 222.4664306640625, "learning_rate": 1.9949512385175074e-06, "loss": 26.6719, "step": 6449 }, { "epoch": 0.061055840062097105, "grad_norm": 189.8463592529297, "learning_rate": 1.9949481611918134e-06, "loss": 20.6562, "step": 6450 }, { "epoch": 0.06106530608381216, "grad_norm": 332.1270751953125, "learning_rate": 1.994945082930933e-06, "loss": 37.3594, "step": 6451 }, { "epoch": 0.06107477210552721, "grad_norm": 566.46337890625, "learning_rate": 1.9949420037348686e-06, "loss": 42.0391, "step": 6452 }, { "epoch": 0.061084238127242264, "grad_norm": 721.6871948242188, "learning_rate": 1.9949389236036236e-06, "loss": 28.5625, "step": 6453 }, { "epoch": 0.06109370414895732, "grad_norm": 353.58795166015625, "learning_rate": 1.994935842537201e-06, "loss": 26.9219, "step": 6454 }, { "epoch": 0.06110317017067237, "grad_norm": 257.77740478515625, "learning_rate": 1.994932760535603e-06, "loss": 23.7188, "step": 6455 }, { "epoch": 0.06111263619238742, "grad_norm": 286.45233154296875, "learning_rate": 1.9949296775988334e-06, "loss": 21.7344, "step": 6456 }, { "epoch": 0.061122102214102476, "grad_norm": 606.9606323242188, "learning_rate": 1.994926593726895e-06, "loss": 45.7188, "step": 6457 }, { "epoch": 0.061131568235817536, "grad_norm": 721.2406616210938, "learning_rate": 1.994923508919789e-06, "loss": 10.5664, "step": 6458 }, { "epoch": 0.06114103425753259, "grad_norm": 500.7311096191406, "learning_rate": 1.9949204231775206e-06, "loss": 31.4531, "step": 6459 }, { "epoch": 0.06115050027924764, "grad_norm": 1330.616943359375, "learning_rate": 1.9949173365000916e-06, "loss": 57.9219, "step": 6460 }, { "epoch": 0.061159966300962695, "grad_norm": 559.162841796875, "learning_rate": 1.994914248887505e-06, "loss": 48.0938, "step": 6461 }, { "epoch": 0.06116943232267775, "grad_norm": 1057.144775390625, "learning_rate": 1.9949111603397635e-06, "loss": 51.8438, "step": 6462 }, { "epoch": 0.0611788983443928, "grad_norm": 205.02252197265625, "learning_rate": 1.994908070856871e-06, "loss": 24.2188, "step": 6463 }, { "epoch": 0.061188364366107854, "grad_norm": 434.7136535644531, "learning_rate": 1.9949049804388287e-06, "loss": 60.2812, "step": 6464 }, { "epoch": 0.06119783038782291, "grad_norm": 814.7069091796875, "learning_rate": 1.9949018890856407e-06, "loss": 39.9766, "step": 6465 }, { "epoch": 0.06120729640953796, "grad_norm": 3.391071081161499, "learning_rate": 1.99489879679731e-06, "loss": 0.9766, "step": 6466 }, { "epoch": 0.06121676243125302, "grad_norm": 546.2738647460938, "learning_rate": 1.994895703573839e-06, "loss": 45.4062, "step": 6467 }, { "epoch": 0.06122622845296807, "grad_norm": 518.6997680664062, "learning_rate": 1.9948926094152303e-06, "loss": 56.8086, "step": 6468 }, { "epoch": 0.061235694474683126, "grad_norm": 1354.518310546875, "learning_rate": 1.9948895143214876e-06, "loss": 48.5781, "step": 6469 }, { "epoch": 0.06124516049639818, "grad_norm": 278.5951843261719, "learning_rate": 1.9948864182926137e-06, "loss": 8.3828, "step": 6470 }, { "epoch": 0.06125462651811323, "grad_norm": 222.98486328125, "learning_rate": 1.9948833213286108e-06, "loss": 23.4219, "step": 6471 }, { "epoch": 0.061264092539828285, "grad_norm": 3.3132994174957275, "learning_rate": 1.9948802234294827e-06, "loss": 0.9746, "step": 6472 }, { "epoch": 0.06127355856154334, "grad_norm": 345.7066955566406, "learning_rate": 1.994877124595232e-06, "loss": 24.0391, "step": 6473 }, { "epoch": 0.06128302458325839, "grad_norm": 339.7080993652344, "learning_rate": 1.994874024825861e-06, "loss": 40.25, "step": 6474 }, { "epoch": 0.061292490604973444, "grad_norm": 658.904052734375, "learning_rate": 1.9948709241213736e-06, "loss": 53.4062, "step": 6475 }, { "epoch": 0.061301956626688504, "grad_norm": 273.4133605957031, "learning_rate": 1.994867822481772e-06, "loss": 28.0078, "step": 6476 }, { "epoch": 0.06131142264840356, "grad_norm": 437.7381286621094, "learning_rate": 1.9948647199070594e-06, "loss": 25.7969, "step": 6477 }, { "epoch": 0.06132088867011861, "grad_norm": 833.6455078125, "learning_rate": 1.994861616397239e-06, "loss": 57.1719, "step": 6478 }, { "epoch": 0.06133035469183366, "grad_norm": 376.00579833984375, "learning_rate": 1.9948585119523133e-06, "loss": 22.5469, "step": 6479 }, { "epoch": 0.061339820713548716, "grad_norm": 3.658550500869751, "learning_rate": 1.994855406572285e-06, "loss": 0.8784, "step": 6480 }, { "epoch": 0.06134928673526377, "grad_norm": 503.05621337890625, "learning_rate": 1.9948523002571574e-06, "loss": 28.4531, "step": 6481 }, { "epoch": 0.06135875275697882, "grad_norm": 603.5861206054688, "learning_rate": 1.9948491930069334e-06, "loss": 43.1875, "step": 6482 }, { "epoch": 0.061368218778693875, "grad_norm": 302.04583740234375, "learning_rate": 1.9948460848216163e-06, "loss": 38.0469, "step": 6483 }, { "epoch": 0.061377684800408935, "grad_norm": 297.0077209472656, "learning_rate": 1.994842975701208e-06, "loss": 31.3906, "step": 6484 }, { "epoch": 0.06138715082212399, "grad_norm": 413.8563537597656, "learning_rate": 1.9948398656457123e-06, "loss": 27.8438, "step": 6485 }, { "epoch": 0.06139661684383904, "grad_norm": 253.59701538085938, "learning_rate": 1.9948367546551318e-06, "loss": 24.0156, "step": 6486 }, { "epoch": 0.061406082865554094, "grad_norm": 160.904052734375, "learning_rate": 1.9948336427294692e-06, "loss": 18.6719, "step": 6487 }, { "epoch": 0.06141554888726915, "grad_norm": 595.7402954101562, "learning_rate": 1.994830529868728e-06, "loss": 40.7109, "step": 6488 }, { "epoch": 0.0614250149089842, "grad_norm": 3.0041253566741943, "learning_rate": 1.994827416072911e-06, "loss": 0.9175, "step": 6489 }, { "epoch": 0.06143448093069925, "grad_norm": 936.2440795898438, "learning_rate": 1.994824301342021e-06, "loss": 46.6797, "step": 6490 }, { "epoch": 0.061443946952414306, "grad_norm": 244.76747131347656, "learning_rate": 1.9948211856760602e-06, "loss": 23.7188, "step": 6491 }, { "epoch": 0.06145341297412936, "grad_norm": 221.97462463378906, "learning_rate": 1.994818069075033e-06, "loss": 21.25, "step": 6492 }, { "epoch": 0.06146287899584442, "grad_norm": 392.2120361328125, "learning_rate": 1.994814951538941e-06, "loss": 10.5039, "step": 6493 }, { "epoch": 0.06147234501755947, "grad_norm": 3.0751254558563232, "learning_rate": 1.994811833067788e-06, "loss": 0.8506, "step": 6494 }, { "epoch": 0.061481811039274525, "grad_norm": 772.3035888671875, "learning_rate": 1.9948087136615765e-06, "loss": 43.9219, "step": 6495 }, { "epoch": 0.06149127706098958, "grad_norm": 569.3221435546875, "learning_rate": 1.994805593320309e-06, "loss": 56.0312, "step": 6496 }, { "epoch": 0.06150074308270463, "grad_norm": 309.8354797363281, "learning_rate": 1.9948024720439894e-06, "loss": 35.625, "step": 6497 }, { "epoch": 0.061510209104419684, "grad_norm": 689.0972900390625, "learning_rate": 1.9947993498326203e-06, "loss": 26.0234, "step": 6498 }, { "epoch": 0.06151967512613474, "grad_norm": 404.84326171875, "learning_rate": 1.9947962266862043e-06, "loss": 21.4922, "step": 6499 }, { "epoch": 0.06152914114784979, "grad_norm": 369.83660888671875, "learning_rate": 1.994793102604745e-06, "loss": 28.9922, "step": 6500 }, { "epoch": 0.06153860716956485, "grad_norm": 213.2903289794922, "learning_rate": 1.9947899775882444e-06, "loss": 23.5156, "step": 6501 }, { "epoch": 0.0615480731912799, "grad_norm": 357.76177978515625, "learning_rate": 1.994786851636706e-06, "loss": 48.6094, "step": 6502 }, { "epoch": 0.061557539212994956, "grad_norm": 346.45849609375, "learning_rate": 1.994783724750133e-06, "loss": 39.8594, "step": 6503 }, { "epoch": 0.06156700523471001, "grad_norm": 283.2763366699219, "learning_rate": 1.994780596928528e-06, "loss": 23.2734, "step": 6504 }, { "epoch": 0.06157647125642506, "grad_norm": 548.2725219726562, "learning_rate": 1.9947774681718934e-06, "loss": 14.082, "step": 6505 }, { "epoch": 0.061585937278140115, "grad_norm": 263.8515625, "learning_rate": 1.994774338480233e-06, "loss": 24.4531, "step": 6506 }, { "epoch": 0.06159540329985517, "grad_norm": 3.179226875305176, "learning_rate": 1.9947712078535494e-06, "loss": 0.8589, "step": 6507 }, { "epoch": 0.06160486932157022, "grad_norm": 638.1685791015625, "learning_rate": 1.994768076291846e-06, "loss": 55.9609, "step": 6508 }, { "epoch": 0.061614335343285274, "grad_norm": 285.19183349609375, "learning_rate": 1.994764943795125e-06, "loss": 27.7344, "step": 6509 }, { "epoch": 0.061623801365000334, "grad_norm": 252.89337158203125, "learning_rate": 1.9947618103633895e-06, "loss": 23.9453, "step": 6510 }, { "epoch": 0.06163326738671539, "grad_norm": 268.19171142578125, "learning_rate": 1.9947586759966427e-06, "loss": 25.625, "step": 6511 }, { "epoch": 0.06164273340843044, "grad_norm": 263.1337890625, "learning_rate": 1.9947555406948876e-06, "loss": 21.2891, "step": 6512 }, { "epoch": 0.06165219943014549, "grad_norm": 182.66665649414062, "learning_rate": 1.994752404458127e-06, "loss": 18.625, "step": 6513 }, { "epoch": 0.061661665451860546, "grad_norm": 166.4856414794922, "learning_rate": 1.994749267286364e-06, "loss": 18.1172, "step": 6514 }, { "epoch": 0.0616711314735756, "grad_norm": 506.74273681640625, "learning_rate": 1.9947461291796007e-06, "loss": 44.0625, "step": 6515 }, { "epoch": 0.06168059749529065, "grad_norm": 563.1497802734375, "learning_rate": 1.9947429901378415e-06, "loss": 57.6562, "step": 6516 }, { "epoch": 0.061690063517005705, "grad_norm": 486.1155700683594, "learning_rate": 1.994739850161088e-06, "loss": 22.5938, "step": 6517 }, { "epoch": 0.06169952953872076, "grad_norm": 299.6109924316406, "learning_rate": 1.9947367092493443e-06, "loss": 30.7969, "step": 6518 }, { "epoch": 0.06170899556043582, "grad_norm": 522.7945556640625, "learning_rate": 1.9947335674026126e-06, "loss": 50.7422, "step": 6519 }, { "epoch": 0.06171846158215087, "grad_norm": 269.0880432128906, "learning_rate": 1.9947304246208963e-06, "loss": 22.5781, "step": 6520 }, { "epoch": 0.061727927603865924, "grad_norm": 249.52621459960938, "learning_rate": 1.994727280904198e-06, "loss": 24.7031, "step": 6521 }, { "epoch": 0.06173739362558098, "grad_norm": 459.7331848144531, "learning_rate": 1.9947241362525205e-06, "loss": 26.3516, "step": 6522 }, { "epoch": 0.06174685964729603, "grad_norm": 4.190451145172119, "learning_rate": 1.9947209906658673e-06, "loss": 1.0054, "step": 6523 }, { "epoch": 0.06175632566901108, "grad_norm": 271.57879638671875, "learning_rate": 1.994717844144241e-06, "loss": 26.9766, "step": 6524 }, { "epoch": 0.061765791690726136, "grad_norm": 298.4772644042969, "learning_rate": 1.994714696687644e-06, "loss": 15.2656, "step": 6525 }, { "epoch": 0.06177525771244119, "grad_norm": 323.3160095214844, "learning_rate": 1.9947115482960805e-06, "loss": 35.7422, "step": 6526 }, { "epoch": 0.06178472373415625, "grad_norm": 2.8903868198394775, "learning_rate": 1.994708398969553e-06, "loss": 0.8906, "step": 6527 }, { "epoch": 0.0617941897558713, "grad_norm": 2.8263099193573, "learning_rate": 1.994705248708064e-06, "loss": 0.8003, "step": 6528 }, { "epoch": 0.061803655777586355, "grad_norm": 200.31028747558594, "learning_rate": 1.9947020975116172e-06, "loss": 19.7109, "step": 6529 }, { "epoch": 0.06181312179930141, "grad_norm": 303.52020263671875, "learning_rate": 1.9946989453802145e-06, "loss": 15.3047, "step": 6530 }, { "epoch": 0.06182258782101646, "grad_norm": 295.1498718261719, "learning_rate": 1.9946957923138597e-06, "loss": 21.7188, "step": 6531 }, { "epoch": 0.061832053842731514, "grad_norm": 267.7174377441406, "learning_rate": 1.994692638312556e-06, "loss": 35.3906, "step": 6532 }, { "epoch": 0.06184151986444657, "grad_norm": 318.5130615234375, "learning_rate": 1.9946894833763056e-06, "loss": 29.1094, "step": 6533 }, { "epoch": 0.06185098588616162, "grad_norm": 332.28472900390625, "learning_rate": 1.994686327505112e-06, "loss": 31.7344, "step": 6534 }, { "epoch": 0.061860451907876673, "grad_norm": 333.67144775390625, "learning_rate": 1.994683170698978e-06, "loss": 29.3594, "step": 6535 }, { "epoch": 0.06186991792959173, "grad_norm": 371.3656311035156, "learning_rate": 1.994680012957906e-06, "loss": 24.8516, "step": 6536 }, { "epoch": 0.061879383951306786, "grad_norm": 205.0545654296875, "learning_rate": 1.9946768542819e-06, "loss": 18.2578, "step": 6537 }, { "epoch": 0.06188884997302184, "grad_norm": 256.2228698730469, "learning_rate": 1.994673694670962e-06, "loss": 20.2188, "step": 6538 }, { "epoch": 0.06189831599473689, "grad_norm": 317.9997863769531, "learning_rate": 1.994670534125096e-06, "loss": 27.3672, "step": 6539 }, { "epoch": 0.061907782016451945, "grad_norm": 168.1892547607422, "learning_rate": 1.994667372644304e-06, "loss": 21.9375, "step": 6540 }, { "epoch": 0.061917248038167, "grad_norm": 530.1842041015625, "learning_rate": 1.9946642102285898e-06, "loss": 32.6953, "step": 6541 }, { "epoch": 0.06192671405988205, "grad_norm": 567.0525512695312, "learning_rate": 1.9946610468779552e-06, "loss": 41.6641, "step": 6542 }, { "epoch": 0.061936180081597104, "grad_norm": 321.6238098144531, "learning_rate": 1.9946578825924047e-06, "loss": 22.5703, "step": 6543 }, { "epoch": 0.061945646103312164, "grad_norm": 468.16094970703125, "learning_rate": 1.9946547173719403e-06, "loss": 49.125, "step": 6544 }, { "epoch": 0.06195511212502722, "grad_norm": 509.3931884765625, "learning_rate": 1.994651551216565e-06, "loss": 32.4141, "step": 6545 }, { "epoch": 0.06196457814674227, "grad_norm": 244.98410034179688, "learning_rate": 1.9946483841262817e-06, "loss": 19.2266, "step": 6546 }, { "epoch": 0.06197404416845732, "grad_norm": 448.8028869628906, "learning_rate": 1.994645216101094e-06, "loss": 39.0938, "step": 6547 }, { "epoch": 0.061983510190172376, "grad_norm": 401.77398681640625, "learning_rate": 1.9946420471410044e-06, "loss": 40.5938, "step": 6548 }, { "epoch": 0.06199297621188743, "grad_norm": 222.12440490722656, "learning_rate": 1.994638877246016e-06, "loss": 25.2891, "step": 6549 }, { "epoch": 0.06200244223360248, "grad_norm": 262.3053894042969, "learning_rate": 1.9946357064161317e-06, "loss": 22.4805, "step": 6550 }, { "epoch": 0.062011908255317535, "grad_norm": 551.2672119140625, "learning_rate": 1.9946325346513548e-06, "loss": 44.5625, "step": 6551 }, { "epoch": 0.06202137427703259, "grad_norm": 421.7435302734375, "learning_rate": 1.9946293619516877e-06, "loss": 34.8906, "step": 6552 }, { "epoch": 0.06203084029874765, "grad_norm": 299.5782470703125, "learning_rate": 1.994626188317134e-06, "loss": 37.4844, "step": 6553 }, { "epoch": 0.0620403063204627, "grad_norm": 303.88323974609375, "learning_rate": 1.994623013747696e-06, "loss": 18.5547, "step": 6554 }, { "epoch": 0.062049772342177754, "grad_norm": 488.7344055175781, "learning_rate": 1.9946198382433772e-06, "loss": 24.7969, "step": 6555 }, { "epoch": 0.06205923836389281, "grad_norm": 338.234375, "learning_rate": 1.994616661804181e-06, "loss": 43.9375, "step": 6556 }, { "epoch": 0.06206870438560786, "grad_norm": 248.6620635986328, "learning_rate": 1.994613484430109e-06, "loss": 20.9141, "step": 6557 }, { "epoch": 0.062078170407322913, "grad_norm": 303.9061279296875, "learning_rate": 1.9946103061211654e-06, "loss": 25.3438, "step": 6558 }, { "epoch": 0.062087636429037966, "grad_norm": 214.03416442871094, "learning_rate": 1.994607126877353e-06, "loss": 25.9766, "step": 6559 }, { "epoch": 0.06209710245075302, "grad_norm": 429.9839782714844, "learning_rate": 1.9946039466986743e-06, "loss": 36.0156, "step": 6560 }, { "epoch": 0.06210656847246807, "grad_norm": 183.0668182373047, "learning_rate": 1.994600765585133e-06, "loss": 23.9766, "step": 6561 }, { "epoch": 0.06211603449418313, "grad_norm": 2.3982865810394287, "learning_rate": 1.994597583536731e-06, "loss": 0.7388, "step": 6562 }, { "epoch": 0.062125500515898185, "grad_norm": 602.2152709960938, "learning_rate": 1.994594400553473e-06, "loss": 62.7812, "step": 6563 }, { "epoch": 0.06213496653761324, "grad_norm": 499.3941650390625, "learning_rate": 1.99459121663536e-06, "loss": 60.4219, "step": 6564 }, { "epoch": 0.06214443255932829, "grad_norm": 237.35263061523438, "learning_rate": 1.9945880317823965e-06, "loss": 22.875, "step": 6565 }, { "epoch": 0.062153898581043344, "grad_norm": 237.043701171875, "learning_rate": 1.9945848459945847e-06, "loss": 17.6016, "step": 6566 }, { "epoch": 0.0621633646027584, "grad_norm": 581.7696533203125, "learning_rate": 1.9945816592719276e-06, "loss": 24.9688, "step": 6567 }, { "epoch": 0.06217283062447345, "grad_norm": 347.5903015136719, "learning_rate": 1.994578471614429e-06, "loss": 19.332, "step": 6568 }, { "epoch": 0.062182296646188503, "grad_norm": 302.53082275390625, "learning_rate": 1.994575283022091e-06, "loss": 31.3125, "step": 6569 }, { "epoch": 0.06219176266790356, "grad_norm": 444.3642883300781, "learning_rate": 1.9945720934949173e-06, "loss": 22.8828, "step": 6570 }, { "epoch": 0.062201228689618616, "grad_norm": 181.65528869628906, "learning_rate": 1.99456890303291e-06, "loss": 20.3828, "step": 6571 }, { "epoch": 0.06221069471133367, "grad_norm": 327.5412902832031, "learning_rate": 1.994565711636073e-06, "loss": 21.9219, "step": 6572 }, { "epoch": 0.06222016073304872, "grad_norm": 366.6715087890625, "learning_rate": 1.994562519304409e-06, "loss": 54.2969, "step": 6573 }, { "epoch": 0.062229626754763775, "grad_norm": 311.2423095703125, "learning_rate": 1.9945593260379204e-06, "loss": 27.25, "step": 6574 }, { "epoch": 0.06223909277647883, "grad_norm": 232.86087036132812, "learning_rate": 1.994556131836611e-06, "loss": 20.6484, "step": 6575 }, { "epoch": 0.06224855879819388, "grad_norm": 424.0889587402344, "learning_rate": 1.994552936700484e-06, "loss": 29.2656, "step": 6576 }, { "epoch": 0.062258024819908934, "grad_norm": 358.9302673339844, "learning_rate": 1.9945497406295417e-06, "loss": 39.2344, "step": 6577 }, { "epoch": 0.06226749084162399, "grad_norm": 281.44281005859375, "learning_rate": 1.994546543623787e-06, "loss": 30.3125, "step": 6578 }, { "epoch": 0.06227695686333905, "grad_norm": 331.20196533203125, "learning_rate": 1.9945433456832233e-06, "loss": 28.6953, "step": 6579 }, { "epoch": 0.0622864228850541, "grad_norm": 316.4100646972656, "learning_rate": 1.994540146807854e-06, "loss": 27.7188, "step": 6580 }, { "epoch": 0.062295888906769153, "grad_norm": 523.3636474609375, "learning_rate": 1.9945369469976813e-06, "loss": 51.2188, "step": 6581 }, { "epoch": 0.062305354928484206, "grad_norm": 900.6707153320312, "learning_rate": 1.9945337462527084e-06, "loss": 101.9375, "step": 6582 }, { "epoch": 0.06231482095019926, "grad_norm": 212.4016571044922, "learning_rate": 1.994530544572939e-06, "loss": 27.1016, "step": 6583 }, { "epoch": 0.06232428697191431, "grad_norm": 1079.9776611328125, "learning_rate": 1.994527341958375e-06, "loss": 58.6016, "step": 6584 }, { "epoch": 0.062333752993629366, "grad_norm": 542.1168823242188, "learning_rate": 1.9945241384090206e-06, "loss": 27.3438, "step": 6585 }, { "epoch": 0.06234321901534442, "grad_norm": 915.556396484375, "learning_rate": 1.994520933924878e-06, "loss": 16.3516, "step": 6586 }, { "epoch": 0.06235268503705948, "grad_norm": 289.3629455566406, "learning_rate": 1.99451772850595e-06, "loss": 25.6953, "step": 6587 }, { "epoch": 0.06236215105877453, "grad_norm": 283.9156188964844, "learning_rate": 1.9945145221522407e-06, "loss": 19.7656, "step": 6588 }, { "epoch": 0.062371617080489584, "grad_norm": 489.14520263671875, "learning_rate": 1.994511314863752e-06, "loss": 32.8281, "step": 6589 }, { "epoch": 0.06238108310220464, "grad_norm": 608.2365112304688, "learning_rate": 1.9945081066404875e-06, "loss": 30.0547, "step": 6590 }, { "epoch": 0.06239054912391969, "grad_norm": 180.87991333007812, "learning_rate": 1.99450489748245e-06, "loss": 21.6094, "step": 6591 }, { "epoch": 0.062400015145634743, "grad_norm": 609.9078369140625, "learning_rate": 1.9945016873896424e-06, "loss": 34.1953, "step": 6592 }, { "epoch": 0.062409481167349797, "grad_norm": 214.10850524902344, "learning_rate": 1.9944984763620686e-06, "loss": 22.3906, "step": 6593 }, { "epoch": 0.06241894718906485, "grad_norm": 655.64990234375, "learning_rate": 1.9944952643997304e-06, "loss": 43.9844, "step": 6594 }, { "epoch": 0.0624284132107799, "grad_norm": 229.20582580566406, "learning_rate": 1.9944920515026312e-06, "loss": 19.0938, "step": 6595 }, { "epoch": 0.06243787923249496, "grad_norm": 551.6954345703125, "learning_rate": 1.9944888376707745e-06, "loss": 34.4531, "step": 6596 }, { "epoch": 0.062447345254210015, "grad_norm": 265.348388671875, "learning_rate": 1.994485622904163e-06, "loss": 32.2656, "step": 6597 }, { "epoch": 0.06245681127592507, "grad_norm": 587.7361450195312, "learning_rate": 1.9944824072027995e-06, "loss": 24.7266, "step": 6598 }, { "epoch": 0.06246627729764012, "grad_norm": 3.1008243560791016, "learning_rate": 1.9944791905666873e-06, "loss": 0.8882, "step": 6599 }, { "epoch": 0.062475743319355174, "grad_norm": 414.8764953613281, "learning_rate": 1.9944759729958297e-06, "loss": 17.4688, "step": 6600 }, { "epoch": 0.06248520934107023, "grad_norm": 403.3591613769531, "learning_rate": 1.994472754490229e-06, "loss": 41.6406, "step": 6601 }, { "epoch": 0.06249467536278528, "grad_norm": 279.6030578613281, "learning_rate": 1.9944695350498884e-06, "loss": 21.4453, "step": 6602 }, { "epoch": 0.06250414138450033, "grad_norm": 898.2662353515625, "learning_rate": 1.9944663146748114e-06, "loss": 49.1953, "step": 6603 }, { "epoch": 0.0625136074062154, "grad_norm": 323.9005432128906, "learning_rate": 1.994463093365001e-06, "loss": 23.1328, "step": 6604 }, { "epoch": 0.06252307342793044, "grad_norm": 497.93505859375, "learning_rate": 1.9944598711204595e-06, "loss": 49.4375, "step": 6605 }, { "epoch": 0.0625325394496455, "grad_norm": 3.4791600704193115, "learning_rate": 1.994456647941191e-06, "loss": 0.957, "step": 6606 }, { "epoch": 0.06254200547136055, "grad_norm": 665.6234741210938, "learning_rate": 1.9944534238271974e-06, "loss": 47.9609, "step": 6607 }, { "epoch": 0.0625514714930756, "grad_norm": 277.54290771484375, "learning_rate": 1.9944501987784827e-06, "loss": 28.1562, "step": 6608 }, { "epoch": 0.06256093751479067, "grad_norm": 463.15435791015625, "learning_rate": 1.9944469727950493e-06, "loss": 59.7031, "step": 6609 }, { "epoch": 0.06257040353650571, "grad_norm": 500.3600769042969, "learning_rate": 1.9944437458769003e-06, "loss": 44.4844, "step": 6610 }, { "epoch": 0.06257986955822077, "grad_norm": 329.5493469238281, "learning_rate": 1.994440518024039e-06, "loss": 28.1094, "step": 6611 }, { "epoch": 0.06258933557993582, "grad_norm": 565.6698608398438, "learning_rate": 1.9944372892364683e-06, "loss": 25.0, "step": 6612 }, { "epoch": 0.06259880160165088, "grad_norm": 170.04965209960938, "learning_rate": 1.9944340595141913e-06, "loss": 16.9141, "step": 6613 }, { "epoch": 0.06260826762336592, "grad_norm": 374.5801696777344, "learning_rate": 1.994430828857211e-06, "loss": 62.7266, "step": 6614 }, { "epoch": 0.06261773364508098, "grad_norm": 275.47491455078125, "learning_rate": 1.9944275972655304e-06, "loss": 29.5938, "step": 6615 }, { "epoch": 0.06262719966679603, "grad_norm": 334.5172119140625, "learning_rate": 1.9944243647391525e-06, "loss": 20.7031, "step": 6616 }, { "epoch": 0.06263666568851109, "grad_norm": 885.1593017578125, "learning_rate": 1.9944211312780805e-06, "loss": 31.4375, "step": 6617 }, { "epoch": 0.06264613171022615, "grad_norm": 282.8595886230469, "learning_rate": 1.994417896882317e-06, "loss": 22.7812, "step": 6618 }, { "epoch": 0.0626555977319412, "grad_norm": 354.2593994140625, "learning_rate": 1.9944146615518657e-06, "loss": 27.75, "step": 6619 }, { "epoch": 0.06266506375365626, "grad_norm": 393.98016357421875, "learning_rate": 1.994411425286729e-06, "loss": 33.5469, "step": 6620 }, { "epoch": 0.0626745297753713, "grad_norm": 776.6715087890625, "learning_rate": 1.99440818808691e-06, "loss": 22.4453, "step": 6621 }, { "epoch": 0.06268399579708636, "grad_norm": 382.8057556152344, "learning_rate": 1.994404949952413e-06, "loss": 29.8359, "step": 6622 }, { "epoch": 0.06269346181880141, "grad_norm": 349.7347717285156, "learning_rate": 1.9944017108832393e-06, "loss": 28.0078, "step": 6623 }, { "epoch": 0.06270292784051647, "grad_norm": 939.6514282226562, "learning_rate": 1.9943984708793927e-06, "loss": 52.3438, "step": 6624 }, { "epoch": 0.06271239386223151, "grad_norm": 511.7294616699219, "learning_rate": 1.9943952299408764e-06, "loss": 33.0391, "step": 6625 }, { "epoch": 0.06272185988394657, "grad_norm": 704.9512939453125, "learning_rate": 1.9943919880676933e-06, "loss": 37.0156, "step": 6626 }, { "epoch": 0.06273132590566163, "grad_norm": 545.3443603515625, "learning_rate": 1.9943887452598465e-06, "loss": 28.7266, "step": 6627 }, { "epoch": 0.06274079192737668, "grad_norm": 684.4176635742188, "learning_rate": 1.9943855015173387e-06, "loss": 45.4688, "step": 6628 }, { "epoch": 0.06275025794909174, "grad_norm": 546.9070434570312, "learning_rate": 1.994382256840173e-06, "loss": 48.7969, "step": 6629 }, { "epoch": 0.06275972397080679, "grad_norm": 382.7539367675781, "learning_rate": 1.9943790112283534e-06, "loss": 42.0312, "step": 6630 }, { "epoch": 0.06276918999252185, "grad_norm": 146.4769287109375, "learning_rate": 1.994375764681882e-06, "loss": 17.7656, "step": 6631 }, { "epoch": 0.06277865601423689, "grad_norm": 269.55206298828125, "learning_rate": 1.9943725172007617e-06, "loss": 31.0469, "step": 6632 }, { "epoch": 0.06278812203595195, "grad_norm": 473.8993225097656, "learning_rate": 1.994369268784996e-06, "loss": 15.2148, "step": 6633 }, { "epoch": 0.062797588057667, "grad_norm": 647.9005737304688, "learning_rate": 1.994366019434588e-06, "loss": 53.75, "step": 6634 }, { "epoch": 0.06280705407938206, "grad_norm": 3.618703603744507, "learning_rate": 1.994362769149541e-06, "loss": 0.9863, "step": 6635 }, { "epoch": 0.06281652010109712, "grad_norm": 602.4993286132812, "learning_rate": 1.994359517929857e-06, "loss": 30.7109, "step": 6636 }, { "epoch": 0.06282598612281216, "grad_norm": 272.9553527832031, "learning_rate": 1.9943562657755404e-06, "loss": 25.7344, "step": 6637 }, { "epoch": 0.06283545214452722, "grad_norm": 508.6067810058594, "learning_rate": 1.994353012686593e-06, "loss": 57.3125, "step": 6638 }, { "epoch": 0.06284491816624227, "grad_norm": 3.6155178546905518, "learning_rate": 1.994349758663019e-06, "loss": 0.9292, "step": 6639 }, { "epoch": 0.06285438418795733, "grad_norm": 1027.609375, "learning_rate": 1.9943465037048203e-06, "loss": 64.3555, "step": 6640 }, { "epoch": 0.06286385020967238, "grad_norm": 428.33770751953125, "learning_rate": 1.994343247812001e-06, "loss": 33.6641, "step": 6641 }, { "epoch": 0.06287331623138744, "grad_norm": 867.6676635742188, "learning_rate": 1.9943399909845637e-06, "loss": 47.7344, "step": 6642 }, { "epoch": 0.0628827822531025, "grad_norm": 312.7489929199219, "learning_rate": 1.9943367332225116e-06, "loss": 28.2031, "step": 6643 }, { "epoch": 0.06289224827481754, "grad_norm": 192.0654754638672, "learning_rate": 1.994333474525847e-06, "loss": 27.0703, "step": 6644 }, { "epoch": 0.0629017142965326, "grad_norm": 386.3223571777344, "learning_rate": 1.9943302148945746e-06, "loss": 20.9609, "step": 6645 }, { "epoch": 0.06291118031824765, "grad_norm": 360.61907958984375, "learning_rate": 1.9943269543286956e-06, "loss": 21.0859, "step": 6646 }, { "epoch": 0.06292064633996271, "grad_norm": 461.377197265625, "learning_rate": 1.9943236928282145e-06, "loss": 30.6055, "step": 6647 }, { "epoch": 0.06293011236167775, "grad_norm": 2.7292845249176025, "learning_rate": 1.9943204303931337e-06, "loss": 0.8647, "step": 6648 }, { "epoch": 0.06293957838339281, "grad_norm": 342.566162109375, "learning_rate": 1.9943171670234563e-06, "loss": 28.3047, "step": 6649 }, { "epoch": 0.06294904440510786, "grad_norm": 408.58258056640625, "learning_rate": 1.9943139027191855e-06, "loss": 18.4297, "step": 6650 }, { "epoch": 0.06295851042682292, "grad_norm": 352.99078369140625, "learning_rate": 1.994310637480324e-06, "loss": 11.1719, "step": 6651 }, { "epoch": 0.06296797644853798, "grad_norm": 345.1142883300781, "learning_rate": 1.994307371306876e-06, "loss": 22.0234, "step": 6652 }, { "epoch": 0.06297744247025303, "grad_norm": 373.28070068359375, "learning_rate": 1.994304104198843e-06, "loss": 28.1641, "step": 6653 }, { "epoch": 0.06298690849196809, "grad_norm": 232.0203857421875, "learning_rate": 1.994300836156229e-06, "loss": 24.8906, "step": 6654 }, { "epoch": 0.06299637451368313, "grad_norm": 273.58642578125, "learning_rate": 1.994297567179037e-06, "loss": 18.9297, "step": 6655 }, { "epoch": 0.06300584053539819, "grad_norm": 199.5651397705078, "learning_rate": 1.99429429726727e-06, "loss": 22.2891, "step": 6656 }, { "epoch": 0.06301530655711324, "grad_norm": 455.1708984375, "learning_rate": 1.994291026420931e-06, "loss": 43.9062, "step": 6657 }, { "epoch": 0.0630247725788283, "grad_norm": 553.3795776367188, "learning_rate": 1.994287754640023e-06, "loss": 39.2656, "step": 6658 }, { "epoch": 0.06303423860054334, "grad_norm": 452.1964416503906, "learning_rate": 1.994284481924549e-06, "loss": 19.8789, "step": 6659 }, { "epoch": 0.0630437046222584, "grad_norm": 133.99713134765625, "learning_rate": 1.994281208274513e-06, "loss": 15.1797, "step": 6660 }, { "epoch": 0.06305317064397346, "grad_norm": 228.34942626953125, "learning_rate": 1.994277933689917e-06, "loss": 16.1328, "step": 6661 }, { "epoch": 0.06306263666568851, "grad_norm": 1012.8562622070312, "learning_rate": 1.994274658170764e-06, "loss": 67.2188, "step": 6662 }, { "epoch": 0.06307210268740357, "grad_norm": 222.49501037597656, "learning_rate": 1.994271381717058e-06, "loss": 23.2031, "step": 6663 }, { "epoch": 0.06308156870911862, "grad_norm": 746.7752075195312, "learning_rate": 1.9942681043288013e-06, "loss": 54.0391, "step": 6664 }, { "epoch": 0.06309103473083368, "grad_norm": 2.6500465869903564, "learning_rate": 1.994264826005997e-06, "loss": 0.8252, "step": 6665 }, { "epoch": 0.06310050075254872, "grad_norm": 216.68743896484375, "learning_rate": 1.9942615467486494e-06, "loss": 22.8281, "step": 6666 }, { "epoch": 0.06310996677426378, "grad_norm": 588.1904907226562, "learning_rate": 1.99425826655676e-06, "loss": 19.2656, "step": 6667 }, { "epoch": 0.06311943279597883, "grad_norm": 515.8344116210938, "learning_rate": 1.9942549854303327e-06, "loss": 36.5938, "step": 6668 }, { "epoch": 0.06312889881769389, "grad_norm": 297.4466247558594, "learning_rate": 1.99425170336937e-06, "loss": 27.6406, "step": 6669 }, { "epoch": 0.06313836483940895, "grad_norm": 550.3272094726562, "learning_rate": 1.9942484203738755e-06, "loss": 25.7422, "step": 6670 }, { "epoch": 0.063147830861124, "grad_norm": 367.9181213378906, "learning_rate": 1.994245136443852e-06, "loss": 46.2188, "step": 6671 }, { "epoch": 0.06315729688283905, "grad_norm": 603.8129272460938, "learning_rate": 1.9942418515793034e-06, "loss": 35.1484, "step": 6672 }, { "epoch": 0.0631667629045541, "grad_norm": 387.41595458984375, "learning_rate": 1.9942385657802316e-06, "loss": 24.125, "step": 6673 }, { "epoch": 0.06317622892626916, "grad_norm": 242.595703125, "learning_rate": 1.9942352790466407e-06, "loss": 21.6797, "step": 6674 }, { "epoch": 0.0631856949479842, "grad_norm": 321.00048828125, "learning_rate": 1.994231991378533e-06, "loss": 22.9609, "step": 6675 }, { "epoch": 0.06319516096969927, "grad_norm": 697.1222534179688, "learning_rate": 1.994228702775912e-06, "loss": 53.6875, "step": 6676 }, { "epoch": 0.06320462699141431, "grad_norm": 256.42822265625, "learning_rate": 1.9942254132387808e-06, "loss": 20.7031, "step": 6677 }, { "epoch": 0.06321409301312937, "grad_norm": 335.9339294433594, "learning_rate": 1.994222122767142e-06, "loss": 22.0234, "step": 6678 }, { "epoch": 0.06322355903484443, "grad_norm": 1188.4654541015625, "learning_rate": 1.9942188313609992e-06, "loss": 41.9453, "step": 6679 }, { "epoch": 0.06323302505655948, "grad_norm": 209.3517608642578, "learning_rate": 1.9942155390203555e-06, "loss": 20.4141, "step": 6680 }, { "epoch": 0.06324249107827454, "grad_norm": 292.9600524902344, "learning_rate": 1.994212245745214e-06, "loss": 16.3086, "step": 6681 }, { "epoch": 0.06325195709998958, "grad_norm": 338.4494323730469, "learning_rate": 1.9942089515355775e-06, "loss": 30.3438, "step": 6682 }, { "epoch": 0.06326142312170464, "grad_norm": 832.0562133789062, "learning_rate": 1.994205656391449e-06, "loss": 43.4297, "step": 6683 }, { "epoch": 0.06327088914341969, "grad_norm": 254.0127716064453, "learning_rate": 1.9942023603128325e-06, "loss": 20.3125, "step": 6684 }, { "epoch": 0.06328035516513475, "grad_norm": 316.4044189453125, "learning_rate": 1.99419906329973e-06, "loss": 21.5156, "step": 6685 }, { "epoch": 0.06328982118684981, "grad_norm": 297.1955261230469, "learning_rate": 1.994195765352145e-06, "loss": 27.5078, "step": 6686 }, { "epoch": 0.06329928720856486, "grad_norm": 203.60964965820312, "learning_rate": 1.9941924664700807e-06, "loss": 19.8047, "step": 6687 }, { "epoch": 0.06330875323027992, "grad_norm": 353.3258972167969, "learning_rate": 1.9941891666535407e-06, "loss": 18.2422, "step": 6688 }, { "epoch": 0.06331821925199496, "grad_norm": 598.8959350585938, "learning_rate": 1.994185865902527e-06, "loss": 14.7812, "step": 6689 }, { "epoch": 0.06332768527371002, "grad_norm": 218.29974365234375, "learning_rate": 1.9941825642170435e-06, "loss": 18.5, "step": 6690 }, { "epoch": 0.06333715129542507, "grad_norm": 936.3271484375, "learning_rate": 1.9941792615970925e-06, "loss": 77.5625, "step": 6691 }, { "epoch": 0.06334661731714013, "grad_norm": 533.9515991210938, "learning_rate": 1.9941759580426783e-06, "loss": 48.0938, "step": 6692 }, { "epoch": 0.06335608333885517, "grad_norm": 268.0434265136719, "learning_rate": 1.9941726535538034e-06, "loss": 16.7344, "step": 6693 }, { "epoch": 0.06336554936057023, "grad_norm": 2.9863734245300293, "learning_rate": 1.9941693481304707e-06, "loss": 0.9204, "step": 6694 }, { "epoch": 0.0633750153822853, "grad_norm": 610.7343139648438, "learning_rate": 1.994166041772683e-06, "loss": 50.1719, "step": 6695 }, { "epoch": 0.06338448140400034, "grad_norm": 3.437141180038452, "learning_rate": 1.9941627344804447e-06, "loss": 0.9609, "step": 6696 }, { "epoch": 0.0633939474257154, "grad_norm": 409.10595703125, "learning_rate": 1.994159426253758e-06, "loss": 25.3359, "step": 6697 }, { "epoch": 0.06340341344743045, "grad_norm": 399.8074951171875, "learning_rate": 1.994156117092626e-06, "loss": 26.8516, "step": 6698 }, { "epoch": 0.0634128794691455, "grad_norm": 605.4617309570312, "learning_rate": 1.9941528069970514e-06, "loss": 36.7422, "step": 6699 }, { "epoch": 0.06342234549086055, "grad_norm": 561.4711303710938, "learning_rate": 1.9941494959670382e-06, "loss": 64.2812, "step": 6700 }, { "epoch": 0.06343181151257561, "grad_norm": 456.05322265625, "learning_rate": 1.9941461840025893e-06, "loss": 25.9062, "step": 6701 }, { "epoch": 0.06344127753429066, "grad_norm": 450.1174011230469, "learning_rate": 1.9941428711037076e-06, "loss": 27.9141, "step": 6702 }, { "epoch": 0.06345074355600572, "grad_norm": 3.881624460220337, "learning_rate": 1.994139557270396e-06, "loss": 0.8594, "step": 6703 }, { "epoch": 0.06346020957772078, "grad_norm": 971.6464233398438, "learning_rate": 1.9941362425026586e-06, "loss": 41.0547, "step": 6704 }, { "epoch": 0.06346967559943582, "grad_norm": 187.8203582763672, "learning_rate": 1.9941329268004973e-06, "loss": 24.0156, "step": 6705 }, { "epoch": 0.06347914162115088, "grad_norm": 532.7601928710938, "learning_rate": 1.994129610163916e-06, "loss": 37.3984, "step": 6706 }, { "epoch": 0.06348860764286593, "grad_norm": 295.7820739746094, "learning_rate": 1.994126292592917e-06, "loss": 23.6328, "step": 6707 }, { "epoch": 0.06349807366458099, "grad_norm": 1261.7890625, "learning_rate": 1.9941229740875042e-06, "loss": 70.3047, "step": 6708 }, { "epoch": 0.06350753968629604, "grad_norm": 411.4134521484375, "learning_rate": 1.9941196546476804e-06, "loss": 23.5859, "step": 6709 }, { "epoch": 0.0635170057080111, "grad_norm": 618.9721069335938, "learning_rate": 1.994116334273449e-06, "loss": 38.2891, "step": 6710 }, { "epoch": 0.06352647172972614, "grad_norm": 309.77227783203125, "learning_rate": 1.9941130129648127e-06, "loss": 29.3984, "step": 6711 }, { "epoch": 0.0635359377514412, "grad_norm": 1241.9078369140625, "learning_rate": 1.9941096907217753e-06, "loss": 105.0625, "step": 6712 }, { "epoch": 0.06354540377315626, "grad_norm": 2.5295939445495605, "learning_rate": 1.994106367544339e-06, "loss": 0.8105, "step": 6713 }, { "epoch": 0.06355486979487131, "grad_norm": 205.5342559814453, "learning_rate": 1.9941030434325075e-06, "loss": 30.9922, "step": 6714 }, { "epoch": 0.06356433581658637, "grad_norm": 569.7839965820312, "learning_rate": 1.9940997183862836e-06, "loss": 54.0312, "step": 6715 }, { "epoch": 0.06357380183830141, "grad_norm": 466.4731140136719, "learning_rate": 1.994096392405671e-06, "loss": 58.2812, "step": 6716 }, { "epoch": 0.06358326786001647, "grad_norm": 828.9198608398438, "learning_rate": 1.994093065490672e-06, "loss": 60.0469, "step": 6717 }, { "epoch": 0.06359273388173152, "grad_norm": 427.0306396484375, "learning_rate": 1.994089737641291e-06, "loss": 53.6875, "step": 6718 }, { "epoch": 0.06360219990344658, "grad_norm": 296.9304504394531, "learning_rate": 1.9940864088575297e-06, "loss": 38.1719, "step": 6719 }, { "epoch": 0.06361166592516163, "grad_norm": 292.32415771484375, "learning_rate": 1.9940830791393922e-06, "loss": 18.2109, "step": 6720 }, { "epoch": 0.06362113194687669, "grad_norm": 189.47280883789062, "learning_rate": 1.994079748486881e-06, "loss": 27.5, "step": 6721 }, { "epoch": 0.06363059796859175, "grad_norm": 425.1897888183594, "learning_rate": 1.9940764168999997e-06, "loss": 18.0391, "step": 6722 }, { "epoch": 0.06364006399030679, "grad_norm": 511.52850341796875, "learning_rate": 1.9940730843787507e-06, "loss": 44.9375, "step": 6723 }, { "epoch": 0.06364953001202185, "grad_norm": 502.69390869140625, "learning_rate": 1.9940697509231383e-06, "loss": 37.8594, "step": 6724 }, { "epoch": 0.0636589960337369, "grad_norm": 426.2101135253906, "learning_rate": 1.9940664165331647e-06, "loss": 40.875, "step": 6725 }, { "epoch": 0.06366846205545196, "grad_norm": 568.5416259765625, "learning_rate": 1.9940630812088333e-06, "loss": 24.2734, "step": 6726 }, { "epoch": 0.063677928077167, "grad_norm": 691.176513671875, "learning_rate": 1.9940597449501475e-06, "loss": 18.0547, "step": 6727 }, { "epoch": 0.06368739409888206, "grad_norm": 357.111328125, "learning_rate": 1.99405640775711e-06, "loss": 20.75, "step": 6728 }, { "epoch": 0.06369686012059712, "grad_norm": 324.0435791015625, "learning_rate": 1.994053069629724e-06, "loss": 26.2969, "step": 6729 }, { "epoch": 0.06370632614231217, "grad_norm": 352.7721862792969, "learning_rate": 1.9940497305679933e-06, "loss": 20.1797, "step": 6730 }, { "epoch": 0.06371579216402723, "grad_norm": 342.6409606933594, "learning_rate": 1.9940463905719203e-06, "loss": 22.0469, "step": 6731 }, { "epoch": 0.06372525818574228, "grad_norm": 275.0731201171875, "learning_rate": 1.994043049641508e-06, "loss": 26.3047, "step": 6732 }, { "epoch": 0.06373472420745734, "grad_norm": 3.5069103240966797, "learning_rate": 1.9940397077767606e-06, "loss": 0.9619, "step": 6733 }, { "epoch": 0.06374419022917238, "grad_norm": 376.8876037597656, "learning_rate": 1.99403636497768e-06, "loss": 28.3906, "step": 6734 }, { "epoch": 0.06375365625088744, "grad_norm": 3.182755947113037, "learning_rate": 1.99403302124427e-06, "loss": 0.9131, "step": 6735 }, { "epoch": 0.06376312227260249, "grad_norm": 505.15789794921875, "learning_rate": 1.9940296765765338e-06, "loss": 18.875, "step": 6736 }, { "epoch": 0.06377258829431755, "grad_norm": 268.4615173339844, "learning_rate": 1.994026330974474e-06, "loss": 21.9844, "step": 6737 }, { "epoch": 0.06378205431603261, "grad_norm": 753.83984375, "learning_rate": 1.994022984438094e-06, "loss": 27.2656, "step": 6738 }, { "epoch": 0.06379152033774765, "grad_norm": 239.44686889648438, "learning_rate": 1.9940196369673976e-06, "loss": 24.0625, "step": 6739 }, { "epoch": 0.06380098635946271, "grad_norm": 348.17950439453125, "learning_rate": 1.9940162885623876e-06, "loss": 47.4219, "step": 6740 }, { "epoch": 0.06381045238117776, "grad_norm": 383.3602600097656, "learning_rate": 1.994012939223066e-06, "loss": 24.3281, "step": 6741 }, { "epoch": 0.06381991840289282, "grad_norm": 337.5190734863281, "learning_rate": 1.994009588949438e-06, "loss": 13.2891, "step": 6742 }, { "epoch": 0.06382938442460787, "grad_norm": 616.780517578125, "learning_rate": 1.994006237741505e-06, "loss": 23.3164, "step": 6743 }, { "epoch": 0.06383885044632293, "grad_norm": 467.5712585449219, "learning_rate": 1.994002885599271e-06, "loss": 47.2969, "step": 6744 }, { "epoch": 0.06384831646803797, "grad_norm": 454.1737365722656, "learning_rate": 1.9939995325227386e-06, "loss": 40.0938, "step": 6745 }, { "epoch": 0.06385778248975303, "grad_norm": 226.8302764892578, "learning_rate": 1.9939961785119113e-06, "loss": 26.6875, "step": 6746 }, { "epoch": 0.06386724851146809, "grad_norm": 194.16737365722656, "learning_rate": 1.9939928235667927e-06, "loss": 24.4375, "step": 6747 }, { "epoch": 0.06387671453318314, "grad_norm": 558.4127197265625, "learning_rate": 1.9939894676873853e-06, "loss": 50.7969, "step": 6748 }, { "epoch": 0.0638861805548982, "grad_norm": 290.07257080078125, "learning_rate": 1.9939861108736926e-06, "loss": 18.6094, "step": 6749 }, { "epoch": 0.06389564657661324, "grad_norm": 348.2750244140625, "learning_rate": 1.9939827531257174e-06, "loss": 43.4688, "step": 6750 }, { "epoch": 0.0639051125983283, "grad_norm": 716.6906127929688, "learning_rate": 1.993979394443463e-06, "loss": 29.4961, "step": 6751 }, { "epoch": 0.06391457862004335, "grad_norm": 942.3460083007812, "learning_rate": 1.993976034826933e-06, "loss": 42.8516, "step": 6752 }, { "epoch": 0.06392404464175841, "grad_norm": 285.1839904785156, "learning_rate": 1.99397267427613e-06, "loss": 18.2734, "step": 6753 }, { "epoch": 0.06393351066347346, "grad_norm": 221.7361297607422, "learning_rate": 1.993969312791057e-06, "loss": 24.3516, "step": 6754 }, { "epoch": 0.06394297668518852, "grad_norm": 359.666259765625, "learning_rate": 1.9939659503717185e-06, "loss": 31.7891, "step": 6755 }, { "epoch": 0.06395244270690358, "grad_norm": 612.9922485351562, "learning_rate": 1.9939625870181157e-06, "loss": 30.2969, "step": 6756 }, { "epoch": 0.06396190872861862, "grad_norm": 261.7985534667969, "learning_rate": 1.993959222730253e-06, "loss": 25.1094, "step": 6757 }, { "epoch": 0.06397137475033368, "grad_norm": 194.78700256347656, "learning_rate": 1.9939558575081335e-06, "loss": 19.7227, "step": 6758 }, { "epoch": 0.06398084077204873, "grad_norm": 558.3529052734375, "learning_rate": 1.9939524913517597e-06, "loss": 52.6875, "step": 6759 }, { "epoch": 0.06399030679376379, "grad_norm": 420.88134765625, "learning_rate": 1.9939491242611356e-06, "loss": 25.1562, "step": 6760 }, { "epoch": 0.06399977281547883, "grad_norm": 511.2099304199219, "learning_rate": 1.9939457562362642e-06, "loss": 21.3438, "step": 6761 }, { "epoch": 0.0640092388371939, "grad_norm": 226.14028930664062, "learning_rate": 1.993942387277148e-06, "loss": 19.7031, "step": 6762 }, { "epoch": 0.06401870485890894, "grad_norm": 164.95016479492188, "learning_rate": 1.9939390173837907e-06, "loss": 16.8438, "step": 6763 }, { "epoch": 0.064028170880624, "grad_norm": 209.2526397705078, "learning_rate": 1.9939356465561957e-06, "loss": 17.7422, "step": 6764 }, { "epoch": 0.06403763690233906, "grad_norm": 734.7076416015625, "learning_rate": 1.9939322747943653e-06, "loss": 39.2891, "step": 6765 }, { "epoch": 0.0640471029240541, "grad_norm": 959.2252807617188, "learning_rate": 1.9939289020983038e-06, "loss": 41.2812, "step": 6766 }, { "epoch": 0.06405656894576917, "grad_norm": 554.6907958984375, "learning_rate": 1.9939255284680136e-06, "loss": 39.625, "step": 6767 }, { "epoch": 0.06406603496748421, "grad_norm": 346.40972900390625, "learning_rate": 1.9939221539034977e-06, "loss": 29.8828, "step": 6768 }, { "epoch": 0.06407550098919927, "grad_norm": 543.5216674804688, "learning_rate": 1.99391877840476e-06, "loss": 51.0625, "step": 6769 }, { "epoch": 0.06408496701091432, "grad_norm": 537.1832885742188, "learning_rate": 1.9939154019718034e-06, "loss": 43.3047, "step": 6770 }, { "epoch": 0.06409443303262938, "grad_norm": 631.9224853515625, "learning_rate": 1.9939120246046304e-06, "loss": 48.0703, "step": 6771 }, { "epoch": 0.06410389905434444, "grad_norm": 451.6214599609375, "learning_rate": 1.9939086463032453e-06, "loss": 34.8438, "step": 6772 }, { "epoch": 0.06411336507605948, "grad_norm": 3.453562021255493, "learning_rate": 1.9939052670676506e-06, "loss": 0.9565, "step": 6773 }, { "epoch": 0.06412283109777454, "grad_norm": 664.9557495117188, "learning_rate": 1.9939018868978498e-06, "loss": 45.0156, "step": 6774 }, { "epoch": 0.06413229711948959, "grad_norm": 432.971923828125, "learning_rate": 1.9938985057938457e-06, "loss": 10.3164, "step": 6775 }, { "epoch": 0.06414176314120465, "grad_norm": 388.8166809082031, "learning_rate": 1.9938951237556413e-06, "loss": 28.1719, "step": 6776 }, { "epoch": 0.0641512291629197, "grad_norm": 3.854192018508911, "learning_rate": 1.9938917407832405e-06, "loss": 0.9795, "step": 6777 }, { "epoch": 0.06416069518463476, "grad_norm": 543.2747192382812, "learning_rate": 1.9938883568766463e-06, "loss": 38.4844, "step": 6778 }, { "epoch": 0.0641701612063498, "grad_norm": 328.5213928222656, "learning_rate": 1.9938849720358615e-06, "loss": 22.4766, "step": 6779 }, { "epoch": 0.06417962722806486, "grad_norm": 1238.9642333984375, "learning_rate": 1.9938815862608895e-06, "loss": 44.8672, "step": 6780 }, { "epoch": 0.06418909324977992, "grad_norm": 186.721923828125, "learning_rate": 1.9938781995517334e-06, "loss": 16.9922, "step": 6781 }, { "epoch": 0.06419855927149497, "grad_norm": 340.6730651855469, "learning_rate": 1.9938748119083965e-06, "loss": 15.9688, "step": 6782 }, { "epoch": 0.06420802529321003, "grad_norm": 269.8280029296875, "learning_rate": 1.993871423330882e-06, "loss": 22.0391, "step": 6783 }, { "epoch": 0.06421749131492507, "grad_norm": 440.0679931640625, "learning_rate": 1.9938680338191927e-06, "loss": 50.7188, "step": 6784 }, { "epoch": 0.06422695733664013, "grad_norm": 367.6082763671875, "learning_rate": 1.9938646433733326e-06, "loss": 16.2383, "step": 6785 }, { "epoch": 0.06423642335835518, "grad_norm": 508.3544616699219, "learning_rate": 1.9938612519933043e-06, "loss": 46.4531, "step": 6786 }, { "epoch": 0.06424588938007024, "grad_norm": 250.93865966796875, "learning_rate": 1.9938578596791106e-06, "loss": 22.7031, "step": 6787 }, { "epoch": 0.06425535540178529, "grad_norm": 514.9205322265625, "learning_rate": 1.9938544664307555e-06, "loss": 30.7031, "step": 6788 }, { "epoch": 0.06426482142350035, "grad_norm": 518.8614501953125, "learning_rate": 1.993851072248242e-06, "loss": 36.5938, "step": 6789 }, { "epoch": 0.0642742874452154, "grad_norm": 231.544921875, "learning_rate": 1.993847677131573e-06, "loss": 23.0625, "step": 6790 }, { "epoch": 0.06428375346693045, "grad_norm": 418.1140441894531, "learning_rate": 1.9938442810807518e-06, "loss": 44.1719, "step": 6791 }, { "epoch": 0.06429321948864551, "grad_norm": 200.3647003173828, "learning_rate": 1.9938408840957816e-06, "loss": 27.8125, "step": 6792 }, { "epoch": 0.06430268551036056, "grad_norm": 263.9759521484375, "learning_rate": 1.9938374861766658e-06, "loss": 25.1328, "step": 6793 }, { "epoch": 0.06431215153207562, "grad_norm": 3.02940034866333, "learning_rate": 1.9938340873234073e-06, "loss": 0.8872, "step": 6794 }, { "epoch": 0.06432161755379066, "grad_norm": 246.05703735351562, "learning_rate": 1.9938306875360096e-06, "loss": 26.1562, "step": 6795 }, { "epoch": 0.06433108357550572, "grad_norm": 3.80065655708313, "learning_rate": 1.993827286814475e-06, "loss": 0.9409, "step": 6796 }, { "epoch": 0.06434054959722077, "grad_norm": 215.72813415527344, "learning_rate": 1.993823885158808e-06, "loss": 21.3047, "step": 6797 }, { "epoch": 0.06435001561893583, "grad_norm": 264.819580078125, "learning_rate": 1.993820482569011e-06, "loss": 21.875, "step": 6798 }, { "epoch": 0.06435948164065089, "grad_norm": 820.5552368164062, "learning_rate": 1.993817079045088e-06, "loss": 41.4766, "step": 6799 }, { "epoch": 0.06436894766236594, "grad_norm": 3.3449745178222656, "learning_rate": 1.993813674587041e-06, "loss": 1.0513, "step": 6800 }, { "epoch": 0.064378413684081, "grad_norm": 741.0602416992188, "learning_rate": 1.9938102691948736e-06, "loss": 43.5469, "step": 6801 }, { "epoch": 0.06438787970579604, "grad_norm": 179.12844848632812, "learning_rate": 1.99380686286859e-06, "loss": 25.1094, "step": 6802 }, { "epoch": 0.0643973457275111, "grad_norm": 476.2220764160156, "learning_rate": 1.993803455608192e-06, "loss": 34.7344, "step": 6803 }, { "epoch": 0.06440681174922615, "grad_norm": 203.05166625976562, "learning_rate": 1.9938000474136834e-06, "loss": 27.0391, "step": 6804 }, { "epoch": 0.06441627777094121, "grad_norm": 1141.520263671875, "learning_rate": 1.9937966382850674e-06, "loss": 14.7969, "step": 6805 }, { "epoch": 0.06442574379265625, "grad_norm": 511.64569091796875, "learning_rate": 1.9937932282223477e-06, "loss": 25.0391, "step": 6806 }, { "epoch": 0.06443520981437131, "grad_norm": 1415.902099609375, "learning_rate": 1.9937898172255264e-06, "loss": 53.1953, "step": 6807 }, { "epoch": 0.06444467583608637, "grad_norm": 193.03933715820312, "learning_rate": 1.993786405294608e-06, "loss": 19.8125, "step": 6808 }, { "epoch": 0.06445414185780142, "grad_norm": 437.5309143066406, "learning_rate": 1.9937829924295944e-06, "loss": 39.2656, "step": 6809 }, { "epoch": 0.06446360787951648, "grad_norm": 561.9658203125, "learning_rate": 1.9937795786304895e-06, "loss": 58.5938, "step": 6810 }, { "epoch": 0.06447307390123153, "grad_norm": 482.51116943359375, "learning_rate": 1.9937761638972966e-06, "loss": 27.4219, "step": 6811 }, { "epoch": 0.06448253992294659, "grad_norm": 3.623176336288452, "learning_rate": 1.9937727482300187e-06, "loss": 0.8447, "step": 6812 }, { "epoch": 0.06449200594466163, "grad_norm": 596.6619873046875, "learning_rate": 1.993769331628659e-06, "loss": 57.9219, "step": 6813 }, { "epoch": 0.06450147196637669, "grad_norm": 583.9107666015625, "learning_rate": 1.9937659140932207e-06, "loss": 24.7734, "step": 6814 }, { "epoch": 0.06451093798809175, "grad_norm": 3.422287702560425, "learning_rate": 1.9937624956237075e-06, "loss": 0.9395, "step": 6815 }, { "epoch": 0.0645204040098068, "grad_norm": 439.77764892578125, "learning_rate": 1.993759076220122e-06, "loss": 17.8359, "step": 6816 }, { "epoch": 0.06452987003152186, "grad_norm": 1175.5050048828125, "learning_rate": 1.9937556558824673e-06, "loss": 44.9375, "step": 6817 }, { "epoch": 0.0645393360532369, "grad_norm": 3.1925745010375977, "learning_rate": 1.993752234610747e-06, "loss": 0.937, "step": 6818 }, { "epoch": 0.06454880207495196, "grad_norm": 2.8572919368743896, "learning_rate": 1.993748812404965e-06, "loss": 0.9341, "step": 6819 }, { "epoch": 0.06455826809666701, "grad_norm": 843.656982421875, "learning_rate": 1.9937453892651233e-06, "loss": 42.6094, "step": 6820 }, { "epoch": 0.06456773411838207, "grad_norm": 386.87481689453125, "learning_rate": 1.9937419651912255e-06, "loss": 34.7969, "step": 6821 }, { "epoch": 0.06457720014009712, "grad_norm": 508.1947021484375, "learning_rate": 1.9937385401832748e-06, "loss": 28.0469, "step": 6822 }, { "epoch": 0.06458666616181218, "grad_norm": 847.3078002929688, "learning_rate": 1.9937351142412746e-06, "loss": 36.0312, "step": 6823 }, { "epoch": 0.06459613218352724, "grad_norm": 599.9219970703125, "learning_rate": 1.9937316873652283e-06, "loss": 51.6094, "step": 6824 }, { "epoch": 0.06460559820524228, "grad_norm": 369.3455505371094, "learning_rate": 1.9937282595551384e-06, "loss": 20.375, "step": 6825 }, { "epoch": 0.06461506422695734, "grad_norm": 332.9977111816406, "learning_rate": 1.993724830811009e-06, "loss": 34.0859, "step": 6826 }, { "epoch": 0.06462453024867239, "grad_norm": 993.9578247070312, "learning_rate": 1.993721401132843e-06, "loss": 27.9844, "step": 6827 }, { "epoch": 0.06463399627038745, "grad_norm": 269.75994873046875, "learning_rate": 1.993717970520643e-06, "loss": 27.7812, "step": 6828 }, { "epoch": 0.0646434622921025, "grad_norm": 396.85467529296875, "learning_rate": 1.993714538974413e-06, "loss": 29.625, "step": 6829 }, { "epoch": 0.06465292831381755, "grad_norm": 248.7867889404297, "learning_rate": 1.993711106494156e-06, "loss": 26.0469, "step": 6830 }, { "epoch": 0.0646623943355326, "grad_norm": 510.2934875488281, "learning_rate": 1.993707673079875e-06, "loss": 32.6172, "step": 6831 }, { "epoch": 0.06467186035724766, "grad_norm": 331.2717590332031, "learning_rate": 1.993704238731574e-06, "loss": 31.1172, "step": 6832 }, { "epoch": 0.06468132637896272, "grad_norm": 318.83172607421875, "learning_rate": 1.993700803449255e-06, "loss": 10.2461, "step": 6833 }, { "epoch": 0.06469079240067777, "grad_norm": 415.1157531738281, "learning_rate": 1.993697367232922e-06, "loss": 27.6172, "step": 6834 }, { "epoch": 0.06470025842239283, "grad_norm": 602.515869140625, "learning_rate": 1.9936939300825784e-06, "loss": 58.1719, "step": 6835 }, { "epoch": 0.06470972444410787, "grad_norm": 251.56094360351562, "learning_rate": 1.993690491998227e-06, "loss": 21.1328, "step": 6836 }, { "epoch": 0.06471919046582293, "grad_norm": 335.6182556152344, "learning_rate": 1.993687052979871e-06, "loss": 17.3398, "step": 6837 }, { "epoch": 0.06472865648753798, "grad_norm": 805.90966796875, "learning_rate": 1.9936836130275144e-06, "loss": 57.0156, "step": 6838 }, { "epoch": 0.06473812250925304, "grad_norm": 557.9126586914062, "learning_rate": 1.9936801721411595e-06, "loss": 20.9062, "step": 6839 }, { "epoch": 0.06474758853096808, "grad_norm": 364.9588623046875, "learning_rate": 1.9936767303208096e-06, "loss": 21.3906, "step": 6840 }, { "epoch": 0.06475705455268314, "grad_norm": 271.81390380859375, "learning_rate": 1.9936732875664684e-06, "loss": 21.0156, "step": 6841 }, { "epoch": 0.0647665205743982, "grad_norm": 582.1041259765625, "learning_rate": 1.9936698438781394e-06, "loss": 21.0, "step": 6842 }, { "epoch": 0.06477598659611325, "grad_norm": 309.1112365722656, "learning_rate": 1.993666399255825e-06, "loss": 19.5547, "step": 6843 }, { "epoch": 0.06478545261782831, "grad_norm": 238.97169494628906, "learning_rate": 1.9936629536995288e-06, "loss": 18.2109, "step": 6844 }, { "epoch": 0.06479491863954336, "grad_norm": 443.24517822265625, "learning_rate": 1.9936595072092542e-06, "loss": 25.625, "step": 6845 }, { "epoch": 0.06480438466125842, "grad_norm": 990.4677734375, "learning_rate": 1.993656059785004e-06, "loss": 66.4531, "step": 6846 }, { "epoch": 0.06481385068297346, "grad_norm": 310.2567443847656, "learning_rate": 1.993652611426782e-06, "loss": 20.4453, "step": 6847 }, { "epoch": 0.06482331670468852, "grad_norm": 807.7911376953125, "learning_rate": 1.993649162134591e-06, "loss": 40.1094, "step": 6848 }, { "epoch": 0.06483278272640357, "grad_norm": 295.058837890625, "learning_rate": 1.9936457119084345e-06, "loss": 24.4062, "step": 6849 }, { "epoch": 0.06484224874811863, "grad_norm": 329.2454833984375, "learning_rate": 1.993642260748316e-06, "loss": 37.2656, "step": 6850 }, { "epoch": 0.06485171476983369, "grad_norm": 1815.8370361328125, "learning_rate": 1.993638808654238e-06, "loss": 43.1719, "step": 6851 }, { "epoch": 0.06486118079154873, "grad_norm": 186.4219207763672, "learning_rate": 1.9936353556262046e-06, "loss": 22.1562, "step": 6852 }, { "epoch": 0.0648706468132638, "grad_norm": 210.10174560546875, "learning_rate": 1.993631901664218e-06, "loss": 20.8047, "step": 6853 }, { "epoch": 0.06488011283497884, "grad_norm": 360.3117370605469, "learning_rate": 1.9936284467682825e-06, "loss": 22.6719, "step": 6854 }, { "epoch": 0.0648895788566939, "grad_norm": 664.08056640625, "learning_rate": 1.9936249909384008e-06, "loss": 55.875, "step": 6855 }, { "epoch": 0.06489904487840895, "grad_norm": 244.22320556640625, "learning_rate": 1.9936215341745763e-06, "loss": 24.0938, "step": 6856 }, { "epoch": 0.064908510900124, "grad_norm": 387.0411071777344, "learning_rate": 1.993618076476812e-06, "loss": 16.293, "step": 6857 }, { "epoch": 0.06491797692183905, "grad_norm": 270.5316467285156, "learning_rate": 1.9936146178451117e-06, "loss": 23.3281, "step": 6858 }, { "epoch": 0.06492744294355411, "grad_norm": 534.1759033203125, "learning_rate": 1.993611158279478e-06, "loss": 32.6719, "step": 6859 }, { "epoch": 0.06493690896526917, "grad_norm": 3.007585048675537, "learning_rate": 1.9936076977799146e-06, "loss": 0.9062, "step": 6860 }, { "epoch": 0.06494637498698422, "grad_norm": 484.61279296875, "learning_rate": 1.9936042363464246e-06, "loss": 39.1562, "step": 6861 }, { "epoch": 0.06495584100869928, "grad_norm": 189.550048828125, "learning_rate": 1.993600773979011e-06, "loss": 20.5625, "step": 6862 }, { "epoch": 0.06496530703041432, "grad_norm": 401.8994445800781, "learning_rate": 1.9935973106776777e-06, "loss": 38.2188, "step": 6863 }, { "epoch": 0.06497477305212938, "grad_norm": 236.42018127441406, "learning_rate": 1.9935938464424275e-06, "loss": 18.3984, "step": 6864 }, { "epoch": 0.06498423907384443, "grad_norm": 622.1006469726562, "learning_rate": 1.9935903812732634e-06, "loss": 47.9062, "step": 6865 }, { "epoch": 0.06499370509555949, "grad_norm": 1445.9755859375, "learning_rate": 1.9935869151701888e-06, "loss": 31.1719, "step": 6866 }, { "epoch": 0.06500317111727455, "grad_norm": 881.0568237304688, "learning_rate": 1.9935834481332077e-06, "loss": 36.4141, "step": 6867 }, { "epoch": 0.0650126371389896, "grad_norm": 305.6645812988281, "learning_rate": 1.993579980162323e-06, "loss": 27.6719, "step": 6868 }, { "epoch": 0.06502210316070466, "grad_norm": 209.15638732910156, "learning_rate": 1.9935765112575373e-06, "loss": 18.7969, "step": 6869 }, { "epoch": 0.0650315691824197, "grad_norm": 410.0610046386719, "learning_rate": 1.993573041418854e-06, "loss": 40.6562, "step": 6870 }, { "epoch": 0.06504103520413476, "grad_norm": 345.05059814453125, "learning_rate": 1.993569570646277e-06, "loss": 22.0938, "step": 6871 }, { "epoch": 0.06505050122584981, "grad_norm": 184.27740478515625, "learning_rate": 1.9935660989398096e-06, "loss": 19.0078, "step": 6872 }, { "epoch": 0.06505996724756487, "grad_norm": 329.0332336425781, "learning_rate": 1.993562626299454e-06, "loss": 26.1328, "step": 6873 }, { "epoch": 0.06506943326927991, "grad_norm": 3.336580276489258, "learning_rate": 1.9935591527252147e-06, "loss": 0.8638, "step": 6874 }, { "epoch": 0.06507889929099497, "grad_norm": 300.8702087402344, "learning_rate": 1.9935556782170943e-06, "loss": 21.9688, "step": 6875 }, { "epoch": 0.06508836531271003, "grad_norm": 764.0031127929688, "learning_rate": 1.993552202775096e-06, "loss": 46.9375, "step": 6876 }, { "epoch": 0.06509783133442508, "grad_norm": 385.5001525878906, "learning_rate": 1.9935487263992236e-06, "loss": 22.9922, "step": 6877 }, { "epoch": 0.06510729735614014, "grad_norm": 366.28759765625, "learning_rate": 1.99354524908948e-06, "loss": 26.4531, "step": 6878 }, { "epoch": 0.06511676337785519, "grad_norm": 435.46136474609375, "learning_rate": 1.9935417708458684e-06, "loss": 34.9062, "step": 6879 }, { "epoch": 0.06512622939957025, "grad_norm": 741.1604614257812, "learning_rate": 1.993538291668392e-06, "loss": 23.7734, "step": 6880 }, { "epoch": 0.06513569542128529, "grad_norm": 627.2677001953125, "learning_rate": 1.9935348115570544e-06, "loss": 28.3125, "step": 6881 }, { "epoch": 0.06514516144300035, "grad_norm": 247.19837951660156, "learning_rate": 1.9935313305118586e-06, "loss": 26.3594, "step": 6882 }, { "epoch": 0.0651546274647154, "grad_norm": 551.6948852539062, "learning_rate": 1.993527848532808e-06, "loss": 37.9688, "step": 6883 }, { "epoch": 0.06516409348643046, "grad_norm": 548.3753051757812, "learning_rate": 1.993524365619906e-06, "loss": 22.1172, "step": 6884 }, { "epoch": 0.06517355950814552, "grad_norm": 723.0301513671875, "learning_rate": 1.993520881773156e-06, "loss": 57.3594, "step": 6885 }, { "epoch": 0.06518302552986056, "grad_norm": 234.8803253173828, "learning_rate": 1.9935173969925605e-06, "loss": 20.2422, "step": 6886 }, { "epoch": 0.06519249155157562, "grad_norm": 242.2017059326172, "learning_rate": 1.9935139112781235e-06, "loss": 30.5, "step": 6887 }, { "epoch": 0.06520195757329067, "grad_norm": 461.2847900390625, "learning_rate": 1.993510424629848e-06, "loss": 34.7656, "step": 6888 }, { "epoch": 0.06521142359500573, "grad_norm": 147.1361846923828, "learning_rate": 1.993506937047737e-06, "loss": 22.9844, "step": 6889 }, { "epoch": 0.06522088961672078, "grad_norm": 349.9389343261719, "learning_rate": 1.9935034485317947e-06, "loss": 22.4805, "step": 6890 }, { "epoch": 0.06523035563843584, "grad_norm": 323.0074157714844, "learning_rate": 1.9934999590820233e-06, "loss": 21.0781, "step": 6891 }, { "epoch": 0.06523982166015088, "grad_norm": 325.5917053222656, "learning_rate": 1.993496468698427e-06, "loss": 33.7188, "step": 6892 }, { "epoch": 0.06524928768186594, "grad_norm": 553.8457641601562, "learning_rate": 1.9934929773810084e-06, "loss": 19.6641, "step": 6893 }, { "epoch": 0.065258753703581, "grad_norm": 301.255859375, "learning_rate": 1.9934894851297713e-06, "loss": 45.4688, "step": 6894 }, { "epoch": 0.06526821972529605, "grad_norm": 543.5813598632812, "learning_rate": 1.9934859919447185e-06, "loss": 36.7188, "step": 6895 }, { "epoch": 0.06527768574701111, "grad_norm": 3.636300563812256, "learning_rate": 1.9934824978258537e-06, "loss": 0.9507, "step": 6896 }, { "epoch": 0.06528715176872615, "grad_norm": 381.4173889160156, "learning_rate": 1.9934790027731796e-06, "loss": 23.0469, "step": 6897 }, { "epoch": 0.06529661779044121, "grad_norm": 435.506591796875, "learning_rate": 1.9934755067867e-06, "loss": 44.3203, "step": 6898 }, { "epoch": 0.06530608381215626, "grad_norm": 251.72735595703125, "learning_rate": 1.9934720098664178e-06, "loss": 31.5156, "step": 6899 }, { "epoch": 0.06531554983387132, "grad_norm": 237.38902282714844, "learning_rate": 1.9934685120123372e-06, "loss": 17.3828, "step": 6900 }, { "epoch": 0.06532501585558637, "grad_norm": 319.1595764160156, "learning_rate": 1.9934650132244604e-06, "loss": 29.7891, "step": 6901 }, { "epoch": 0.06533448187730143, "grad_norm": 297.1484680175781, "learning_rate": 1.9934615135027912e-06, "loss": 18.4922, "step": 6902 }, { "epoch": 0.06534394789901649, "grad_norm": 782.753173828125, "learning_rate": 1.993458012847333e-06, "loss": 59.8438, "step": 6903 }, { "epoch": 0.06535341392073153, "grad_norm": 249.27523803710938, "learning_rate": 1.9934545112580883e-06, "loss": 19.6484, "step": 6904 }, { "epoch": 0.06536287994244659, "grad_norm": 3.2020819187164307, "learning_rate": 1.9934510087350618e-06, "loss": 0.8584, "step": 6905 }, { "epoch": 0.06537234596416164, "grad_norm": 442.0157775878906, "learning_rate": 1.9934475052782555e-06, "loss": 37.7812, "step": 6906 }, { "epoch": 0.0653818119858767, "grad_norm": 480.3522033691406, "learning_rate": 1.9934440008876734e-06, "loss": 20.9453, "step": 6907 }, { "epoch": 0.06539127800759174, "grad_norm": 882.9713745117188, "learning_rate": 1.993440495563318e-06, "loss": 44.0312, "step": 6908 }, { "epoch": 0.0654007440293068, "grad_norm": 266.6661682128906, "learning_rate": 1.9934369893051937e-06, "loss": 21.9688, "step": 6909 }, { "epoch": 0.06541021005102186, "grad_norm": 428.4610595703125, "learning_rate": 1.993433482113303e-06, "loss": 35.4688, "step": 6910 }, { "epoch": 0.06541967607273691, "grad_norm": 205.68333435058594, "learning_rate": 1.9934299739876493e-06, "loss": 21.2109, "step": 6911 }, { "epoch": 0.06542914209445197, "grad_norm": 432.5670166015625, "learning_rate": 1.9934264649282364e-06, "loss": 21.3125, "step": 6912 }, { "epoch": 0.06543860811616702, "grad_norm": 410.7072448730469, "learning_rate": 1.9934229549350675e-06, "loss": 60.6328, "step": 6913 }, { "epoch": 0.06544807413788208, "grad_norm": 211.5836181640625, "learning_rate": 1.993419444008145e-06, "loss": 18.6719, "step": 6914 }, { "epoch": 0.06545754015959712, "grad_norm": 391.7228698730469, "learning_rate": 1.9934159321474736e-06, "loss": 24.1016, "step": 6915 }, { "epoch": 0.06546700618131218, "grad_norm": 156.01390075683594, "learning_rate": 1.9934124193530553e-06, "loss": 19.3281, "step": 6916 }, { "epoch": 0.06547647220302723, "grad_norm": 391.4064636230469, "learning_rate": 1.993408905624894e-06, "loss": 24.6328, "step": 6917 }, { "epoch": 0.06548593822474229, "grad_norm": 666.3419189453125, "learning_rate": 1.993405390962993e-06, "loss": 33.7188, "step": 6918 }, { "epoch": 0.06549540424645735, "grad_norm": 865.1968994140625, "learning_rate": 1.9934018753673556e-06, "loss": 45.8906, "step": 6919 }, { "epoch": 0.0655048702681724, "grad_norm": 625.274658203125, "learning_rate": 1.993398358837985e-06, "loss": 28.0273, "step": 6920 }, { "epoch": 0.06551433628988745, "grad_norm": 247.78314208984375, "learning_rate": 1.9933948413748846e-06, "loss": 23.2578, "step": 6921 }, { "epoch": 0.0655238023116025, "grad_norm": 285.0694274902344, "learning_rate": 1.993391322978058e-06, "loss": 21.5469, "step": 6922 }, { "epoch": 0.06553326833331756, "grad_norm": 485.5765075683594, "learning_rate": 1.9933878036475077e-06, "loss": 49.0547, "step": 6923 }, { "epoch": 0.0655427343550326, "grad_norm": 188.48890686035156, "learning_rate": 1.9933842833832376e-06, "loss": 19.875, "step": 6924 }, { "epoch": 0.06555220037674767, "grad_norm": 672.9385986328125, "learning_rate": 1.993380762185251e-06, "loss": 49.0469, "step": 6925 }, { "epoch": 0.06556166639846271, "grad_norm": 217.60626220703125, "learning_rate": 1.993377240053551e-06, "loss": 22.0547, "step": 6926 }, { "epoch": 0.06557113242017777, "grad_norm": 673.9539794921875, "learning_rate": 1.9933737169881412e-06, "loss": 66.7969, "step": 6927 }, { "epoch": 0.06558059844189283, "grad_norm": 433.0676574707031, "learning_rate": 1.9933701929890246e-06, "loss": 31.8438, "step": 6928 }, { "epoch": 0.06559006446360788, "grad_norm": 394.1878662109375, "learning_rate": 1.9933666680562045e-06, "loss": 25.8672, "step": 6929 }, { "epoch": 0.06559953048532294, "grad_norm": 407.73394775390625, "learning_rate": 1.9933631421896847e-06, "loss": 25.6172, "step": 6930 }, { "epoch": 0.06560899650703798, "grad_norm": 344.2967529296875, "learning_rate": 1.9933596153894683e-06, "loss": 22.1641, "step": 6931 }, { "epoch": 0.06561846252875304, "grad_norm": 212.59588623046875, "learning_rate": 1.9933560876555577e-06, "loss": 21.1094, "step": 6932 }, { "epoch": 0.06562792855046809, "grad_norm": 472.0190734863281, "learning_rate": 1.9933525589879575e-06, "loss": 33.6016, "step": 6933 }, { "epoch": 0.06563739457218315, "grad_norm": 345.7137756347656, "learning_rate": 1.9933490293866705e-06, "loss": 28.4844, "step": 6934 }, { "epoch": 0.0656468605938982, "grad_norm": 388.1624450683594, "learning_rate": 1.9933454988517e-06, "loss": 22.0, "step": 6935 }, { "epoch": 0.06565632661561326, "grad_norm": 257.78369140625, "learning_rate": 1.9933419673830494e-06, "loss": 23.2109, "step": 6936 }, { "epoch": 0.06566579263732832, "grad_norm": 293.7497253417969, "learning_rate": 1.993338434980722e-06, "loss": 21.3828, "step": 6937 }, { "epoch": 0.06567525865904336, "grad_norm": 372.8025207519531, "learning_rate": 1.993334901644721e-06, "loss": 30.1641, "step": 6938 }, { "epoch": 0.06568472468075842, "grad_norm": 335.0274963378906, "learning_rate": 1.9933313673750497e-06, "loss": 24.3906, "step": 6939 }, { "epoch": 0.06569419070247347, "grad_norm": 326.9030456542969, "learning_rate": 1.9933278321717116e-06, "loss": 34.625, "step": 6940 }, { "epoch": 0.06570365672418853, "grad_norm": 962.4365844726562, "learning_rate": 1.9933242960347102e-06, "loss": 22.1953, "step": 6941 }, { "epoch": 0.06571312274590357, "grad_norm": 987.5614624023438, "learning_rate": 1.9933207589640486e-06, "loss": 54.4922, "step": 6942 }, { "epoch": 0.06572258876761863, "grad_norm": 419.3966979980469, "learning_rate": 1.9933172209597297e-06, "loss": 25.2969, "step": 6943 }, { "epoch": 0.06573205478933368, "grad_norm": 3.0983834266662598, "learning_rate": 1.9933136820217576e-06, "loss": 0.9219, "step": 6944 }, { "epoch": 0.06574152081104874, "grad_norm": 201.05279541015625, "learning_rate": 1.993310142150135e-06, "loss": 20.9141, "step": 6945 }, { "epoch": 0.0657509868327638, "grad_norm": 1696.6710205078125, "learning_rate": 1.993306601344866e-06, "loss": 53.9688, "step": 6946 }, { "epoch": 0.06576045285447885, "grad_norm": 362.0982666015625, "learning_rate": 1.9933030596059528e-06, "loss": 34.0938, "step": 6947 }, { "epoch": 0.0657699188761939, "grad_norm": 148.44747924804688, "learning_rate": 1.9932995169333994e-06, "loss": 15.4219, "step": 6948 }, { "epoch": 0.06577938489790895, "grad_norm": 515.4299926757812, "learning_rate": 1.9932959733272094e-06, "loss": 36.6328, "step": 6949 }, { "epoch": 0.06578885091962401, "grad_norm": 540.5668334960938, "learning_rate": 1.9932924287873854e-06, "loss": 38.5859, "step": 6950 }, { "epoch": 0.06579831694133906, "grad_norm": 621.9428100585938, "learning_rate": 1.9932888833139312e-06, "loss": 48.7969, "step": 6951 }, { "epoch": 0.06580778296305412, "grad_norm": 308.141845703125, "learning_rate": 1.9932853369068506e-06, "loss": 28.9219, "step": 6952 }, { "epoch": 0.06581724898476918, "grad_norm": 491.7665710449219, "learning_rate": 1.9932817895661458e-06, "loss": 51.2031, "step": 6953 }, { "epoch": 0.06582671500648422, "grad_norm": 422.34124755859375, "learning_rate": 1.9932782412918213e-06, "loss": 28.375, "step": 6954 }, { "epoch": 0.06583618102819928, "grad_norm": 665.733642578125, "learning_rate": 1.9932746920838793e-06, "loss": 63.4062, "step": 6955 }, { "epoch": 0.06584564704991433, "grad_norm": 959.4545288085938, "learning_rate": 1.993271141942324e-06, "loss": 50.1406, "step": 6956 }, { "epoch": 0.06585511307162939, "grad_norm": 432.76641845703125, "learning_rate": 1.9932675908671585e-06, "loss": 45.2344, "step": 6957 }, { "epoch": 0.06586457909334444, "grad_norm": 312.65838623046875, "learning_rate": 1.9932640388583856e-06, "loss": 23.2812, "step": 6958 }, { "epoch": 0.0658740451150595, "grad_norm": 690.0021362304688, "learning_rate": 1.99326048591601e-06, "loss": 45.4375, "step": 6959 }, { "epoch": 0.06588351113677454, "grad_norm": 355.4925537109375, "learning_rate": 1.9932569320400334e-06, "loss": 26.7969, "step": 6960 }, { "epoch": 0.0658929771584896, "grad_norm": 384.564453125, "learning_rate": 1.99325337723046e-06, "loss": 11.6758, "step": 6961 }, { "epoch": 0.06590244318020466, "grad_norm": 877.8546142578125, "learning_rate": 1.9932498214872934e-06, "loss": 44.5781, "step": 6962 }, { "epoch": 0.06591190920191971, "grad_norm": 519.3259887695312, "learning_rate": 1.9932462648105363e-06, "loss": 28.6719, "step": 6963 }, { "epoch": 0.06592137522363477, "grad_norm": 211.10055541992188, "learning_rate": 1.9932427072001927e-06, "loss": 26.0781, "step": 6964 }, { "epoch": 0.06593084124534981, "grad_norm": 267.9244689941406, "learning_rate": 1.993239148656265e-06, "loss": 23.7578, "step": 6965 }, { "epoch": 0.06594030726706487, "grad_norm": 218.55584716796875, "learning_rate": 1.9932355891787575e-06, "loss": 21.3281, "step": 6966 }, { "epoch": 0.06594977328877992, "grad_norm": 246.0401611328125, "learning_rate": 1.993232028767673e-06, "loss": 21.9688, "step": 6967 }, { "epoch": 0.06595923931049498, "grad_norm": 912.5130004882812, "learning_rate": 1.993228467423015e-06, "loss": 56.1172, "step": 6968 }, { "epoch": 0.06596870533221003, "grad_norm": 360.392333984375, "learning_rate": 1.9932249051447866e-06, "loss": 26.7812, "step": 6969 }, { "epoch": 0.06597817135392509, "grad_norm": 449.2380676269531, "learning_rate": 1.993221341932992e-06, "loss": 19.543, "step": 6970 }, { "epoch": 0.06598763737564015, "grad_norm": 640.5650024414062, "learning_rate": 1.9932177777876337e-06, "loss": 24.2578, "step": 6971 }, { "epoch": 0.06599710339735519, "grad_norm": 285.7591552734375, "learning_rate": 1.993214212708715e-06, "loss": 27.8125, "step": 6972 }, { "epoch": 0.06600656941907025, "grad_norm": 510.09771728515625, "learning_rate": 1.99321064669624e-06, "loss": 56.6094, "step": 6973 }, { "epoch": 0.0660160354407853, "grad_norm": 347.7625732421875, "learning_rate": 1.9932070797502113e-06, "loss": 34.4141, "step": 6974 }, { "epoch": 0.06602550146250036, "grad_norm": 369.2689514160156, "learning_rate": 1.993203511870633e-06, "loss": 26.9141, "step": 6975 }, { "epoch": 0.0660349674842154, "grad_norm": 364.98651123046875, "learning_rate": 1.9931999430575073e-06, "loss": 18.2383, "step": 6976 }, { "epoch": 0.06604443350593046, "grad_norm": 426.84625244140625, "learning_rate": 1.9931963733108388e-06, "loss": 49.1562, "step": 6977 }, { "epoch": 0.06605389952764551, "grad_norm": 191.73077392578125, "learning_rate": 1.99319280263063e-06, "loss": 22.4922, "step": 6978 }, { "epoch": 0.06606336554936057, "grad_norm": 807.384521484375, "learning_rate": 1.993189231016885e-06, "loss": 76.25, "step": 6979 }, { "epoch": 0.06607283157107563, "grad_norm": 497.777099609375, "learning_rate": 1.993185658469606e-06, "loss": 40.4844, "step": 6980 }, { "epoch": 0.06608229759279068, "grad_norm": 524.8546752929688, "learning_rate": 1.993182084988798e-06, "loss": 43.2031, "step": 6981 }, { "epoch": 0.06609176361450574, "grad_norm": 488.0404968261719, "learning_rate": 1.993178510574463e-06, "loss": 54.4375, "step": 6982 }, { "epoch": 0.06610122963622078, "grad_norm": 308.05523681640625, "learning_rate": 1.9931749352266047e-06, "loss": 22.9375, "step": 6983 }, { "epoch": 0.06611069565793584, "grad_norm": 254.8191375732422, "learning_rate": 1.993171358945227e-06, "loss": 22.3359, "step": 6984 }, { "epoch": 0.06612016167965089, "grad_norm": 236.36924743652344, "learning_rate": 1.9931677817303327e-06, "loss": 20.5391, "step": 6985 }, { "epoch": 0.06612962770136595, "grad_norm": 349.7823791503906, "learning_rate": 1.993164203581925e-06, "loss": 11.6426, "step": 6986 }, { "epoch": 0.066139093723081, "grad_norm": 150.77593994140625, "learning_rate": 1.9931606245000075e-06, "loss": 19.7734, "step": 6987 }, { "epoch": 0.06614855974479605, "grad_norm": 494.4589538574219, "learning_rate": 1.993157044484584e-06, "loss": 55.6484, "step": 6988 }, { "epoch": 0.06615802576651111, "grad_norm": 302.33331298828125, "learning_rate": 1.9931534635356574e-06, "loss": 21.0312, "step": 6989 }, { "epoch": 0.06616749178822616, "grad_norm": 283.27392578125, "learning_rate": 1.993149881653231e-06, "loss": 23.3438, "step": 6990 }, { "epoch": 0.06617695780994122, "grad_norm": 387.9388122558594, "learning_rate": 1.993146298837308e-06, "loss": 45.2188, "step": 6991 }, { "epoch": 0.06618642383165627, "grad_norm": 454.2004089355469, "learning_rate": 1.9931427150878924e-06, "loss": 57.4375, "step": 6992 }, { "epoch": 0.06619588985337133, "grad_norm": 604.1514282226562, "learning_rate": 1.9931391304049875e-06, "loss": 32.4062, "step": 6993 }, { "epoch": 0.06620535587508637, "grad_norm": 617.65283203125, "learning_rate": 1.9931355447885964e-06, "loss": 55.5625, "step": 6994 }, { "epoch": 0.06621482189680143, "grad_norm": 461.3884582519531, "learning_rate": 1.9931319582387224e-06, "loss": 23.4844, "step": 6995 }, { "epoch": 0.06622428791851649, "grad_norm": 292.4773254394531, "learning_rate": 1.993128370755369e-06, "loss": 20.0703, "step": 6996 }, { "epoch": 0.06623375394023154, "grad_norm": 518.6929321289062, "learning_rate": 1.993124782338539e-06, "loss": 39.5234, "step": 6997 }, { "epoch": 0.0662432199619466, "grad_norm": 409.8071594238281, "learning_rate": 1.993121192988237e-06, "loss": 31.3125, "step": 6998 }, { "epoch": 0.06625268598366164, "grad_norm": 575.6433715820312, "learning_rate": 1.9931176027044656e-06, "loss": 58.3125, "step": 6999 }, { "epoch": 0.0662621520053767, "grad_norm": 190.38702392578125, "learning_rate": 1.993114011487228e-06, "loss": 20.8867, "step": 7000 }, { "epoch": 0.06627161802709175, "grad_norm": 3.7568228244781494, "learning_rate": 1.9931104193365277e-06, "loss": 0.9053, "step": 7001 }, { "epoch": 0.06628108404880681, "grad_norm": 302.6896057128906, "learning_rate": 1.993106826252369e-06, "loss": 25.1484, "step": 7002 }, { "epoch": 0.06629055007052186, "grad_norm": 251.35060119628906, "learning_rate": 1.993103232234754e-06, "loss": 22.2656, "step": 7003 }, { "epoch": 0.06630001609223692, "grad_norm": 537.5531616210938, "learning_rate": 1.993099637283686e-06, "loss": 35.7891, "step": 7004 }, { "epoch": 0.06630948211395198, "grad_norm": 669.38818359375, "learning_rate": 1.9930960413991697e-06, "loss": 39.5156, "step": 7005 }, { "epoch": 0.06631894813566702, "grad_norm": 265.6765441894531, "learning_rate": 1.9930924445812075e-06, "loss": 21.4609, "step": 7006 }, { "epoch": 0.06632841415738208, "grad_norm": 587.4703979492188, "learning_rate": 1.993088846829803e-06, "loss": 32.1562, "step": 7007 }, { "epoch": 0.06633788017909713, "grad_norm": 437.32659912109375, "learning_rate": 1.9930852481449594e-06, "loss": 22.0, "step": 7008 }, { "epoch": 0.06634734620081219, "grad_norm": 375.4875793457031, "learning_rate": 1.9930816485266804e-06, "loss": 18.2656, "step": 7009 }, { "epoch": 0.06635681222252723, "grad_norm": 279.2256164550781, "learning_rate": 1.9930780479749693e-06, "loss": 38.6953, "step": 7010 }, { "epoch": 0.0663662782442423, "grad_norm": 294.8179626464844, "learning_rate": 1.9930744464898293e-06, "loss": 11.1641, "step": 7011 }, { "epoch": 0.06637574426595734, "grad_norm": 278.2580871582031, "learning_rate": 1.9930708440712644e-06, "loss": 27.7891, "step": 7012 }, { "epoch": 0.0663852102876724, "grad_norm": 688.4192504882812, "learning_rate": 1.993067240719277e-06, "loss": 50.8594, "step": 7013 }, { "epoch": 0.06639467630938746, "grad_norm": 257.1264953613281, "learning_rate": 1.9930636364338713e-06, "loss": 20.6719, "step": 7014 }, { "epoch": 0.0664041423311025, "grad_norm": 389.9971923828125, "learning_rate": 1.9930600312150502e-06, "loss": 29.8125, "step": 7015 }, { "epoch": 0.06641360835281757, "grad_norm": 377.6248474121094, "learning_rate": 1.9930564250628175e-06, "loss": 50.6094, "step": 7016 }, { "epoch": 0.06642307437453261, "grad_norm": 617.25634765625, "learning_rate": 1.993052817977176e-06, "loss": 29.625, "step": 7017 }, { "epoch": 0.06643254039624767, "grad_norm": 274.4179382324219, "learning_rate": 1.9930492099581297e-06, "loss": 42.6562, "step": 7018 }, { "epoch": 0.06644200641796272, "grad_norm": 1021.3731689453125, "learning_rate": 1.9930456010056814e-06, "loss": 22.6992, "step": 7019 }, { "epoch": 0.06645147243967778, "grad_norm": 301.7784423828125, "learning_rate": 1.9930419911198354e-06, "loss": 25.6172, "step": 7020 }, { "epoch": 0.06646093846139282, "grad_norm": 253.46226501464844, "learning_rate": 1.9930383803005943e-06, "loss": 17.4141, "step": 7021 }, { "epoch": 0.06647040448310788, "grad_norm": 1057.086181640625, "learning_rate": 1.9930347685479614e-06, "loss": 28.1602, "step": 7022 }, { "epoch": 0.06647987050482294, "grad_norm": 724.939208984375, "learning_rate": 1.993031155861941e-06, "loss": 31.9219, "step": 7023 }, { "epoch": 0.06648933652653799, "grad_norm": 501.06048583984375, "learning_rate": 1.9930275422425356e-06, "loss": 45.7188, "step": 7024 }, { "epoch": 0.06649880254825305, "grad_norm": 489.389404296875, "learning_rate": 1.993023927689749e-06, "loss": 48.4375, "step": 7025 }, { "epoch": 0.0665082685699681, "grad_norm": 3.8224358558654785, "learning_rate": 1.993020312203584e-06, "loss": 1.0225, "step": 7026 }, { "epoch": 0.06651773459168316, "grad_norm": 569.6651611328125, "learning_rate": 1.9930166957840454e-06, "loss": 56.3906, "step": 7027 }, { "epoch": 0.0665272006133982, "grad_norm": 430.2263488769531, "learning_rate": 1.993013078431135e-06, "loss": 19.7734, "step": 7028 }, { "epoch": 0.06653666663511326, "grad_norm": 962.0816040039062, "learning_rate": 1.9930094601448573e-06, "loss": 36.6953, "step": 7029 }, { "epoch": 0.06654613265682831, "grad_norm": 427.25726318359375, "learning_rate": 1.993005840925215e-06, "loss": 30.7422, "step": 7030 }, { "epoch": 0.06655559867854337, "grad_norm": 292.204345703125, "learning_rate": 1.9930022207722122e-06, "loss": 33.2695, "step": 7031 }, { "epoch": 0.06656506470025843, "grad_norm": 904.6544799804688, "learning_rate": 1.992998599685852e-06, "loss": 63.2188, "step": 7032 }, { "epoch": 0.06657453072197347, "grad_norm": 444.1575927734375, "learning_rate": 1.992994977666137e-06, "loss": 25.3438, "step": 7033 }, { "epoch": 0.06658399674368853, "grad_norm": 1465.41064453125, "learning_rate": 1.9929913547130717e-06, "loss": 35.1641, "step": 7034 }, { "epoch": 0.06659346276540358, "grad_norm": 3.2639920711517334, "learning_rate": 1.9929877308266592e-06, "loss": 1.0557, "step": 7035 }, { "epoch": 0.06660292878711864, "grad_norm": 564.2757568359375, "learning_rate": 1.9929841060069024e-06, "loss": 35.8828, "step": 7036 }, { "epoch": 0.06661239480883369, "grad_norm": 202.95999145507812, "learning_rate": 1.9929804802538055e-06, "loss": 21.0156, "step": 7037 }, { "epoch": 0.06662186083054875, "grad_norm": 228.28411865234375, "learning_rate": 1.992976853567372e-06, "loss": 18.5078, "step": 7038 }, { "epoch": 0.0666313268522638, "grad_norm": 825.8689575195312, "learning_rate": 1.992973225947604e-06, "loss": 64.4375, "step": 7039 }, { "epoch": 0.06664079287397885, "grad_norm": 208.26629638671875, "learning_rate": 1.992969597394507e-06, "loss": 23.875, "step": 7040 }, { "epoch": 0.06665025889569391, "grad_norm": 764.2674560546875, "learning_rate": 1.9929659679080817e-06, "loss": 47.9766, "step": 7041 }, { "epoch": 0.06665972491740896, "grad_norm": 407.3411865234375, "learning_rate": 1.992962337488334e-06, "loss": 30.4453, "step": 7042 }, { "epoch": 0.06666919093912402, "grad_norm": 507.7070007324219, "learning_rate": 1.992958706135266e-06, "loss": 56.4375, "step": 7043 }, { "epoch": 0.06667865696083906, "grad_norm": 372.09124755859375, "learning_rate": 1.992955073848881e-06, "loss": 29.1953, "step": 7044 }, { "epoch": 0.06668812298255412, "grad_norm": 849.21875, "learning_rate": 1.9929514406291835e-06, "loss": 42.5, "step": 7045 }, { "epoch": 0.06669758900426917, "grad_norm": 603.02587890625, "learning_rate": 1.9929478064761757e-06, "loss": 48.3125, "step": 7046 }, { "epoch": 0.06670705502598423, "grad_norm": 407.5218200683594, "learning_rate": 1.992944171389862e-06, "loss": 65.7344, "step": 7047 }, { "epoch": 0.06671652104769929, "grad_norm": 708.0255737304688, "learning_rate": 1.992940535370245e-06, "loss": 51.9688, "step": 7048 }, { "epoch": 0.06672598706941434, "grad_norm": 2.991180896759033, "learning_rate": 1.9929368984173287e-06, "loss": 0.7224, "step": 7049 }, { "epoch": 0.0667354530911294, "grad_norm": 2.900747537612915, "learning_rate": 1.992933260531116e-06, "loss": 1.0098, "step": 7050 }, { "epoch": 0.06674491911284444, "grad_norm": 160.05116271972656, "learning_rate": 1.9929296217116112e-06, "loss": 18.4766, "step": 7051 }, { "epoch": 0.0667543851345595, "grad_norm": 366.960693359375, "learning_rate": 1.992925981958817e-06, "loss": 23.1406, "step": 7052 }, { "epoch": 0.06676385115627455, "grad_norm": 187.15057373046875, "learning_rate": 1.9929223412727366e-06, "loss": 24.7969, "step": 7053 }, { "epoch": 0.06677331717798961, "grad_norm": 226.2719268798828, "learning_rate": 1.992918699653374e-06, "loss": 25.3828, "step": 7054 }, { "epoch": 0.06678278319970465, "grad_norm": 322.85626220703125, "learning_rate": 1.9929150571007324e-06, "loss": 32.8281, "step": 7055 }, { "epoch": 0.06679224922141971, "grad_norm": 702.88623046875, "learning_rate": 1.9929114136148153e-06, "loss": 60.7031, "step": 7056 }, { "epoch": 0.06680171524313477, "grad_norm": 381.409423828125, "learning_rate": 1.9929077691956258e-06, "loss": 37.0781, "step": 7057 }, { "epoch": 0.06681118126484982, "grad_norm": 383.01165771484375, "learning_rate": 1.9929041238431678e-06, "loss": 31.375, "step": 7058 }, { "epoch": 0.06682064728656488, "grad_norm": 289.4263610839844, "learning_rate": 1.9929004775574446e-06, "loss": 32.875, "step": 7059 }, { "epoch": 0.06683011330827993, "grad_norm": 293.668701171875, "learning_rate": 1.9928968303384593e-06, "loss": 14.9883, "step": 7060 }, { "epoch": 0.06683957932999499, "grad_norm": 249.4683837890625, "learning_rate": 1.992893182186216e-06, "loss": 21.6172, "step": 7061 }, { "epoch": 0.06684904535171003, "grad_norm": 166.2999267578125, "learning_rate": 1.9928895331007174e-06, "loss": 20.4219, "step": 7062 }, { "epoch": 0.06685851137342509, "grad_norm": 282.2184753417969, "learning_rate": 1.992885883081967e-06, "loss": 22.0391, "step": 7063 }, { "epoch": 0.06686797739514014, "grad_norm": 385.16790771484375, "learning_rate": 1.992882232129969e-06, "loss": 29.2656, "step": 7064 }, { "epoch": 0.0668774434168552, "grad_norm": 520.1531372070312, "learning_rate": 1.9928785802447263e-06, "loss": 29.0625, "step": 7065 }, { "epoch": 0.06688690943857026, "grad_norm": 1207.67431640625, "learning_rate": 1.9928749274262416e-06, "loss": 72.3281, "step": 7066 }, { "epoch": 0.0668963754602853, "grad_norm": 181.7728271484375, "learning_rate": 1.9928712736745197e-06, "loss": 18.0234, "step": 7067 }, { "epoch": 0.06690584148200036, "grad_norm": 626.3968505859375, "learning_rate": 1.992867618989563e-06, "loss": 22.375, "step": 7068 }, { "epoch": 0.06691530750371541, "grad_norm": 245.33677673339844, "learning_rate": 1.992863963371376e-06, "loss": 22.9922, "step": 7069 }, { "epoch": 0.06692477352543047, "grad_norm": 877.3176879882812, "learning_rate": 1.9928603068199607e-06, "loss": 36.7344, "step": 7070 }, { "epoch": 0.06693423954714552, "grad_norm": 309.2678527832031, "learning_rate": 1.9928566493353215e-06, "loss": 29.9062, "step": 7071 }, { "epoch": 0.06694370556886058, "grad_norm": 229.0395050048828, "learning_rate": 1.992852990917462e-06, "loss": 21.1484, "step": 7072 }, { "epoch": 0.06695317159057562, "grad_norm": 980.53271484375, "learning_rate": 1.992849331566385e-06, "loss": 25.6562, "step": 7073 }, { "epoch": 0.06696263761229068, "grad_norm": 831.7014770507812, "learning_rate": 1.992845671282094e-06, "loss": 67.8906, "step": 7074 }, { "epoch": 0.06697210363400574, "grad_norm": 483.8699035644531, "learning_rate": 1.992842010064593e-06, "loss": 45.9062, "step": 7075 }, { "epoch": 0.06698156965572079, "grad_norm": 212.92527770996094, "learning_rate": 1.992838347913885e-06, "loss": 23.9062, "step": 7076 }, { "epoch": 0.06699103567743585, "grad_norm": 241.62460327148438, "learning_rate": 1.9928346848299735e-06, "loss": 19.9922, "step": 7077 }, { "epoch": 0.0670005016991509, "grad_norm": 220.1125946044922, "learning_rate": 1.992831020812862e-06, "loss": 16.4922, "step": 7078 }, { "epoch": 0.06700996772086595, "grad_norm": 540.07275390625, "learning_rate": 1.992827355862554e-06, "loss": 66.9219, "step": 7079 }, { "epoch": 0.067019433742581, "grad_norm": 571.005859375, "learning_rate": 1.992823689979053e-06, "loss": 33.25, "step": 7080 }, { "epoch": 0.06702889976429606, "grad_norm": 545.5924072265625, "learning_rate": 1.992820023162362e-06, "loss": 53.7031, "step": 7081 }, { "epoch": 0.06703836578601112, "grad_norm": 301.46270751953125, "learning_rate": 1.9928163554124846e-06, "loss": 26.0, "step": 7082 }, { "epoch": 0.06704783180772617, "grad_norm": 423.6510314941406, "learning_rate": 1.9928126867294252e-06, "loss": 43.6562, "step": 7083 }, { "epoch": 0.06705729782944123, "grad_norm": 278.29559326171875, "learning_rate": 1.992809017113186e-06, "loss": 22.5625, "step": 7084 }, { "epoch": 0.06706676385115627, "grad_norm": 289.5505065917969, "learning_rate": 1.9928053465637706e-06, "loss": 30.6484, "step": 7085 }, { "epoch": 0.06707622987287133, "grad_norm": 1359.153564453125, "learning_rate": 1.992801675081183e-06, "loss": 29.2656, "step": 7086 }, { "epoch": 0.06708569589458638, "grad_norm": 370.3383483886719, "learning_rate": 1.9927980026654265e-06, "loss": 36.3281, "step": 7087 }, { "epoch": 0.06709516191630144, "grad_norm": 3.362656354904175, "learning_rate": 1.9927943293165043e-06, "loss": 0.9775, "step": 7088 }, { "epoch": 0.06710462793801648, "grad_norm": 275.8749694824219, "learning_rate": 1.9927906550344203e-06, "loss": 35.1562, "step": 7089 }, { "epoch": 0.06711409395973154, "grad_norm": 411.8811950683594, "learning_rate": 1.9927869798191775e-06, "loss": 38.2812, "step": 7090 }, { "epoch": 0.0671235599814466, "grad_norm": 655.517333984375, "learning_rate": 1.9927833036707797e-06, "loss": 47.8828, "step": 7091 }, { "epoch": 0.06713302600316165, "grad_norm": 229.78744506835938, "learning_rate": 1.99277962658923e-06, "loss": 26.1719, "step": 7092 }, { "epoch": 0.06714249202487671, "grad_norm": 3.9210939407348633, "learning_rate": 1.9927759485745316e-06, "loss": 1.0649, "step": 7093 }, { "epoch": 0.06715195804659176, "grad_norm": 740.2672119140625, "learning_rate": 1.992772269626689e-06, "loss": 38.75, "step": 7094 }, { "epoch": 0.06716142406830682, "grad_norm": 190.00653076171875, "learning_rate": 1.992768589745705e-06, "loss": 23.3281, "step": 7095 }, { "epoch": 0.06717089009002186, "grad_norm": 503.8719787597656, "learning_rate": 1.992764908931583e-06, "loss": 29.1875, "step": 7096 }, { "epoch": 0.06718035611173692, "grad_norm": 430.98345947265625, "learning_rate": 1.9927612271843265e-06, "loss": 29.1797, "step": 7097 }, { "epoch": 0.06718982213345197, "grad_norm": 433.6294250488281, "learning_rate": 1.9927575445039392e-06, "loss": 51.2812, "step": 7098 }, { "epoch": 0.06719928815516703, "grad_norm": 2088.318115234375, "learning_rate": 1.992753860890424e-06, "loss": 25.5, "step": 7099 }, { "epoch": 0.06720875417688209, "grad_norm": 212.83609008789062, "learning_rate": 1.992750176343785e-06, "loss": 21.4453, "step": 7100 }, { "epoch": 0.06721822019859713, "grad_norm": 302.50738525390625, "learning_rate": 1.9927464908640256e-06, "loss": 21.2969, "step": 7101 }, { "epoch": 0.0672276862203122, "grad_norm": 387.2843017578125, "learning_rate": 1.992742804451149e-06, "loss": 24.043, "step": 7102 }, { "epoch": 0.06723715224202724, "grad_norm": 282.415283203125, "learning_rate": 1.9927391171051586e-06, "loss": 20.0352, "step": 7103 }, { "epoch": 0.0672466182637423, "grad_norm": 1269.8505859375, "learning_rate": 1.9927354288260585e-06, "loss": 39.5156, "step": 7104 }, { "epoch": 0.06725608428545735, "grad_norm": 228.5485382080078, "learning_rate": 1.992731739613851e-06, "loss": 15.7578, "step": 7105 }, { "epoch": 0.0672655503071724, "grad_norm": 312.9607238769531, "learning_rate": 1.9927280494685405e-06, "loss": 26.4453, "step": 7106 }, { "epoch": 0.06727501632888745, "grad_norm": 2.8089849948883057, "learning_rate": 1.9927243583901303e-06, "loss": 0.8828, "step": 7107 }, { "epoch": 0.06728448235060251, "grad_norm": 3.514019250869751, "learning_rate": 1.9927206663786235e-06, "loss": 0.9204, "step": 7108 }, { "epoch": 0.06729394837231757, "grad_norm": 347.05035400390625, "learning_rate": 1.9927169734340244e-06, "loss": 29.1875, "step": 7109 }, { "epoch": 0.06730341439403262, "grad_norm": 572.0265502929688, "learning_rate": 1.9927132795563355e-06, "loss": 20.4609, "step": 7110 }, { "epoch": 0.06731288041574768, "grad_norm": 846.1424560546875, "learning_rate": 1.992709584745561e-06, "loss": 38.2109, "step": 7111 }, { "epoch": 0.06732234643746272, "grad_norm": 235.14089965820312, "learning_rate": 1.9927058890017035e-06, "loss": 17.0117, "step": 7112 }, { "epoch": 0.06733181245917778, "grad_norm": 803.0020751953125, "learning_rate": 1.9927021923247676e-06, "loss": 38.2656, "step": 7113 }, { "epoch": 0.06734127848089283, "grad_norm": 781.7423095703125, "learning_rate": 1.992698494714756e-06, "loss": 45.6094, "step": 7114 }, { "epoch": 0.06735074450260789, "grad_norm": 372.0160217285156, "learning_rate": 1.9926947961716726e-06, "loss": 25.1797, "step": 7115 }, { "epoch": 0.06736021052432294, "grad_norm": 777.1068115234375, "learning_rate": 1.9926910966955202e-06, "loss": 21.3906, "step": 7116 }, { "epoch": 0.067369676546038, "grad_norm": 255.51487731933594, "learning_rate": 1.9926873962863035e-06, "loss": 19.1094, "step": 7117 }, { "epoch": 0.06737914256775306, "grad_norm": 359.28350830078125, "learning_rate": 1.9926836949440245e-06, "loss": 24.5625, "step": 7118 }, { "epoch": 0.0673886085894681, "grad_norm": 582.5042114257812, "learning_rate": 1.992679992668688e-06, "loss": 42.9219, "step": 7119 }, { "epoch": 0.06739807461118316, "grad_norm": 533.2239379882812, "learning_rate": 1.992676289460296e-06, "loss": 30.7422, "step": 7120 }, { "epoch": 0.06740754063289821, "grad_norm": 4.044207572937012, "learning_rate": 1.9926725853188537e-06, "loss": 0.8438, "step": 7121 }, { "epoch": 0.06741700665461327, "grad_norm": 517.273193359375, "learning_rate": 1.9926688802443633e-06, "loss": 52.6719, "step": 7122 }, { "epoch": 0.06742647267632831, "grad_norm": 232.94125366210938, "learning_rate": 1.992665174236829e-06, "loss": 24.875, "step": 7123 }, { "epoch": 0.06743593869804337, "grad_norm": 194.4345703125, "learning_rate": 1.992661467296254e-06, "loss": 20.125, "step": 7124 }, { "epoch": 0.06744540471975843, "grad_norm": 807.0479125976562, "learning_rate": 1.9926577594226417e-06, "loss": 45.6328, "step": 7125 }, { "epoch": 0.06745487074147348, "grad_norm": 483.4369812011719, "learning_rate": 1.9926540506159958e-06, "loss": 15.3906, "step": 7126 }, { "epoch": 0.06746433676318854, "grad_norm": 829.7083129882812, "learning_rate": 1.9926503408763194e-06, "loss": 49.2188, "step": 7127 }, { "epoch": 0.06747380278490359, "grad_norm": 757.771240234375, "learning_rate": 1.9926466302036163e-06, "loss": 38.6758, "step": 7128 }, { "epoch": 0.06748326880661865, "grad_norm": 221.250244140625, "learning_rate": 1.9926429185978903e-06, "loss": 20.0625, "step": 7129 }, { "epoch": 0.06749273482833369, "grad_norm": 540.7989501953125, "learning_rate": 1.9926392060591444e-06, "loss": 52.1094, "step": 7130 }, { "epoch": 0.06750220085004875, "grad_norm": 609.892333984375, "learning_rate": 1.992635492587382e-06, "loss": 44.3281, "step": 7131 }, { "epoch": 0.0675116668717638, "grad_norm": 210.81375122070312, "learning_rate": 1.992631778182607e-06, "loss": 9.3203, "step": 7132 }, { "epoch": 0.06752113289347886, "grad_norm": 683.7275390625, "learning_rate": 1.9926280628448223e-06, "loss": 52.4375, "step": 7133 }, { "epoch": 0.06753059891519392, "grad_norm": 638.9544067382812, "learning_rate": 1.9926243465740323e-06, "loss": 23.4062, "step": 7134 }, { "epoch": 0.06754006493690896, "grad_norm": 364.5428161621094, "learning_rate": 1.9926206293702398e-06, "loss": 14.2891, "step": 7135 }, { "epoch": 0.06754953095862402, "grad_norm": 3.409395694732666, "learning_rate": 1.9926169112334485e-06, "loss": 0.8457, "step": 7136 }, { "epoch": 0.06755899698033907, "grad_norm": 1235.0084228515625, "learning_rate": 1.9926131921636618e-06, "loss": 62.8594, "step": 7137 }, { "epoch": 0.06756846300205413, "grad_norm": 361.5188903808594, "learning_rate": 1.9926094721608836e-06, "loss": 38.0781, "step": 7138 }, { "epoch": 0.06757792902376918, "grad_norm": 371.7429504394531, "learning_rate": 1.9926057512251168e-06, "loss": 13.4531, "step": 7139 }, { "epoch": 0.06758739504548424, "grad_norm": 380.7269287109375, "learning_rate": 1.992602029356365e-06, "loss": 21.6484, "step": 7140 }, { "epoch": 0.06759686106719928, "grad_norm": 403.5377197265625, "learning_rate": 1.9925983065546323e-06, "loss": 40.3281, "step": 7141 }, { "epoch": 0.06760632708891434, "grad_norm": 1022.5817260742188, "learning_rate": 1.9925945828199214e-06, "loss": 25.8359, "step": 7142 }, { "epoch": 0.0676157931106294, "grad_norm": 239.7090301513672, "learning_rate": 1.9925908581522362e-06, "loss": 20.3203, "step": 7143 }, { "epoch": 0.06762525913234445, "grad_norm": 526.4724731445312, "learning_rate": 1.9925871325515803e-06, "loss": 32.5234, "step": 7144 }, { "epoch": 0.06763472515405951, "grad_norm": 578.18505859375, "learning_rate": 1.992583406017957e-06, "loss": 36.0781, "step": 7145 }, { "epoch": 0.06764419117577455, "grad_norm": 435.55755615234375, "learning_rate": 1.99257967855137e-06, "loss": 27.6719, "step": 7146 }, { "epoch": 0.06765365719748961, "grad_norm": 372.4281311035156, "learning_rate": 1.9925759501518226e-06, "loss": 42.8594, "step": 7147 }, { "epoch": 0.06766312321920466, "grad_norm": 614.924072265625, "learning_rate": 1.9925722208193183e-06, "loss": 53.125, "step": 7148 }, { "epoch": 0.06767258924091972, "grad_norm": 465.9676513671875, "learning_rate": 1.9925684905538605e-06, "loss": 40.6094, "step": 7149 }, { "epoch": 0.06768205526263477, "grad_norm": 388.4211730957031, "learning_rate": 1.9925647593554535e-06, "loss": 19.7266, "step": 7150 }, { "epoch": 0.06769152128434983, "grad_norm": 314.0950622558594, "learning_rate": 1.9925610272240995e-06, "loss": 21.0938, "step": 7151 }, { "epoch": 0.06770098730606489, "grad_norm": 461.9523010253906, "learning_rate": 1.9925572941598034e-06, "loss": 16.5586, "step": 7152 }, { "epoch": 0.06771045332777993, "grad_norm": 289.6257019042969, "learning_rate": 1.9925535601625675e-06, "loss": 23.4062, "step": 7153 }, { "epoch": 0.06771991934949499, "grad_norm": 874.1072998046875, "learning_rate": 1.992549825232396e-06, "loss": 35.7734, "step": 7154 }, { "epoch": 0.06772938537121004, "grad_norm": 371.47662353515625, "learning_rate": 1.992546089369292e-06, "loss": 24.0625, "step": 7155 }, { "epoch": 0.0677388513929251, "grad_norm": 577.498779296875, "learning_rate": 1.9925423525732595e-06, "loss": 44.4531, "step": 7156 }, { "epoch": 0.06774831741464014, "grad_norm": 392.4335021972656, "learning_rate": 1.9925386148443018e-06, "loss": 33.7891, "step": 7157 }, { "epoch": 0.0677577834363552, "grad_norm": 275.5910949707031, "learning_rate": 1.992534876182422e-06, "loss": 19.3359, "step": 7158 }, { "epoch": 0.06776724945807025, "grad_norm": 422.3395690917969, "learning_rate": 1.9925311365876243e-06, "loss": 32.5312, "step": 7159 }, { "epoch": 0.06777671547978531, "grad_norm": 261.5928649902344, "learning_rate": 1.9925273960599117e-06, "loss": 23.0703, "step": 7160 }, { "epoch": 0.06778618150150037, "grad_norm": 623.2176513671875, "learning_rate": 1.992523654599288e-06, "loss": 54.2812, "step": 7161 }, { "epoch": 0.06779564752321542, "grad_norm": 333.0947570800781, "learning_rate": 1.992519912205757e-06, "loss": 21.0625, "step": 7162 }, { "epoch": 0.06780511354493048, "grad_norm": 267.3924255371094, "learning_rate": 1.9925161688793216e-06, "loss": 19.9375, "step": 7163 }, { "epoch": 0.06781457956664552, "grad_norm": 558.1334228515625, "learning_rate": 1.9925124246199855e-06, "loss": 39.3438, "step": 7164 }, { "epoch": 0.06782404558836058, "grad_norm": 952.8624267578125, "learning_rate": 1.9925086794277523e-06, "loss": 38.1953, "step": 7165 }, { "epoch": 0.06783351161007563, "grad_norm": 3.462919235229492, "learning_rate": 1.992504933302626e-06, "loss": 0.8179, "step": 7166 }, { "epoch": 0.06784297763179069, "grad_norm": 3.3259918689727783, "learning_rate": 1.9925011862446086e-06, "loss": 0.9321, "step": 7167 }, { "epoch": 0.06785244365350575, "grad_norm": 254.92205810546875, "learning_rate": 1.9924974382537052e-06, "loss": 29.75, "step": 7168 }, { "epoch": 0.0678619096752208, "grad_norm": 368.19134521484375, "learning_rate": 1.9924936893299188e-06, "loss": 23.1484, "step": 7169 }, { "epoch": 0.06787137569693585, "grad_norm": 571.3197631835938, "learning_rate": 1.992489939473253e-06, "loss": 43.0156, "step": 7170 }, { "epoch": 0.0678808417186509, "grad_norm": 419.21783447265625, "learning_rate": 1.992486188683711e-06, "loss": 30.75, "step": 7171 }, { "epoch": 0.06789030774036596, "grad_norm": 3.515363931655884, "learning_rate": 1.9924824369612966e-06, "loss": 0.8833, "step": 7172 }, { "epoch": 0.067899773762081, "grad_norm": 275.60418701171875, "learning_rate": 1.9924786843060133e-06, "loss": 17.3672, "step": 7173 }, { "epoch": 0.06790923978379607, "grad_norm": 670.97021484375, "learning_rate": 1.9924749307178645e-06, "loss": 50.6797, "step": 7174 }, { "epoch": 0.06791870580551111, "grad_norm": 358.71697998046875, "learning_rate": 1.992471176196854e-06, "loss": 22.9062, "step": 7175 }, { "epoch": 0.06792817182722617, "grad_norm": 1319.7027587890625, "learning_rate": 1.992467420742985e-06, "loss": 42.7031, "step": 7176 }, { "epoch": 0.06793763784894123, "grad_norm": 201.94937133789062, "learning_rate": 1.9924636643562615e-06, "loss": 26.375, "step": 7177 }, { "epoch": 0.06794710387065628, "grad_norm": 525.1111450195312, "learning_rate": 1.992459907036686e-06, "loss": 38.1875, "step": 7178 }, { "epoch": 0.06795656989237134, "grad_norm": 249.53758239746094, "learning_rate": 1.9924561487842637e-06, "loss": 26.0625, "step": 7179 }, { "epoch": 0.06796603591408638, "grad_norm": 318.7208557128906, "learning_rate": 1.9924523895989967e-06, "loss": 22.0625, "step": 7180 }, { "epoch": 0.06797550193580144, "grad_norm": 3.792858839035034, "learning_rate": 1.9924486294808893e-06, "loss": 0.8296, "step": 7181 }, { "epoch": 0.06798496795751649, "grad_norm": 497.7381896972656, "learning_rate": 1.992444868429944e-06, "loss": 47.6797, "step": 7182 }, { "epoch": 0.06799443397923155, "grad_norm": 726.7936401367188, "learning_rate": 1.9924411064461657e-06, "loss": 57.7812, "step": 7183 }, { "epoch": 0.0680039000009466, "grad_norm": 464.4272766113281, "learning_rate": 1.992437343529557e-06, "loss": 38.2969, "step": 7184 }, { "epoch": 0.06801336602266166, "grad_norm": 920.5757446289062, "learning_rate": 1.9924335796801223e-06, "loss": 39.6875, "step": 7185 }, { "epoch": 0.06802283204437672, "grad_norm": 572.5682983398438, "learning_rate": 1.9924298148978644e-06, "loss": 48.5, "step": 7186 }, { "epoch": 0.06803229806609176, "grad_norm": 248.76382446289062, "learning_rate": 1.992426049182787e-06, "loss": 28.6562, "step": 7187 }, { "epoch": 0.06804176408780682, "grad_norm": 796.7581176757812, "learning_rate": 1.9924222825348936e-06, "loss": 23.3672, "step": 7188 }, { "epoch": 0.06805123010952187, "grad_norm": 244.17369079589844, "learning_rate": 1.9924185149541875e-06, "loss": 24.7969, "step": 7189 }, { "epoch": 0.06806069613123693, "grad_norm": 288.3608703613281, "learning_rate": 1.9924147464406732e-06, "loss": 19.8828, "step": 7190 }, { "epoch": 0.06807016215295197, "grad_norm": 454.4216003417969, "learning_rate": 1.992410976994353e-06, "loss": 32.1875, "step": 7191 }, { "epoch": 0.06807962817466703, "grad_norm": 224.9293670654297, "learning_rate": 1.9924072066152316e-06, "loss": 21.375, "step": 7192 }, { "epoch": 0.06808909419638208, "grad_norm": 346.10150146484375, "learning_rate": 1.9924034353033115e-06, "loss": 22.4531, "step": 7193 }, { "epoch": 0.06809856021809714, "grad_norm": 205.3007049560547, "learning_rate": 1.992399663058597e-06, "loss": 26.75, "step": 7194 }, { "epoch": 0.0681080262398122, "grad_norm": 1220.62451171875, "learning_rate": 1.9923958898810913e-06, "loss": 54.4219, "step": 7195 }, { "epoch": 0.06811749226152725, "grad_norm": 644.1692504882812, "learning_rate": 1.992392115770798e-06, "loss": 28.8281, "step": 7196 }, { "epoch": 0.0681269582832423, "grad_norm": 223.60032653808594, "learning_rate": 1.9923883407277206e-06, "loss": 16.6797, "step": 7197 }, { "epoch": 0.06813642430495735, "grad_norm": 220.91775512695312, "learning_rate": 1.992384564751863e-06, "loss": 18.9766, "step": 7198 }, { "epoch": 0.06814589032667241, "grad_norm": 177.74273681640625, "learning_rate": 1.992380787843228e-06, "loss": 20.6953, "step": 7199 }, { "epoch": 0.06815535634838746, "grad_norm": 3.635741710662842, "learning_rate": 1.99237701000182e-06, "loss": 1.1011, "step": 7200 }, { "epoch": 0.06816482237010252, "grad_norm": 207.12049865722656, "learning_rate": 1.992373231227642e-06, "loss": 19.0469, "step": 7201 }, { "epoch": 0.06817428839181756, "grad_norm": 211.16168212890625, "learning_rate": 1.992369451520698e-06, "loss": 20.5156, "step": 7202 }, { "epoch": 0.06818375441353262, "grad_norm": 446.1520690917969, "learning_rate": 1.992365670880991e-06, "loss": 16.1836, "step": 7203 }, { "epoch": 0.06819322043524768, "grad_norm": 292.8475646972656, "learning_rate": 1.992361889308525e-06, "loss": 20.375, "step": 7204 }, { "epoch": 0.06820268645696273, "grad_norm": 403.8297424316406, "learning_rate": 1.9923581068033032e-06, "loss": 46.1719, "step": 7205 }, { "epoch": 0.06821215247867779, "grad_norm": 488.4284973144531, "learning_rate": 1.9923543233653293e-06, "loss": 45.2031, "step": 7206 }, { "epoch": 0.06822161850039284, "grad_norm": 703.1409301757812, "learning_rate": 1.992350538994607e-06, "loss": 17.5039, "step": 7207 }, { "epoch": 0.0682310845221079, "grad_norm": 315.4235534667969, "learning_rate": 1.9923467536911395e-06, "loss": 12.1426, "step": 7208 }, { "epoch": 0.06824055054382294, "grad_norm": 241.82199096679688, "learning_rate": 1.992342967454931e-06, "loss": 23.5781, "step": 7209 }, { "epoch": 0.068250016565538, "grad_norm": 490.59698486328125, "learning_rate": 1.9923391802859844e-06, "loss": 65.707, "step": 7210 }, { "epoch": 0.06825948258725306, "grad_norm": 618.9129028320312, "learning_rate": 1.992335392184304e-06, "loss": 25.5625, "step": 7211 }, { "epoch": 0.06826894860896811, "grad_norm": 549.33154296875, "learning_rate": 1.992331603149892e-06, "loss": 35.1797, "step": 7212 }, { "epoch": 0.06827841463068317, "grad_norm": 2.9776875972747803, "learning_rate": 1.992327813182754e-06, "loss": 0.9795, "step": 7213 }, { "epoch": 0.06828788065239821, "grad_norm": 465.29010009765625, "learning_rate": 1.9923240222828913e-06, "loss": 31.0859, "step": 7214 }, { "epoch": 0.06829734667411327, "grad_norm": 164.41929626464844, "learning_rate": 1.9923202304503093e-06, "loss": 22.1406, "step": 7215 }, { "epoch": 0.06830681269582832, "grad_norm": 421.6223449707031, "learning_rate": 1.9923164376850106e-06, "loss": 42.9609, "step": 7216 }, { "epoch": 0.06831627871754338, "grad_norm": 405.4000244140625, "learning_rate": 1.992312643986999e-06, "loss": 63.7812, "step": 7217 }, { "epoch": 0.06832574473925843, "grad_norm": 171.373046875, "learning_rate": 1.992308849356278e-06, "loss": 23.4375, "step": 7218 }, { "epoch": 0.06833521076097349, "grad_norm": 632.029052734375, "learning_rate": 1.9923050537928516e-06, "loss": 42.9922, "step": 7219 }, { "epoch": 0.06834467678268855, "grad_norm": 1888.36865234375, "learning_rate": 1.9923012572967226e-06, "loss": 40.9062, "step": 7220 }, { "epoch": 0.06835414280440359, "grad_norm": 930.0281982421875, "learning_rate": 1.992297459867895e-06, "loss": 50.6406, "step": 7221 }, { "epoch": 0.06836360882611865, "grad_norm": 1469.2366943359375, "learning_rate": 1.9922936615063725e-06, "loss": 53.0234, "step": 7222 }, { "epoch": 0.0683730748478337, "grad_norm": 532.34033203125, "learning_rate": 1.9922898622121583e-06, "loss": 48.6875, "step": 7223 }, { "epoch": 0.06838254086954876, "grad_norm": 376.48193359375, "learning_rate": 1.9922860619852566e-06, "loss": 42.9375, "step": 7224 }, { "epoch": 0.0683920068912638, "grad_norm": 173.93328857421875, "learning_rate": 1.9922822608256703e-06, "loss": 14.8203, "step": 7225 }, { "epoch": 0.06840147291297886, "grad_norm": 309.63671875, "learning_rate": 1.992278458733403e-06, "loss": 26.7031, "step": 7226 }, { "epoch": 0.06841093893469391, "grad_norm": 258.81341552734375, "learning_rate": 1.992274655708459e-06, "loss": 20.0156, "step": 7227 }, { "epoch": 0.06842040495640897, "grad_norm": 273.595947265625, "learning_rate": 1.9922708517508407e-06, "loss": 22.9766, "step": 7228 }, { "epoch": 0.06842987097812403, "grad_norm": 263.84368896484375, "learning_rate": 1.9922670468605525e-06, "loss": 22.6484, "step": 7229 }, { "epoch": 0.06843933699983908, "grad_norm": 256.5375671386719, "learning_rate": 1.9922632410375984e-06, "loss": 13.9766, "step": 7230 }, { "epoch": 0.06844880302155414, "grad_norm": 1566.0938720703125, "learning_rate": 1.992259434281981e-06, "loss": 23.4375, "step": 7231 }, { "epoch": 0.06845826904326918, "grad_norm": 571.6846313476562, "learning_rate": 1.9922556265937046e-06, "loss": 48.25, "step": 7232 }, { "epoch": 0.06846773506498424, "grad_norm": 349.6444396972656, "learning_rate": 1.992251817972772e-06, "loss": 43.4219, "step": 7233 }, { "epoch": 0.06847720108669929, "grad_norm": 376.4237060546875, "learning_rate": 1.9922480084191875e-06, "loss": 17.6602, "step": 7234 }, { "epoch": 0.06848666710841435, "grad_norm": 323.5259704589844, "learning_rate": 1.992244197932954e-06, "loss": 34.1875, "step": 7235 }, { "epoch": 0.0684961331301294, "grad_norm": 1007.5625610351562, "learning_rate": 1.992240386514076e-06, "loss": 61.1484, "step": 7236 }, { "epoch": 0.06850559915184445, "grad_norm": 1125.7484130859375, "learning_rate": 1.9922365741625567e-06, "loss": 41.1406, "step": 7237 }, { "epoch": 0.06851506517355951, "grad_norm": 478.22235107421875, "learning_rate": 1.9922327608783994e-06, "loss": 43.5, "step": 7238 }, { "epoch": 0.06852453119527456, "grad_norm": 620.0689086914062, "learning_rate": 1.9922289466616076e-06, "loss": 58.3125, "step": 7239 }, { "epoch": 0.06853399721698962, "grad_norm": 166.89840698242188, "learning_rate": 1.9922251315121855e-06, "loss": 19.5312, "step": 7240 }, { "epoch": 0.06854346323870467, "grad_norm": 243.13241577148438, "learning_rate": 1.992221315430136e-06, "loss": 18.0391, "step": 7241 }, { "epoch": 0.06855292926041973, "grad_norm": 329.12158203125, "learning_rate": 1.9922174984154632e-06, "loss": 27.2188, "step": 7242 }, { "epoch": 0.06856239528213477, "grad_norm": 3.7341501712799072, "learning_rate": 1.9922136804681706e-06, "loss": 1.0103, "step": 7243 }, { "epoch": 0.06857186130384983, "grad_norm": 142.6222381591797, "learning_rate": 1.9922098615882613e-06, "loss": 21.0391, "step": 7244 }, { "epoch": 0.06858132732556488, "grad_norm": 323.0128479003906, "learning_rate": 1.9922060417757394e-06, "loss": 24.4297, "step": 7245 }, { "epoch": 0.06859079334727994, "grad_norm": 1174.357666015625, "learning_rate": 1.9922022210306085e-06, "loss": 41.8906, "step": 7246 }, { "epoch": 0.068600259368995, "grad_norm": 502.02655029296875, "learning_rate": 1.992198399352872e-06, "loss": 30.1641, "step": 7247 }, { "epoch": 0.06860972539071004, "grad_norm": 713.5994873046875, "learning_rate": 1.992194576742534e-06, "loss": 47.9219, "step": 7248 }, { "epoch": 0.0686191914124251, "grad_norm": 529.8552856445312, "learning_rate": 1.9921907531995974e-06, "loss": 56.0625, "step": 7249 }, { "epoch": 0.06862865743414015, "grad_norm": 256.2414855957031, "learning_rate": 1.992186928724066e-06, "loss": 24.25, "step": 7250 }, { "epoch": 0.06863812345585521, "grad_norm": 343.6942443847656, "learning_rate": 1.9921831033159433e-06, "loss": 24.0781, "step": 7251 }, { "epoch": 0.06864758947757026, "grad_norm": 788.3477783203125, "learning_rate": 1.9921792769752326e-06, "loss": 40.3906, "step": 7252 }, { "epoch": 0.06865705549928532, "grad_norm": 244.6634063720703, "learning_rate": 1.9921754497019387e-06, "loss": 23.0, "step": 7253 }, { "epoch": 0.06866652152100038, "grad_norm": 590.38330078125, "learning_rate": 1.9921716214960636e-06, "loss": 36.2031, "step": 7254 }, { "epoch": 0.06867598754271542, "grad_norm": 857.8365478515625, "learning_rate": 1.9921677923576125e-06, "loss": 69.6562, "step": 7255 }, { "epoch": 0.06868545356443048, "grad_norm": 245.41925048828125, "learning_rate": 1.9921639622865878e-06, "loss": 22.9141, "step": 7256 }, { "epoch": 0.06869491958614553, "grad_norm": 631.0693969726562, "learning_rate": 1.9921601312829938e-06, "loss": 59.5234, "step": 7257 }, { "epoch": 0.06870438560786059, "grad_norm": 273.6990966796875, "learning_rate": 1.992156299346834e-06, "loss": 36.5781, "step": 7258 }, { "epoch": 0.06871385162957563, "grad_norm": 469.0268859863281, "learning_rate": 1.992152466478111e-06, "loss": 21.3125, "step": 7259 }, { "epoch": 0.0687233176512907, "grad_norm": 390.9794616699219, "learning_rate": 1.99214863267683e-06, "loss": 23.0312, "step": 7260 }, { "epoch": 0.06873278367300574, "grad_norm": 209.4972381591797, "learning_rate": 1.9921447979429934e-06, "loss": 22.0312, "step": 7261 }, { "epoch": 0.0687422496947208, "grad_norm": 234.5009307861328, "learning_rate": 1.9921409622766055e-06, "loss": 16.4219, "step": 7262 }, { "epoch": 0.06875171571643586, "grad_norm": 200.12310791015625, "learning_rate": 1.9921371256776694e-06, "loss": 17.4688, "step": 7263 }, { "epoch": 0.0687611817381509, "grad_norm": 255.74696350097656, "learning_rate": 1.992133288146189e-06, "loss": 18.0391, "step": 7264 }, { "epoch": 0.06877064775986597, "grad_norm": 376.3258361816406, "learning_rate": 1.992129449682168e-06, "loss": 24.0156, "step": 7265 }, { "epoch": 0.06878011378158101, "grad_norm": 342.81121826171875, "learning_rate": 1.99212561028561e-06, "loss": 36.2188, "step": 7266 }, { "epoch": 0.06878957980329607, "grad_norm": 1063.738037109375, "learning_rate": 1.992121769956518e-06, "loss": 37.2891, "step": 7267 }, { "epoch": 0.06879904582501112, "grad_norm": 777.8397827148438, "learning_rate": 1.9921179286948963e-06, "loss": 56.6406, "step": 7268 }, { "epoch": 0.06880851184672618, "grad_norm": 503.3398742675781, "learning_rate": 1.9921140865007485e-06, "loss": 49.9219, "step": 7269 }, { "epoch": 0.06881797786844122, "grad_norm": 3.282646417617798, "learning_rate": 1.9921102433740776e-06, "loss": 0.8208, "step": 7270 }, { "epoch": 0.06882744389015628, "grad_norm": 334.0233459472656, "learning_rate": 1.992106399314888e-06, "loss": 18.5469, "step": 7271 }, { "epoch": 0.06883690991187134, "grad_norm": 459.3161926269531, "learning_rate": 1.992102554323183e-06, "loss": 47.1875, "step": 7272 }, { "epoch": 0.06884637593358639, "grad_norm": 2.9949421882629395, "learning_rate": 1.9920987083989656e-06, "loss": 0.8508, "step": 7273 }, { "epoch": 0.06885584195530145, "grad_norm": 255.99403381347656, "learning_rate": 1.9920948615422403e-06, "loss": 25.9844, "step": 7274 }, { "epoch": 0.0688653079770165, "grad_norm": 410.89166259765625, "learning_rate": 1.9920910137530105e-06, "loss": 38.5547, "step": 7275 }, { "epoch": 0.06887477399873156, "grad_norm": 557.7354125976562, "learning_rate": 1.9920871650312795e-06, "loss": 44.2344, "step": 7276 }, { "epoch": 0.0688842400204466, "grad_norm": 209.406494140625, "learning_rate": 1.992083315377051e-06, "loss": 16.6562, "step": 7277 }, { "epoch": 0.06889370604216166, "grad_norm": 503.18731689453125, "learning_rate": 1.9920794647903287e-06, "loss": 14.5312, "step": 7278 }, { "epoch": 0.06890317206387671, "grad_norm": 825.7523193359375, "learning_rate": 1.992075613271116e-06, "loss": 53.8125, "step": 7279 }, { "epoch": 0.06891263808559177, "grad_norm": 535.4503784179688, "learning_rate": 1.9920717608194176e-06, "loss": 49.8281, "step": 7280 }, { "epoch": 0.06892210410730683, "grad_norm": 379.4809265136719, "learning_rate": 1.9920679074352357e-06, "loss": 29.3867, "step": 7281 }, { "epoch": 0.06893157012902187, "grad_norm": 181.70538330078125, "learning_rate": 1.9920640531185748e-06, "loss": 16.7344, "step": 7282 }, { "epoch": 0.06894103615073693, "grad_norm": 1178.43408203125, "learning_rate": 1.992060197869438e-06, "loss": 60.3516, "step": 7283 }, { "epoch": 0.06895050217245198, "grad_norm": 3.7183051109313965, "learning_rate": 1.992056341687829e-06, "loss": 1.0669, "step": 7284 }, { "epoch": 0.06895996819416704, "grad_norm": 506.26953125, "learning_rate": 1.9920524845737517e-06, "loss": 38.2031, "step": 7285 }, { "epoch": 0.06896943421588209, "grad_norm": 494.969482421875, "learning_rate": 1.99204862652721e-06, "loss": 19.6055, "step": 7286 }, { "epoch": 0.06897890023759715, "grad_norm": 319.4105529785156, "learning_rate": 1.9920447675482065e-06, "loss": 28.9688, "step": 7287 }, { "epoch": 0.06898836625931219, "grad_norm": 231.82077026367188, "learning_rate": 1.992040907636746e-06, "loss": 24.4219, "step": 7288 }, { "epoch": 0.06899783228102725, "grad_norm": 538.8790893554688, "learning_rate": 1.9920370467928314e-06, "loss": 30.6641, "step": 7289 }, { "epoch": 0.06900729830274231, "grad_norm": 1028.642822265625, "learning_rate": 1.9920331850164665e-06, "loss": 45.9062, "step": 7290 }, { "epoch": 0.06901676432445736, "grad_norm": 361.4853820800781, "learning_rate": 1.9920293223076547e-06, "loss": 28.9844, "step": 7291 }, { "epoch": 0.06902623034617242, "grad_norm": 3.672696590423584, "learning_rate": 1.9920254586664e-06, "loss": 1.1592, "step": 7292 }, { "epoch": 0.06903569636788746, "grad_norm": 728.8442993164062, "learning_rate": 1.9920215940927064e-06, "loss": 62.4219, "step": 7293 }, { "epoch": 0.06904516238960252, "grad_norm": 187.85328674316406, "learning_rate": 1.9920177285865763e-06, "loss": 23.9141, "step": 7294 }, { "epoch": 0.06905462841131757, "grad_norm": 835.5132446289062, "learning_rate": 1.9920138621480146e-06, "loss": 51.3672, "step": 7295 }, { "epoch": 0.06906409443303263, "grad_norm": 881.5437622070312, "learning_rate": 1.992009994777024e-06, "loss": 57.2969, "step": 7296 }, { "epoch": 0.06907356045474769, "grad_norm": 1980.6649169921875, "learning_rate": 1.992006126473609e-06, "loss": 81.0898, "step": 7297 }, { "epoch": 0.06908302647646274, "grad_norm": 777.2039794921875, "learning_rate": 1.9920022572377725e-06, "loss": 42.0703, "step": 7298 }, { "epoch": 0.0690924924981778, "grad_norm": 341.6648254394531, "learning_rate": 1.9919983870695185e-06, "loss": 26.5234, "step": 7299 }, { "epoch": 0.06910195851989284, "grad_norm": 410.0570373535156, "learning_rate": 1.99199451596885e-06, "loss": 31.5156, "step": 7300 }, { "epoch": 0.0691114245416079, "grad_norm": 2.887174367904663, "learning_rate": 1.9919906439357717e-06, "loss": 0.8521, "step": 7301 }, { "epoch": 0.06912089056332295, "grad_norm": 982.573974609375, "learning_rate": 1.991986770970287e-06, "loss": 49.3906, "step": 7302 }, { "epoch": 0.06913035658503801, "grad_norm": 207.35841369628906, "learning_rate": 1.9919828970723987e-06, "loss": 18.0625, "step": 7303 }, { "epoch": 0.06913982260675305, "grad_norm": 389.7633361816406, "learning_rate": 1.9919790222421113e-06, "loss": 21.1719, "step": 7304 }, { "epoch": 0.06914928862846811, "grad_norm": 350.7929992675781, "learning_rate": 1.9919751464794283e-06, "loss": 24.3672, "step": 7305 }, { "epoch": 0.06915875465018317, "grad_norm": 404.984375, "learning_rate": 1.991971269784353e-06, "loss": 37.5156, "step": 7306 }, { "epoch": 0.06916822067189822, "grad_norm": 208.63970947265625, "learning_rate": 1.991967392156889e-06, "loss": 20.8906, "step": 7307 }, { "epoch": 0.06917768669361328, "grad_norm": 430.841064453125, "learning_rate": 1.9919635135970403e-06, "loss": 23.6562, "step": 7308 }, { "epoch": 0.06918715271532833, "grad_norm": 564.0575561523438, "learning_rate": 1.9919596341048106e-06, "loss": 16.5938, "step": 7309 }, { "epoch": 0.06919661873704339, "grad_norm": 489.51654052734375, "learning_rate": 1.9919557536802035e-06, "loss": 25.4531, "step": 7310 }, { "epoch": 0.06920608475875843, "grad_norm": 641.5396118164062, "learning_rate": 1.9919518723232223e-06, "loss": 50.9844, "step": 7311 }, { "epoch": 0.06921555078047349, "grad_norm": 1757.1131591796875, "learning_rate": 1.9919479900338713e-06, "loss": 58.3359, "step": 7312 }, { "epoch": 0.06922501680218854, "grad_norm": 540.346923828125, "learning_rate": 1.991944106812153e-06, "loss": 20.4141, "step": 7313 }, { "epoch": 0.0692344828239036, "grad_norm": 300.7900695800781, "learning_rate": 1.9919402226580725e-06, "loss": 24.4141, "step": 7314 }, { "epoch": 0.06924394884561866, "grad_norm": 478.725830078125, "learning_rate": 1.9919363375716324e-06, "loss": 54.8594, "step": 7315 }, { "epoch": 0.0692534148673337, "grad_norm": 1455.298095703125, "learning_rate": 1.9919324515528364e-06, "loss": 58.7031, "step": 7316 }, { "epoch": 0.06926288088904876, "grad_norm": 535.9581909179688, "learning_rate": 1.9919285646016884e-06, "loss": 33.8828, "step": 7317 }, { "epoch": 0.06927234691076381, "grad_norm": 607.1513671875, "learning_rate": 1.9919246767181925e-06, "loss": 21.7969, "step": 7318 }, { "epoch": 0.06928181293247887, "grad_norm": 200.95388793945312, "learning_rate": 1.9919207879023515e-06, "loss": 19.1953, "step": 7319 }, { "epoch": 0.06929127895419392, "grad_norm": 189.70559692382812, "learning_rate": 1.9919168981541698e-06, "loss": 23.0234, "step": 7320 }, { "epoch": 0.06930074497590898, "grad_norm": 809.4456176757812, "learning_rate": 1.991913007473651e-06, "loss": 36.4219, "step": 7321 }, { "epoch": 0.06931021099762402, "grad_norm": 461.5372009277344, "learning_rate": 1.991909115860798e-06, "loss": 33.5312, "step": 7322 }, { "epoch": 0.06931967701933908, "grad_norm": 928.681884765625, "learning_rate": 1.9919052233156154e-06, "loss": 72.5625, "step": 7323 }, { "epoch": 0.06932914304105414, "grad_norm": 890.7039184570312, "learning_rate": 1.991901329838106e-06, "loss": 67.375, "step": 7324 }, { "epoch": 0.06933860906276919, "grad_norm": 247.6001434326172, "learning_rate": 1.991897435428274e-06, "loss": 26.7812, "step": 7325 }, { "epoch": 0.06934807508448425, "grad_norm": 563.7797241210938, "learning_rate": 1.991893540086123e-06, "loss": 50.1641, "step": 7326 }, { "epoch": 0.0693575411061993, "grad_norm": 971.6875, "learning_rate": 1.9918896438116568e-06, "loss": 46.9688, "step": 7327 }, { "epoch": 0.06936700712791435, "grad_norm": 386.1033020019531, "learning_rate": 1.991885746604878e-06, "loss": 22.8828, "step": 7328 }, { "epoch": 0.0693764731496294, "grad_norm": 2.9643571376800537, "learning_rate": 1.991881848465792e-06, "loss": 0.8228, "step": 7329 }, { "epoch": 0.06938593917134446, "grad_norm": 494.2928161621094, "learning_rate": 1.9918779493944017e-06, "loss": 40.8672, "step": 7330 }, { "epoch": 0.0693954051930595, "grad_norm": 441.4879455566406, "learning_rate": 1.99187404939071e-06, "loss": 17.6094, "step": 7331 }, { "epoch": 0.06940487121477457, "grad_norm": 387.37603759765625, "learning_rate": 1.9918701484547217e-06, "loss": 33.8281, "step": 7332 }, { "epoch": 0.06941433723648963, "grad_norm": 203.69883728027344, "learning_rate": 1.9918662465864396e-06, "loss": 23.6094, "step": 7333 }, { "epoch": 0.06942380325820467, "grad_norm": 344.739501953125, "learning_rate": 1.991862343785868e-06, "loss": 33.0938, "step": 7334 }, { "epoch": 0.06943326927991973, "grad_norm": 714.5194091796875, "learning_rate": 1.9918584400530107e-06, "loss": 73.25, "step": 7335 }, { "epoch": 0.06944273530163478, "grad_norm": 291.0477600097656, "learning_rate": 1.9918545353878702e-06, "loss": 22.6484, "step": 7336 }, { "epoch": 0.06945220132334984, "grad_norm": 407.5873718261719, "learning_rate": 1.9918506297904515e-06, "loss": 21.3203, "step": 7337 }, { "epoch": 0.06946166734506488, "grad_norm": 560.351806640625, "learning_rate": 1.9918467232607573e-06, "loss": 66.6562, "step": 7338 }, { "epoch": 0.06947113336677994, "grad_norm": 202.71018981933594, "learning_rate": 1.991842815798792e-06, "loss": 26.4453, "step": 7339 }, { "epoch": 0.069480599388495, "grad_norm": 900.7262573242188, "learning_rate": 1.991838907404559e-06, "loss": 37.3125, "step": 7340 }, { "epoch": 0.06949006541021005, "grad_norm": 3.0938000679016113, "learning_rate": 1.991834998078062e-06, "loss": 0.998, "step": 7341 }, { "epoch": 0.06949953143192511, "grad_norm": 2.958303689956665, "learning_rate": 1.9918310878193044e-06, "loss": 0.8511, "step": 7342 }, { "epoch": 0.06950899745364016, "grad_norm": 210.72998046875, "learning_rate": 1.9918271766282905e-06, "loss": 11.5742, "step": 7343 }, { "epoch": 0.06951846347535522, "grad_norm": 1705.0511474609375, "learning_rate": 1.9918232645050233e-06, "loss": 46.2422, "step": 7344 }, { "epoch": 0.06952792949707026, "grad_norm": 1603.5118408203125, "learning_rate": 1.9918193514495065e-06, "loss": 24.4219, "step": 7345 }, { "epoch": 0.06953739551878532, "grad_norm": 872.19384765625, "learning_rate": 1.9918154374617445e-06, "loss": 47.3125, "step": 7346 }, { "epoch": 0.06954686154050037, "grad_norm": 1144.6636962890625, "learning_rate": 1.99181152254174e-06, "loss": 44.8672, "step": 7347 }, { "epoch": 0.06955632756221543, "grad_norm": 503.4835205078125, "learning_rate": 1.991807606689498e-06, "loss": 53.625, "step": 7348 }, { "epoch": 0.06956579358393049, "grad_norm": 194.76559448242188, "learning_rate": 1.9918036899050207e-06, "loss": 24.5469, "step": 7349 }, { "epoch": 0.06957525960564553, "grad_norm": 774.5486450195312, "learning_rate": 1.9917997721883125e-06, "loss": 19.4922, "step": 7350 }, { "epoch": 0.0695847256273606, "grad_norm": 689.3071899414062, "learning_rate": 1.9917958535393773e-06, "loss": 29.7344, "step": 7351 }, { "epoch": 0.06959419164907564, "grad_norm": 696.1262817382812, "learning_rate": 1.9917919339582183e-06, "loss": 34.6406, "step": 7352 }, { "epoch": 0.0696036576707907, "grad_norm": 706.4270629882812, "learning_rate": 1.9917880134448394e-06, "loss": 55.2188, "step": 7353 }, { "epoch": 0.06961312369250575, "grad_norm": 731.8836059570312, "learning_rate": 1.991784091999244e-06, "loss": 59.8125, "step": 7354 }, { "epoch": 0.0696225897142208, "grad_norm": 583.183837890625, "learning_rate": 1.991780169621437e-06, "loss": 29.9531, "step": 7355 }, { "epoch": 0.06963205573593585, "grad_norm": 755.743408203125, "learning_rate": 1.9917762463114203e-06, "loss": 38.8828, "step": 7356 }, { "epoch": 0.06964152175765091, "grad_norm": 262.7541198730469, "learning_rate": 1.9917723220691987e-06, "loss": 23.7812, "step": 7357 }, { "epoch": 0.06965098777936597, "grad_norm": 708.4259643554688, "learning_rate": 1.991768396894776e-06, "loss": 67.4609, "step": 7358 }, { "epoch": 0.06966045380108102, "grad_norm": 331.9125671386719, "learning_rate": 1.991764470788155e-06, "loss": 23.6562, "step": 7359 }, { "epoch": 0.06966991982279608, "grad_norm": 404.60400390625, "learning_rate": 1.99176054374934e-06, "loss": 18.2734, "step": 7360 }, { "epoch": 0.06967938584451112, "grad_norm": 173.37332153320312, "learning_rate": 1.9917566157783348e-06, "loss": 25.3672, "step": 7361 }, { "epoch": 0.06968885186622618, "grad_norm": 3.563624382019043, "learning_rate": 1.991752686875143e-06, "loss": 1.0103, "step": 7362 }, { "epoch": 0.06969831788794123, "grad_norm": 2.748018980026245, "learning_rate": 1.991748757039768e-06, "loss": 0.8833, "step": 7363 }, { "epoch": 0.06970778390965629, "grad_norm": 326.2257385253906, "learning_rate": 1.991744826272213e-06, "loss": 21.0078, "step": 7364 }, { "epoch": 0.06971724993137134, "grad_norm": 502.64202880859375, "learning_rate": 1.9917408945724836e-06, "loss": 34.5625, "step": 7365 }, { "epoch": 0.0697267159530864, "grad_norm": 520.9301147460938, "learning_rate": 1.991736961940582e-06, "loss": 25.7969, "step": 7366 }, { "epoch": 0.06973618197480146, "grad_norm": 331.8801574707031, "learning_rate": 1.9917330283765115e-06, "loss": 26.9688, "step": 7367 }, { "epoch": 0.0697456479965165, "grad_norm": 783.3373413085938, "learning_rate": 1.991729093880277e-06, "loss": 49.3438, "step": 7368 }, { "epoch": 0.06975511401823156, "grad_norm": 286.8240051269531, "learning_rate": 1.9917251584518814e-06, "loss": 32.2188, "step": 7369 }, { "epoch": 0.06976458003994661, "grad_norm": 302.6920471191406, "learning_rate": 1.991721222091329e-06, "loss": 27.0859, "step": 7370 }, { "epoch": 0.06977404606166167, "grad_norm": 420.8332824707031, "learning_rate": 1.991717284798623e-06, "loss": 63.0625, "step": 7371 }, { "epoch": 0.06978351208337671, "grad_norm": 520.7198486328125, "learning_rate": 1.9917133465737673e-06, "loss": 31.1328, "step": 7372 }, { "epoch": 0.06979297810509177, "grad_norm": 333.6162414550781, "learning_rate": 1.9917094074167655e-06, "loss": 29.1016, "step": 7373 }, { "epoch": 0.06980244412680682, "grad_norm": 277.9934387207031, "learning_rate": 1.9917054673276213e-06, "loss": 20.0156, "step": 7374 }, { "epoch": 0.06981191014852188, "grad_norm": 451.2554016113281, "learning_rate": 1.991701526306339e-06, "loss": 37.0078, "step": 7375 }, { "epoch": 0.06982137617023694, "grad_norm": 608.8590087890625, "learning_rate": 1.991697584352921e-06, "loss": 45.4375, "step": 7376 }, { "epoch": 0.06983084219195199, "grad_norm": 494.5029296875, "learning_rate": 1.991693641467372e-06, "loss": 50.5312, "step": 7377 }, { "epoch": 0.06984030821366705, "grad_norm": 425.85552978515625, "learning_rate": 1.9916896976496956e-06, "loss": 31.4062, "step": 7378 }, { "epoch": 0.06984977423538209, "grad_norm": 459.0323181152344, "learning_rate": 1.9916857528998958e-06, "loss": 49.7188, "step": 7379 }, { "epoch": 0.06985924025709715, "grad_norm": 600.57421875, "learning_rate": 1.991681807217975e-06, "loss": 60.5625, "step": 7380 }, { "epoch": 0.0698687062788122, "grad_norm": 604.4678955078125, "learning_rate": 1.991677860603939e-06, "loss": 50.1484, "step": 7381 }, { "epoch": 0.06987817230052726, "grad_norm": 665.7972412109375, "learning_rate": 1.9916739130577897e-06, "loss": 36.4219, "step": 7382 }, { "epoch": 0.06988763832224232, "grad_norm": 195.10986328125, "learning_rate": 1.9916699645795313e-06, "loss": 19.125, "step": 7383 }, { "epoch": 0.06989710434395736, "grad_norm": 450.8621826171875, "learning_rate": 1.9916660151691677e-06, "loss": 39.1875, "step": 7384 }, { "epoch": 0.06990657036567242, "grad_norm": 600.1536254882812, "learning_rate": 1.991662064826703e-06, "loss": 42.0938, "step": 7385 }, { "epoch": 0.06991603638738747, "grad_norm": 3.201138734817505, "learning_rate": 1.9916581135521396e-06, "loss": 1.0249, "step": 7386 }, { "epoch": 0.06992550240910253, "grad_norm": 189.11904907226562, "learning_rate": 1.9916541613454827e-06, "loss": 13.1387, "step": 7387 }, { "epoch": 0.06993496843081758, "grad_norm": 262.8631591796875, "learning_rate": 1.991650208206735e-06, "loss": 20.7812, "step": 7388 }, { "epoch": 0.06994443445253264, "grad_norm": 309.97186279296875, "learning_rate": 1.9916462541359013e-06, "loss": 26.9375, "step": 7389 }, { "epoch": 0.06995390047424768, "grad_norm": 307.908447265625, "learning_rate": 1.991642299132984e-06, "loss": 20.1562, "step": 7390 }, { "epoch": 0.06996336649596274, "grad_norm": 630.3785400390625, "learning_rate": 1.9916383431979876e-06, "loss": 48.7812, "step": 7391 }, { "epoch": 0.0699728325176778, "grad_norm": 405.2330322265625, "learning_rate": 1.991634386330916e-06, "loss": 24.5234, "step": 7392 }, { "epoch": 0.06998229853939285, "grad_norm": 478.7851867675781, "learning_rate": 1.9916304285317723e-06, "loss": 48.5938, "step": 7393 }, { "epoch": 0.06999176456110791, "grad_norm": 425.759521484375, "learning_rate": 1.9916264698005604e-06, "loss": 44.8125, "step": 7394 }, { "epoch": 0.07000123058282295, "grad_norm": 416.8600158691406, "learning_rate": 1.9916225101372844e-06, "loss": 39.0391, "step": 7395 }, { "epoch": 0.07001069660453801, "grad_norm": 487.284912109375, "learning_rate": 1.9916185495419473e-06, "loss": 15.8359, "step": 7396 }, { "epoch": 0.07002016262625306, "grad_norm": 3.871490478515625, "learning_rate": 1.991614588014554e-06, "loss": 1.0786, "step": 7397 }, { "epoch": 0.07002962864796812, "grad_norm": 319.32958984375, "learning_rate": 1.991610625555107e-06, "loss": 26.0625, "step": 7398 }, { "epoch": 0.07003909466968317, "grad_norm": 2.9754278659820557, "learning_rate": 1.9916066621636107e-06, "loss": 0.8823, "step": 7399 }, { "epoch": 0.07004856069139823, "grad_norm": 459.6029052734375, "learning_rate": 1.9916026978400683e-06, "loss": 37.3594, "step": 7400 }, { "epoch": 0.07005802671311329, "grad_norm": 178.2370147705078, "learning_rate": 1.9915987325844843e-06, "loss": 20.1172, "step": 7401 }, { "epoch": 0.07006749273482833, "grad_norm": 351.64031982421875, "learning_rate": 1.9915947663968616e-06, "loss": 11.4766, "step": 7402 }, { "epoch": 0.07007695875654339, "grad_norm": 493.5525817871094, "learning_rate": 1.9915907992772046e-06, "loss": 43.25, "step": 7403 }, { "epoch": 0.07008642477825844, "grad_norm": 690.6556396484375, "learning_rate": 1.9915868312255165e-06, "loss": 29.8906, "step": 7404 }, { "epoch": 0.0700958907999735, "grad_norm": 342.9231262207031, "learning_rate": 1.9915828622418017e-06, "loss": 28.3008, "step": 7405 }, { "epoch": 0.07010535682168854, "grad_norm": 634.5818481445312, "learning_rate": 1.9915788923260634e-06, "loss": 34.2031, "step": 7406 }, { "epoch": 0.0701148228434036, "grad_norm": 449.60992431640625, "learning_rate": 1.991574921478305e-06, "loss": 66.25, "step": 7407 }, { "epoch": 0.07012428886511865, "grad_norm": 372.2669982910156, "learning_rate": 1.9915709496985312e-06, "loss": 40.8047, "step": 7408 }, { "epoch": 0.07013375488683371, "grad_norm": 359.44012451171875, "learning_rate": 1.9915669769867453e-06, "loss": 26.0469, "step": 7409 }, { "epoch": 0.07014322090854877, "grad_norm": 350.62225341796875, "learning_rate": 1.9915630033429504e-06, "loss": 34.4766, "step": 7410 }, { "epoch": 0.07015268693026382, "grad_norm": 225.7040252685547, "learning_rate": 1.991559028767151e-06, "loss": 25.3906, "step": 7411 }, { "epoch": 0.07016215295197888, "grad_norm": 235.70399475097656, "learning_rate": 1.991555053259351e-06, "loss": 21.4297, "step": 7412 }, { "epoch": 0.07017161897369392, "grad_norm": 362.5705871582031, "learning_rate": 1.991551076819553e-06, "loss": 27.3828, "step": 7413 }, { "epoch": 0.07018108499540898, "grad_norm": 349.64947509765625, "learning_rate": 1.991547099447762e-06, "loss": 22.2969, "step": 7414 }, { "epoch": 0.07019055101712403, "grad_norm": 440.45037841796875, "learning_rate": 1.9915431211439816e-06, "loss": 39.7969, "step": 7415 }, { "epoch": 0.07020001703883909, "grad_norm": 706.9877319335938, "learning_rate": 1.9915391419082145e-06, "loss": 52.125, "step": 7416 }, { "epoch": 0.07020948306055413, "grad_norm": 227.94573974609375, "learning_rate": 1.9915351617404655e-06, "loss": 20.3047, "step": 7417 }, { "epoch": 0.0702189490822692, "grad_norm": 331.25921630859375, "learning_rate": 1.9915311806407376e-06, "loss": 12.1562, "step": 7418 }, { "epoch": 0.07022841510398425, "grad_norm": 488.3271789550781, "learning_rate": 1.991527198609035e-06, "loss": 44.5156, "step": 7419 }, { "epoch": 0.0702378811256993, "grad_norm": 2.469583511352539, "learning_rate": 1.9915232156453615e-06, "loss": 0.8057, "step": 7420 }, { "epoch": 0.07024734714741436, "grad_norm": 694.6493530273438, "learning_rate": 1.991519231749721e-06, "loss": 28.4531, "step": 7421 }, { "epoch": 0.0702568131691294, "grad_norm": 2.810957431793213, "learning_rate": 1.991515246922116e-06, "loss": 0.8662, "step": 7422 }, { "epoch": 0.07026627919084447, "grad_norm": 215.01780700683594, "learning_rate": 1.991511261162552e-06, "loss": 25.8047, "step": 7423 }, { "epoch": 0.07027574521255951, "grad_norm": 353.0612487792969, "learning_rate": 1.9915072744710315e-06, "loss": 34.0, "step": 7424 }, { "epoch": 0.07028521123427457, "grad_norm": 388.214111328125, "learning_rate": 1.9915032868475587e-06, "loss": 22.5781, "step": 7425 }, { "epoch": 0.07029467725598963, "grad_norm": 280.5917053222656, "learning_rate": 1.9914992982921375e-06, "loss": 29.9297, "step": 7426 }, { "epoch": 0.07030414327770468, "grad_norm": 796.8793334960938, "learning_rate": 1.9914953088047713e-06, "loss": 46.3281, "step": 7427 }, { "epoch": 0.07031360929941974, "grad_norm": 512.1459350585938, "learning_rate": 1.9914913183854643e-06, "loss": 29.0938, "step": 7428 }, { "epoch": 0.07032307532113478, "grad_norm": 533.087646484375, "learning_rate": 1.9914873270342195e-06, "loss": 50.4141, "step": 7429 }, { "epoch": 0.07033254134284984, "grad_norm": 429.98944091796875, "learning_rate": 1.9914833347510415e-06, "loss": 42.4375, "step": 7430 }, { "epoch": 0.07034200736456489, "grad_norm": 247.05709838867188, "learning_rate": 1.9914793415359337e-06, "loss": 20.3438, "step": 7431 }, { "epoch": 0.07035147338627995, "grad_norm": 693.5121459960938, "learning_rate": 1.9914753473888995e-06, "loss": 68.5938, "step": 7432 }, { "epoch": 0.070360939407995, "grad_norm": 217.20272827148438, "learning_rate": 1.991471352309943e-06, "loss": 27.4766, "step": 7433 }, { "epoch": 0.07037040542971006, "grad_norm": 172.85911560058594, "learning_rate": 1.991467356299068e-06, "loss": 21.4375, "step": 7434 }, { "epoch": 0.07037987145142512, "grad_norm": 509.9386291503906, "learning_rate": 1.9914633593562782e-06, "loss": 37.9375, "step": 7435 }, { "epoch": 0.07038933747314016, "grad_norm": 559.1433715820312, "learning_rate": 1.9914593614815774e-06, "loss": 38.4609, "step": 7436 }, { "epoch": 0.07039880349485522, "grad_norm": 516.0458374023438, "learning_rate": 1.991455362674969e-06, "loss": 66.375, "step": 7437 }, { "epoch": 0.07040826951657027, "grad_norm": 262.7881164550781, "learning_rate": 1.991451362936458e-06, "loss": 26.1406, "step": 7438 }, { "epoch": 0.07041773553828533, "grad_norm": 506.0832824707031, "learning_rate": 1.991447362266046e-06, "loss": 45.9062, "step": 7439 }, { "epoch": 0.07042720156000037, "grad_norm": 403.35589599609375, "learning_rate": 1.9914433606637387e-06, "loss": 35.6328, "step": 7440 }, { "epoch": 0.07043666758171543, "grad_norm": 1269.18212890625, "learning_rate": 1.991439358129539e-06, "loss": 51.8828, "step": 7441 }, { "epoch": 0.07044613360343048, "grad_norm": 170.7887725830078, "learning_rate": 1.9914353546634507e-06, "loss": 27.5703, "step": 7442 }, { "epoch": 0.07045559962514554, "grad_norm": 251.50022888183594, "learning_rate": 1.9914313502654776e-06, "loss": 20.8594, "step": 7443 }, { "epoch": 0.0704650656468606, "grad_norm": 764.9610595703125, "learning_rate": 1.991427344935624e-06, "loss": 53.043, "step": 7444 }, { "epoch": 0.07047453166857565, "grad_norm": 421.0448913574219, "learning_rate": 1.9914233386738925e-06, "loss": 45.2812, "step": 7445 }, { "epoch": 0.0704839976902907, "grad_norm": 476.44927978515625, "learning_rate": 1.991419331480288e-06, "loss": 35.2734, "step": 7446 }, { "epoch": 0.07049346371200575, "grad_norm": 340.5408935546875, "learning_rate": 1.9914153233548135e-06, "loss": 21.0781, "step": 7447 }, { "epoch": 0.07050292973372081, "grad_norm": 679.7926025390625, "learning_rate": 1.9914113142974736e-06, "loss": 43.625, "step": 7448 }, { "epoch": 0.07051239575543586, "grad_norm": 210.23504638671875, "learning_rate": 1.9914073043082712e-06, "loss": 22.1406, "step": 7449 }, { "epoch": 0.07052186177715092, "grad_norm": 340.9158935546875, "learning_rate": 1.9914032933872106e-06, "loss": 23.6016, "step": 7450 }, { "epoch": 0.07053132779886596, "grad_norm": 349.7128601074219, "learning_rate": 1.991399281534295e-06, "loss": 47.6562, "step": 7451 }, { "epoch": 0.07054079382058102, "grad_norm": 648.066162109375, "learning_rate": 1.991395268749529e-06, "loss": 37.7656, "step": 7452 }, { "epoch": 0.07055025984229608, "grad_norm": 458.8071594238281, "learning_rate": 1.9913912550329155e-06, "loss": 40.2188, "step": 7453 }, { "epoch": 0.07055972586401113, "grad_norm": 336.4267883300781, "learning_rate": 1.9913872403844593e-06, "loss": 19.1484, "step": 7454 }, { "epoch": 0.07056919188572619, "grad_norm": 347.7162170410156, "learning_rate": 1.991383224804163e-06, "loss": 24.0781, "step": 7455 }, { "epoch": 0.07057865790744124, "grad_norm": 315.583740234375, "learning_rate": 1.9913792082920316e-06, "loss": 28.5312, "step": 7456 }, { "epoch": 0.0705881239291563, "grad_norm": 219.91677856445312, "learning_rate": 1.991375190848068e-06, "loss": 27.75, "step": 7457 }, { "epoch": 0.07059758995087134, "grad_norm": 202.8128204345703, "learning_rate": 1.991371172472276e-06, "loss": 20.7344, "step": 7458 }, { "epoch": 0.0706070559725864, "grad_norm": 584.9964599609375, "learning_rate": 1.9913671531646597e-06, "loss": 45.3438, "step": 7459 }, { "epoch": 0.07061652199430145, "grad_norm": 607.0519409179688, "learning_rate": 1.9913631329252228e-06, "loss": 22.293, "step": 7460 }, { "epoch": 0.07062598801601651, "grad_norm": 236.73223876953125, "learning_rate": 1.991359111753969e-06, "loss": 22.5547, "step": 7461 }, { "epoch": 0.07063545403773157, "grad_norm": 371.2930603027344, "learning_rate": 1.991355089650902e-06, "loss": 32.8984, "step": 7462 }, { "epoch": 0.07064492005944661, "grad_norm": 262.52850341796875, "learning_rate": 1.991351066616026e-06, "loss": 22.1484, "step": 7463 }, { "epoch": 0.07065438608116167, "grad_norm": 337.3019714355469, "learning_rate": 1.9913470426493444e-06, "loss": 33.1562, "step": 7464 }, { "epoch": 0.07066385210287672, "grad_norm": 158.21029663085938, "learning_rate": 1.9913430177508612e-06, "loss": 15.4453, "step": 7465 }, { "epoch": 0.07067331812459178, "grad_norm": 258.78289794921875, "learning_rate": 1.9913389919205795e-06, "loss": 24.4688, "step": 7466 }, { "epoch": 0.07068278414630683, "grad_norm": 957.1556396484375, "learning_rate": 1.9913349651585044e-06, "loss": 18.4922, "step": 7467 }, { "epoch": 0.07069225016802189, "grad_norm": 290.12139892578125, "learning_rate": 1.9913309374646384e-06, "loss": 21.0859, "step": 7468 }, { "epoch": 0.07070171618973695, "grad_norm": 264.09613037109375, "learning_rate": 1.9913269088389862e-06, "loss": 19.6719, "step": 7469 }, { "epoch": 0.07071118221145199, "grad_norm": 411.2812805175781, "learning_rate": 1.9913228792815508e-06, "loss": 31.0859, "step": 7470 }, { "epoch": 0.07072064823316705, "grad_norm": 2.386233329772949, "learning_rate": 1.9913188487923363e-06, "loss": 0.8325, "step": 7471 }, { "epoch": 0.0707301142548821, "grad_norm": 695.2225952148438, "learning_rate": 1.991314817371347e-06, "loss": 57.9219, "step": 7472 }, { "epoch": 0.07073958027659716, "grad_norm": 336.0799865722656, "learning_rate": 1.991310785018586e-06, "loss": 47.4375, "step": 7473 }, { "epoch": 0.0707490462983122, "grad_norm": 222.18496704101562, "learning_rate": 1.9913067517340573e-06, "loss": 30.1719, "step": 7474 }, { "epoch": 0.07075851232002726, "grad_norm": 693.20068359375, "learning_rate": 1.991302717517765e-06, "loss": 32.3828, "step": 7475 }, { "epoch": 0.07076797834174231, "grad_norm": 429.52557373046875, "learning_rate": 1.9912986823697125e-06, "loss": 48.8125, "step": 7476 }, { "epoch": 0.07077744436345737, "grad_norm": 657.952392578125, "learning_rate": 1.9912946462899035e-06, "loss": 58.4062, "step": 7477 }, { "epoch": 0.07078691038517243, "grad_norm": 3.088881015777588, "learning_rate": 1.9912906092783427e-06, "loss": 0.8936, "step": 7478 }, { "epoch": 0.07079637640688748, "grad_norm": 386.4744567871094, "learning_rate": 1.9912865713350324e-06, "loss": 29.2344, "step": 7479 }, { "epoch": 0.07080584242860254, "grad_norm": 212.549072265625, "learning_rate": 1.991282532459978e-06, "loss": 18.9688, "step": 7480 }, { "epoch": 0.07081530845031758, "grad_norm": 421.8505554199219, "learning_rate": 1.9912784926531816e-06, "loss": 37.9219, "step": 7481 }, { "epoch": 0.07082477447203264, "grad_norm": 427.1658020019531, "learning_rate": 1.9912744519146487e-06, "loss": 53.5781, "step": 7482 }, { "epoch": 0.07083424049374769, "grad_norm": 287.09490966796875, "learning_rate": 1.9912704102443816e-06, "loss": 25.4609, "step": 7483 }, { "epoch": 0.07084370651546275, "grad_norm": 281.4053649902344, "learning_rate": 1.9912663676423855e-06, "loss": 27.5859, "step": 7484 }, { "epoch": 0.0708531725371778, "grad_norm": 2.4766476154327393, "learning_rate": 1.991262324108663e-06, "loss": 0.7642, "step": 7485 }, { "epoch": 0.07086263855889285, "grad_norm": 228.0844268798828, "learning_rate": 1.9912582796432185e-06, "loss": 29.4609, "step": 7486 }, { "epoch": 0.07087210458060791, "grad_norm": 328.5888977050781, "learning_rate": 1.991254234246056e-06, "loss": 33.7344, "step": 7487 }, { "epoch": 0.07088157060232296, "grad_norm": 229.33839416503906, "learning_rate": 1.9912501879171784e-06, "loss": 18.4688, "step": 7488 }, { "epoch": 0.07089103662403802, "grad_norm": 243.5563507080078, "learning_rate": 1.991246140656591e-06, "loss": 23.3906, "step": 7489 }, { "epoch": 0.07090050264575307, "grad_norm": 676.9498291015625, "learning_rate": 1.991242092464296e-06, "loss": 45.2344, "step": 7490 }, { "epoch": 0.07090996866746813, "grad_norm": 3.4449708461761475, "learning_rate": 1.991238043340298e-06, "loss": 1.0835, "step": 7491 }, { "epoch": 0.07091943468918317, "grad_norm": 401.778076171875, "learning_rate": 1.991233993284601e-06, "loss": 57.875, "step": 7492 }, { "epoch": 0.07092890071089823, "grad_norm": 179.29722595214844, "learning_rate": 1.9912299422972083e-06, "loss": 22.8359, "step": 7493 }, { "epoch": 0.07093836673261328, "grad_norm": 235.78250122070312, "learning_rate": 1.991225890378124e-06, "loss": 22.8203, "step": 7494 }, { "epoch": 0.07094783275432834, "grad_norm": 437.3791198730469, "learning_rate": 1.991221837527352e-06, "loss": 38.6719, "step": 7495 }, { "epoch": 0.0709572987760434, "grad_norm": 590.5224609375, "learning_rate": 1.991217783744896e-06, "loss": 23.8203, "step": 7496 }, { "epoch": 0.07096676479775844, "grad_norm": 354.2182922363281, "learning_rate": 1.9912137290307594e-06, "loss": 32.3281, "step": 7497 }, { "epoch": 0.0709762308194735, "grad_norm": 315.2845764160156, "learning_rate": 1.9912096733849463e-06, "loss": 39.75, "step": 7498 }, { "epoch": 0.07098569684118855, "grad_norm": 398.7052001953125, "learning_rate": 1.991205616807461e-06, "loss": 19.7656, "step": 7499 }, { "epoch": 0.07099516286290361, "grad_norm": 254.3289031982422, "learning_rate": 1.9912015592983067e-06, "loss": 27.8594, "step": 7500 }, { "epoch": 0.07100462888461866, "grad_norm": 243.35340881347656, "learning_rate": 1.9911975008574874e-06, "loss": 19.3672, "step": 7501 }, { "epoch": 0.07101409490633372, "grad_norm": 323.45721435546875, "learning_rate": 1.991193441485007e-06, "loss": 22.4766, "step": 7502 }, { "epoch": 0.07102356092804876, "grad_norm": 3.121488571166992, "learning_rate": 1.9911893811808695e-06, "loss": 0.9658, "step": 7503 }, { "epoch": 0.07103302694976382, "grad_norm": 367.06036376953125, "learning_rate": 1.991185319945078e-06, "loss": 20.2812, "step": 7504 }, { "epoch": 0.07104249297147888, "grad_norm": 450.65777587890625, "learning_rate": 1.991181257777637e-06, "loss": 33.8438, "step": 7505 }, { "epoch": 0.07105195899319393, "grad_norm": 258.5393981933594, "learning_rate": 1.99117719467855e-06, "loss": 19.375, "step": 7506 }, { "epoch": 0.07106142501490899, "grad_norm": 331.35601806640625, "learning_rate": 1.991173130647821e-06, "loss": 41.2578, "step": 7507 }, { "epoch": 0.07107089103662403, "grad_norm": 643.834228515625, "learning_rate": 1.9911690656854542e-06, "loss": 57.7656, "step": 7508 }, { "epoch": 0.0710803570583391, "grad_norm": 380.4493408203125, "learning_rate": 1.991164999791452e-06, "loss": 26.6016, "step": 7509 }, { "epoch": 0.07108982308005414, "grad_norm": 149.14453125, "learning_rate": 1.99116093296582e-06, "loss": 8.3945, "step": 7510 }, { "epoch": 0.0710992891017692, "grad_norm": 1559.95556640625, "learning_rate": 1.991156865208561e-06, "loss": 46.8047, "step": 7511 }, { "epoch": 0.07110875512348426, "grad_norm": 558.5841674804688, "learning_rate": 1.9911527965196784e-06, "loss": 28.4609, "step": 7512 }, { "epoch": 0.0711182211451993, "grad_norm": 289.2507019042969, "learning_rate": 1.9911487268991774e-06, "loss": 21.7422, "step": 7513 }, { "epoch": 0.07112768716691437, "grad_norm": 364.2064208984375, "learning_rate": 1.991144656347061e-06, "loss": 30.75, "step": 7514 }, { "epoch": 0.07113715318862941, "grad_norm": 204.95802307128906, "learning_rate": 1.991140584863333e-06, "loss": 20.9492, "step": 7515 }, { "epoch": 0.07114661921034447, "grad_norm": 291.97039794921875, "learning_rate": 1.991136512447997e-06, "loss": 29.4844, "step": 7516 }, { "epoch": 0.07115608523205952, "grad_norm": 811.9486694335938, "learning_rate": 1.9911324391010577e-06, "loss": 24.2539, "step": 7517 }, { "epoch": 0.07116555125377458, "grad_norm": 500.98046875, "learning_rate": 1.991128364822518e-06, "loss": 67.1211, "step": 7518 }, { "epoch": 0.07117501727548962, "grad_norm": 425.7456359863281, "learning_rate": 1.991124289612382e-06, "loss": 54.1875, "step": 7519 }, { "epoch": 0.07118448329720468, "grad_norm": 528.4595336914062, "learning_rate": 1.991120213470654e-06, "loss": 33.6367, "step": 7520 }, { "epoch": 0.07119394931891974, "grad_norm": 276.7221984863281, "learning_rate": 1.991116136397337e-06, "loss": 35.2344, "step": 7521 }, { "epoch": 0.07120341534063479, "grad_norm": 339.92303466796875, "learning_rate": 1.991112058392436e-06, "loss": 13.7109, "step": 7522 }, { "epoch": 0.07121288136234985, "grad_norm": 580.5543212890625, "learning_rate": 1.9911079794559537e-06, "loss": 39.3438, "step": 7523 }, { "epoch": 0.0712223473840649, "grad_norm": 195.14402770996094, "learning_rate": 1.991103899587895e-06, "loss": 18.4844, "step": 7524 }, { "epoch": 0.07123181340577996, "grad_norm": 516.8076171875, "learning_rate": 1.9910998187882623e-06, "loss": 20.6875, "step": 7525 }, { "epoch": 0.071241279427495, "grad_norm": 529.9879760742188, "learning_rate": 1.9910957370570604e-06, "loss": 30.7031, "step": 7526 }, { "epoch": 0.07125074544921006, "grad_norm": 3.1344783306121826, "learning_rate": 1.991091654394293e-06, "loss": 0.9546, "step": 7527 }, { "epoch": 0.07126021147092511, "grad_norm": 661.7390747070312, "learning_rate": 1.991087570799964e-06, "loss": 43.9844, "step": 7528 }, { "epoch": 0.07126967749264017, "grad_norm": 553.7164306640625, "learning_rate": 1.9910834862740773e-06, "loss": 42.5391, "step": 7529 }, { "epoch": 0.07127914351435523, "grad_norm": 992.8172607421875, "learning_rate": 1.9910794008166364e-06, "loss": 53.625, "step": 7530 }, { "epoch": 0.07128860953607027, "grad_norm": 613.691162109375, "learning_rate": 1.9910753144276457e-06, "loss": 55.9219, "step": 7531 }, { "epoch": 0.07129807555778533, "grad_norm": 514.1185913085938, "learning_rate": 1.9910712271071084e-06, "loss": 46.8125, "step": 7532 }, { "epoch": 0.07130754157950038, "grad_norm": 483.29058837890625, "learning_rate": 1.9910671388550284e-06, "loss": 52.5469, "step": 7533 }, { "epoch": 0.07131700760121544, "grad_norm": 372.5434265136719, "learning_rate": 1.99106304967141e-06, "loss": 20.6562, "step": 7534 }, { "epoch": 0.07132647362293049, "grad_norm": 849.6724243164062, "learning_rate": 1.991058959556257e-06, "loss": 56.4062, "step": 7535 }, { "epoch": 0.07133593964464555, "grad_norm": 700.949462890625, "learning_rate": 1.9910548685095725e-06, "loss": 34.4727, "step": 7536 }, { "epoch": 0.07134540566636059, "grad_norm": 3.351097345352173, "learning_rate": 1.991050776531361e-06, "loss": 0.7992, "step": 7537 }, { "epoch": 0.07135487168807565, "grad_norm": 762.6841430664062, "learning_rate": 1.9910466836216266e-06, "loss": 38.9062, "step": 7538 }, { "epoch": 0.07136433770979071, "grad_norm": 287.3771667480469, "learning_rate": 1.991042589780373e-06, "loss": 25.9297, "step": 7539 }, { "epoch": 0.07137380373150576, "grad_norm": 258.9764099121094, "learning_rate": 1.9910384950076035e-06, "loss": 18.1875, "step": 7540 }, { "epoch": 0.07138326975322082, "grad_norm": 177.09970092773438, "learning_rate": 1.991034399303322e-06, "loss": 21.8984, "step": 7541 }, { "epoch": 0.07139273577493586, "grad_norm": 250.75469970703125, "learning_rate": 1.991030302667533e-06, "loss": 22.5078, "step": 7542 }, { "epoch": 0.07140220179665092, "grad_norm": 3.5665347576141357, "learning_rate": 1.99102620510024e-06, "loss": 0.937, "step": 7543 }, { "epoch": 0.07141166781836597, "grad_norm": 443.7077331542969, "learning_rate": 1.9910221066014468e-06, "loss": 24.6562, "step": 7544 }, { "epoch": 0.07142113384008103, "grad_norm": 346.6645202636719, "learning_rate": 1.9910180071711573e-06, "loss": 41.625, "step": 7545 }, { "epoch": 0.07143059986179608, "grad_norm": 205.47845458984375, "learning_rate": 1.991013906809375e-06, "loss": 15.9688, "step": 7546 }, { "epoch": 0.07144006588351114, "grad_norm": 277.9532165527344, "learning_rate": 1.9910098055161043e-06, "loss": 21.0781, "step": 7547 }, { "epoch": 0.0714495319052262, "grad_norm": 961.85009765625, "learning_rate": 1.991005703291349e-06, "loss": 85.9062, "step": 7548 }, { "epoch": 0.07145899792694124, "grad_norm": 584.6773071289062, "learning_rate": 1.9910016001351127e-06, "loss": 21.0547, "step": 7549 }, { "epoch": 0.0714684639486563, "grad_norm": 239.01707458496094, "learning_rate": 1.9909974960473994e-06, "loss": 27.3281, "step": 7550 }, { "epoch": 0.07147792997037135, "grad_norm": 537.2697143554688, "learning_rate": 1.990993391028213e-06, "loss": 39.375, "step": 7551 }, { "epoch": 0.07148739599208641, "grad_norm": 427.29949951171875, "learning_rate": 1.9909892850775574e-06, "loss": 29.8438, "step": 7552 }, { "epoch": 0.07149686201380145, "grad_norm": 298.5392761230469, "learning_rate": 1.990985178195436e-06, "loss": 24.25, "step": 7553 }, { "epoch": 0.07150632803551651, "grad_norm": 764.7437744140625, "learning_rate": 1.9909810703818533e-06, "loss": 46.5781, "step": 7554 }, { "epoch": 0.07151579405723156, "grad_norm": 214.55145263671875, "learning_rate": 1.9909769616368128e-06, "loss": 26.8359, "step": 7555 }, { "epoch": 0.07152526007894662, "grad_norm": 253.7930908203125, "learning_rate": 1.990972851960318e-06, "loss": 29.8672, "step": 7556 }, { "epoch": 0.07153472610066168, "grad_norm": 462.6033630371094, "learning_rate": 1.990968741352374e-06, "loss": 48.3203, "step": 7557 }, { "epoch": 0.07154419212237673, "grad_norm": 360.2895202636719, "learning_rate": 1.990964629812983e-06, "loss": 15.1367, "step": 7558 }, { "epoch": 0.07155365814409179, "grad_norm": 533.6305541992188, "learning_rate": 1.99096051734215e-06, "loss": 34.75, "step": 7559 }, { "epoch": 0.07156312416580683, "grad_norm": 559.9769287109375, "learning_rate": 1.9909564039398788e-06, "loss": 29.1172, "step": 7560 }, { "epoch": 0.07157259018752189, "grad_norm": 676.2191772460938, "learning_rate": 1.990952289606173e-06, "loss": 55.6953, "step": 7561 }, { "epoch": 0.07158205620923694, "grad_norm": 372.63348388671875, "learning_rate": 1.990948174341036e-06, "loss": 30.6406, "step": 7562 }, { "epoch": 0.071591522230952, "grad_norm": 284.4420166015625, "learning_rate": 1.990944058144473e-06, "loss": 22.4766, "step": 7563 }, { "epoch": 0.07160098825266706, "grad_norm": 571.2545776367188, "learning_rate": 1.9909399410164864e-06, "loss": 60.4062, "step": 7564 }, { "epoch": 0.0716104542743821, "grad_norm": 461.0743713378906, "learning_rate": 1.990935822957081e-06, "loss": 33.8438, "step": 7565 }, { "epoch": 0.07161992029609716, "grad_norm": 262.89276123046875, "learning_rate": 1.9909317039662607e-06, "loss": 18.0469, "step": 7566 }, { "epoch": 0.07162938631781221, "grad_norm": 4.599905967712402, "learning_rate": 1.9909275840440283e-06, "loss": 0.938, "step": 7567 }, { "epoch": 0.07163885233952727, "grad_norm": 494.8286437988281, "learning_rate": 1.9909234631903892e-06, "loss": 45.8477, "step": 7568 }, { "epoch": 0.07164831836124232, "grad_norm": 568.5359497070312, "learning_rate": 1.990919341405346e-06, "loss": 22.875, "step": 7569 }, { "epoch": 0.07165778438295738, "grad_norm": 242.5527801513672, "learning_rate": 1.990915218688903e-06, "loss": 24.1094, "step": 7570 }, { "epoch": 0.07166725040467242, "grad_norm": 247.5305938720703, "learning_rate": 1.9909110950410646e-06, "loss": 22.8906, "step": 7571 }, { "epoch": 0.07167671642638748, "grad_norm": 345.92462158203125, "learning_rate": 1.990906970461834e-06, "loss": 20.7578, "step": 7572 }, { "epoch": 0.07168618244810254, "grad_norm": 710.6264038085938, "learning_rate": 1.9909028449512155e-06, "loss": 67.75, "step": 7573 }, { "epoch": 0.07169564846981759, "grad_norm": 3.1058802604675293, "learning_rate": 1.9908987185092126e-06, "loss": 0.8813, "step": 7574 }, { "epoch": 0.07170511449153265, "grad_norm": 766.076904296875, "learning_rate": 1.9908945911358296e-06, "loss": 69.1719, "step": 7575 }, { "epoch": 0.0717145805132477, "grad_norm": 811.8724975585938, "learning_rate": 1.9908904628310695e-06, "loss": 69.0156, "step": 7576 }, { "epoch": 0.07172404653496275, "grad_norm": 3.0067429542541504, "learning_rate": 1.9908863335949375e-06, "loss": 0.9683, "step": 7577 }, { "epoch": 0.0717335125566778, "grad_norm": 1122.7232666015625, "learning_rate": 1.9908822034274367e-06, "loss": 73.3125, "step": 7578 }, { "epoch": 0.07174297857839286, "grad_norm": 422.63458251953125, "learning_rate": 1.990878072328571e-06, "loss": 45.0938, "step": 7579 }, { "epoch": 0.0717524446001079, "grad_norm": 1287.688232421875, "learning_rate": 1.9908739402983443e-06, "loss": 55.9375, "step": 7580 }, { "epoch": 0.07176191062182297, "grad_norm": 361.4249572753906, "learning_rate": 1.9908698073367605e-06, "loss": 32.7812, "step": 7581 }, { "epoch": 0.07177137664353803, "grad_norm": 334.2077331542969, "learning_rate": 1.9908656734438234e-06, "loss": 52.6016, "step": 7582 }, { "epoch": 0.07178084266525307, "grad_norm": 571.105224609375, "learning_rate": 1.9908615386195376e-06, "loss": 25.0312, "step": 7583 }, { "epoch": 0.07179030868696813, "grad_norm": 889.7496337890625, "learning_rate": 1.990857402863906e-06, "loss": 26.625, "step": 7584 }, { "epoch": 0.07179977470868318, "grad_norm": 849.9676513671875, "learning_rate": 1.990853266176933e-06, "loss": 59.3906, "step": 7585 }, { "epoch": 0.07180924073039824, "grad_norm": 702.5534057617188, "learning_rate": 1.9908491285586224e-06, "loss": 36.4922, "step": 7586 }, { "epoch": 0.07181870675211328, "grad_norm": 485.8114929199219, "learning_rate": 1.990844990008978e-06, "loss": 26.7266, "step": 7587 }, { "epoch": 0.07182817277382834, "grad_norm": 538.670654296875, "learning_rate": 1.9908408505280037e-06, "loss": 53.2812, "step": 7588 }, { "epoch": 0.07183763879554339, "grad_norm": 316.6219787597656, "learning_rate": 1.990836710115704e-06, "loss": 36.4844, "step": 7589 }, { "epoch": 0.07184710481725845, "grad_norm": 469.3204345703125, "learning_rate": 1.9908325687720816e-06, "loss": 48.9375, "step": 7590 }, { "epoch": 0.07185657083897351, "grad_norm": 2.9238390922546387, "learning_rate": 1.990828426497141e-06, "loss": 0.8948, "step": 7591 }, { "epoch": 0.07186603686068856, "grad_norm": 1306.8033447265625, "learning_rate": 1.9908242832908865e-06, "loss": 69.6953, "step": 7592 }, { "epoch": 0.07187550288240362, "grad_norm": 530.9364013671875, "learning_rate": 1.990820139153322e-06, "loss": 11.0352, "step": 7593 }, { "epoch": 0.07188496890411866, "grad_norm": 234.44483947753906, "learning_rate": 1.9908159940844503e-06, "loss": 19.3672, "step": 7594 }, { "epoch": 0.07189443492583372, "grad_norm": 551.1989135742188, "learning_rate": 1.9908118480842762e-06, "loss": 56.75, "step": 7595 }, { "epoch": 0.07190390094754877, "grad_norm": 276.1125793457031, "learning_rate": 1.9908077011528034e-06, "loss": 24.25, "step": 7596 }, { "epoch": 0.07191336696926383, "grad_norm": 1341.75830078125, "learning_rate": 1.990803553290036e-06, "loss": 42.5, "step": 7597 }, { "epoch": 0.07192283299097887, "grad_norm": 466.9098815917969, "learning_rate": 1.990799404495978e-06, "loss": 16.2422, "step": 7598 }, { "epoch": 0.07193229901269393, "grad_norm": 239.277099609375, "learning_rate": 1.9907952547706323e-06, "loss": 19.9062, "step": 7599 }, { "epoch": 0.071941765034409, "grad_norm": 450.53253173828125, "learning_rate": 1.9907911041140043e-06, "loss": 45.5156, "step": 7600 }, { "epoch": 0.07195123105612404, "grad_norm": 431.4561767578125, "learning_rate": 1.9907869525260966e-06, "loss": 31.8125, "step": 7601 }, { "epoch": 0.0719606970778391, "grad_norm": 245.55784606933594, "learning_rate": 1.9907828000069134e-06, "loss": 29.5938, "step": 7602 }, { "epoch": 0.07197016309955415, "grad_norm": 425.8849182128906, "learning_rate": 1.9907786465564595e-06, "loss": 22.1797, "step": 7603 }, { "epoch": 0.0719796291212692, "grad_norm": 227.21929931640625, "learning_rate": 1.9907744921747377e-06, "loss": 20.4453, "step": 7604 }, { "epoch": 0.07198909514298425, "grad_norm": 529.9819946289062, "learning_rate": 1.9907703368617523e-06, "loss": 30.2578, "step": 7605 }, { "epoch": 0.07199856116469931, "grad_norm": 515.3013916015625, "learning_rate": 1.9907661806175076e-06, "loss": 53.1406, "step": 7606 }, { "epoch": 0.07200802718641437, "grad_norm": 364.48419189453125, "learning_rate": 1.9907620234420064e-06, "loss": 30.4062, "step": 7607 }, { "epoch": 0.07201749320812942, "grad_norm": 498.19049072265625, "learning_rate": 1.990757865335254e-06, "loss": 39.6172, "step": 7608 }, { "epoch": 0.07202695922984448, "grad_norm": 450.4951477050781, "learning_rate": 1.9907537062972536e-06, "loss": 31.8516, "step": 7609 }, { "epoch": 0.07203642525155952, "grad_norm": 545.3037719726562, "learning_rate": 1.990749546328009e-06, "loss": 39.3438, "step": 7610 }, { "epoch": 0.07204589127327458, "grad_norm": 3.0689587593078613, "learning_rate": 1.9907453854275243e-06, "loss": 0.7441, "step": 7611 }, { "epoch": 0.07205535729498963, "grad_norm": 269.17596435546875, "learning_rate": 1.9907412235958034e-06, "loss": 21.0703, "step": 7612 }, { "epoch": 0.07206482331670469, "grad_norm": 390.932373046875, "learning_rate": 1.9907370608328503e-06, "loss": 22.3516, "step": 7613 }, { "epoch": 0.07207428933841974, "grad_norm": 637.3209838867188, "learning_rate": 1.9907328971386687e-06, "loss": 41.3906, "step": 7614 }, { "epoch": 0.0720837553601348, "grad_norm": 514.2140502929688, "learning_rate": 1.9907287325132625e-06, "loss": 25.4219, "step": 7615 }, { "epoch": 0.07209322138184986, "grad_norm": 867.0182495117188, "learning_rate": 1.990724566956636e-06, "loss": 31.9531, "step": 7616 }, { "epoch": 0.0721026874035649, "grad_norm": 3.556351661682129, "learning_rate": 1.9907204004687927e-06, "loss": 0.9634, "step": 7617 }, { "epoch": 0.07211215342527996, "grad_norm": 272.9183349609375, "learning_rate": 1.990716233049737e-06, "loss": 24.5156, "step": 7618 }, { "epoch": 0.07212161944699501, "grad_norm": 511.60369873046875, "learning_rate": 1.990712064699472e-06, "loss": 14.0352, "step": 7619 }, { "epoch": 0.07213108546871007, "grad_norm": 698.2618408203125, "learning_rate": 1.9907078954180025e-06, "loss": 35.9688, "step": 7620 }, { "epoch": 0.07214055149042511, "grad_norm": 517.1539916992188, "learning_rate": 1.990703725205332e-06, "loss": 36.4531, "step": 7621 }, { "epoch": 0.07215001751214017, "grad_norm": 231.24635314941406, "learning_rate": 1.9906995540614642e-06, "loss": 22.7812, "step": 7622 }, { "epoch": 0.07215948353385522, "grad_norm": 750.360107421875, "learning_rate": 1.9906953819864033e-06, "loss": 17.9688, "step": 7623 }, { "epoch": 0.07216894955557028, "grad_norm": 229.04315185546875, "learning_rate": 1.9906912089801536e-06, "loss": 28.3359, "step": 7624 }, { "epoch": 0.07217841557728534, "grad_norm": 3.4716289043426514, "learning_rate": 1.990687035042718e-06, "loss": 0.9473, "step": 7625 }, { "epoch": 0.07218788159900039, "grad_norm": 568.2807006835938, "learning_rate": 1.990682860174101e-06, "loss": 23.3203, "step": 7626 }, { "epoch": 0.07219734762071545, "grad_norm": 719.0282592773438, "learning_rate": 1.9906786843743073e-06, "loss": 23.0703, "step": 7627 }, { "epoch": 0.07220681364243049, "grad_norm": 222.8924560546875, "learning_rate": 1.9906745076433395e-06, "loss": 24.7656, "step": 7628 }, { "epoch": 0.07221627966414555, "grad_norm": 476.46466064453125, "learning_rate": 1.9906703299812022e-06, "loss": 41.2969, "step": 7629 }, { "epoch": 0.0722257456858606, "grad_norm": 251.43089294433594, "learning_rate": 1.9906661513878996e-06, "loss": 21.0625, "step": 7630 }, { "epoch": 0.07223521170757566, "grad_norm": 295.2666015625, "learning_rate": 1.990661971863435e-06, "loss": 21.5312, "step": 7631 }, { "epoch": 0.0722446777292907, "grad_norm": 283.6393737792969, "learning_rate": 1.9906577914078126e-06, "loss": 18.4219, "step": 7632 }, { "epoch": 0.07225414375100576, "grad_norm": 200.98802185058594, "learning_rate": 1.9906536100210364e-06, "loss": 17.0391, "step": 7633 }, { "epoch": 0.07226360977272082, "grad_norm": 911.3287963867188, "learning_rate": 1.99064942770311e-06, "loss": 69.5625, "step": 7634 }, { "epoch": 0.07227307579443587, "grad_norm": 795.3517456054688, "learning_rate": 1.9906452444540375e-06, "loss": 55.5938, "step": 7635 }, { "epoch": 0.07228254181615093, "grad_norm": 1243.19140625, "learning_rate": 1.9906410602738232e-06, "loss": 31.4531, "step": 7636 }, { "epoch": 0.07229200783786598, "grad_norm": 865.4872436523438, "learning_rate": 1.9906368751624707e-06, "loss": 53.5703, "step": 7637 }, { "epoch": 0.07230147385958104, "grad_norm": 594.3308715820312, "learning_rate": 1.9906326891199843e-06, "loss": 56.9219, "step": 7638 }, { "epoch": 0.07231093988129608, "grad_norm": 2282.688232421875, "learning_rate": 1.990628502146367e-06, "loss": 51.1484, "step": 7639 }, { "epoch": 0.07232040590301114, "grad_norm": 286.97314453125, "learning_rate": 1.9906243142416237e-06, "loss": 20.2891, "step": 7640 }, { "epoch": 0.07232987192472619, "grad_norm": 487.7582702636719, "learning_rate": 1.990620125405758e-06, "loss": 39.3906, "step": 7641 }, { "epoch": 0.07233933794644125, "grad_norm": 256.0733337402344, "learning_rate": 1.9906159356387737e-06, "loss": 26.6094, "step": 7642 }, { "epoch": 0.07234880396815631, "grad_norm": 208.0385284423828, "learning_rate": 1.9906117449406753e-06, "loss": 22.3828, "step": 7643 }, { "epoch": 0.07235826998987135, "grad_norm": 453.10260009765625, "learning_rate": 1.9906075533114657e-06, "loss": 24.4766, "step": 7644 }, { "epoch": 0.07236773601158641, "grad_norm": 616.23974609375, "learning_rate": 1.99060336075115e-06, "loss": 61.0312, "step": 7645 }, { "epoch": 0.07237720203330146, "grad_norm": 259.3553771972656, "learning_rate": 1.990599167259731e-06, "loss": 23.3906, "step": 7646 }, { "epoch": 0.07238666805501652, "grad_norm": 295.90618896484375, "learning_rate": 1.9905949728372137e-06, "loss": 31.0156, "step": 7647 }, { "epoch": 0.07239613407673157, "grad_norm": 449.4529724121094, "learning_rate": 1.990590777483601e-06, "loss": 39.9688, "step": 7648 }, { "epoch": 0.07240560009844663, "grad_norm": 1312.597900390625, "learning_rate": 1.9905865811988977e-06, "loss": 43.9688, "step": 7649 }, { "epoch": 0.07241506612016169, "grad_norm": 307.9249572753906, "learning_rate": 1.9905823839831077e-06, "loss": 10.4336, "step": 7650 }, { "epoch": 0.07242453214187673, "grad_norm": 479.5008239746094, "learning_rate": 1.9905781858362346e-06, "loss": 37.2266, "step": 7651 }, { "epoch": 0.07243399816359179, "grad_norm": 386.7599792480469, "learning_rate": 1.9905739867582824e-06, "loss": 23.0859, "step": 7652 }, { "epoch": 0.07244346418530684, "grad_norm": 408.5174560546875, "learning_rate": 1.990569786749255e-06, "loss": 30.7109, "step": 7653 }, { "epoch": 0.0724529302070219, "grad_norm": 3.23956298828125, "learning_rate": 1.9905655858091562e-06, "loss": 1.0239, "step": 7654 }, { "epoch": 0.07246239622873694, "grad_norm": 2.435913324356079, "learning_rate": 1.9905613839379906e-06, "loss": 0.8569, "step": 7655 }, { "epoch": 0.072471862250452, "grad_norm": 383.08489990234375, "learning_rate": 1.9905571811357616e-06, "loss": 62.0938, "step": 7656 }, { "epoch": 0.07248132827216705, "grad_norm": 335.65716552734375, "learning_rate": 1.9905529774024737e-06, "loss": 27.9609, "step": 7657 }, { "epoch": 0.07249079429388211, "grad_norm": 244.8297119140625, "learning_rate": 1.9905487727381296e-06, "loss": 17.1953, "step": 7658 }, { "epoch": 0.07250026031559717, "grad_norm": 742.3485717773438, "learning_rate": 1.9905445671427346e-06, "loss": 53.2812, "step": 7659 }, { "epoch": 0.07250972633731222, "grad_norm": 459.9317321777344, "learning_rate": 1.9905403606162924e-06, "loss": 25.2812, "step": 7660 }, { "epoch": 0.07251919235902728, "grad_norm": 3.1523725986480713, "learning_rate": 1.9905361531588064e-06, "loss": 0.9033, "step": 7661 }, { "epoch": 0.07252865838074232, "grad_norm": 336.8829040527344, "learning_rate": 1.990531944770281e-06, "loss": 45.8281, "step": 7662 }, { "epoch": 0.07253812440245738, "grad_norm": 368.6029968261719, "learning_rate": 1.9905277354507195e-06, "loss": 33.7969, "step": 7663 }, { "epoch": 0.07254759042417243, "grad_norm": 566.5282592773438, "learning_rate": 1.9905235252001267e-06, "loss": 40.1094, "step": 7664 }, { "epoch": 0.07255705644588749, "grad_norm": 184.63026428222656, "learning_rate": 1.9905193140185066e-06, "loss": 29.6719, "step": 7665 }, { "epoch": 0.07256652246760253, "grad_norm": 347.4270935058594, "learning_rate": 1.9905151019058622e-06, "loss": 43.1719, "step": 7666 }, { "epoch": 0.0725759884893176, "grad_norm": 632.411865234375, "learning_rate": 1.9905108888621986e-06, "loss": 37.0156, "step": 7667 }, { "epoch": 0.07258545451103265, "grad_norm": 1093.432373046875, "learning_rate": 1.9905066748875187e-06, "loss": 71.4375, "step": 7668 }, { "epoch": 0.0725949205327477, "grad_norm": 485.34173583984375, "learning_rate": 1.990502459981827e-06, "loss": 23.2891, "step": 7669 }, { "epoch": 0.07260438655446276, "grad_norm": 2.7469990253448486, "learning_rate": 1.9904982441451275e-06, "loss": 0.8574, "step": 7670 }, { "epoch": 0.0726138525761778, "grad_norm": 447.9671936035156, "learning_rate": 1.9904940273774246e-06, "loss": 45.7344, "step": 7671 }, { "epoch": 0.07262331859789287, "grad_norm": 237.0879364013672, "learning_rate": 1.990489809678721e-06, "loss": 25.0234, "step": 7672 }, { "epoch": 0.07263278461960791, "grad_norm": 694.0286254882812, "learning_rate": 1.990485591049022e-06, "loss": 27.5234, "step": 7673 }, { "epoch": 0.07264225064132297, "grad_norm": 175.84646606445312, "learning_rate": 1.9904813714883307e-06, "loss": 16.6641, "step": 7674 }, { "epoch": 0.07265171666303802, "grad_norm": 293.7280578613281, "learning_rate": 1.9904771509966513e-06, "loss": 27.5859, "step": 7675 }, { "epoch": 0.07266118268475308, "grad_norm": 334.0005798339844, "learning_rate": 1.990472929573988e-06, "loss": 26.2656, "step": 7676 }, { "epoch": 0.07267064870646814, "grad_norm": 322.2906188964844, "learning_rate": 1.9904687072203445e-06, "loss": 21.4375, "step": 7677 }, { "epoch": 0.07268011472818318, "grad_norm": 3.3601090908050537, "learning_rate": 1.9904644839357243e-06, "loss": 1.0142, "step": 7678 }, { "epoch": 0.07268958074989824, "grad_norm": 389.1446838378906, "learning_rate": 1.9904602597201324e-06, "loss": 34.9453, "step": 7679 }, { "epoch": 0.07269904677161329, "grad_norm": 658.0618896484375, "learning_rate": 1.9904560345735724e-06, "loss": 42.4531, "step": 7680 }, { "epoch": 0.07270851279332835, "grad_norm": 277.7979431152344, "learning_rate": 1.9904518084960483e-06, "loss": 26.9922, "step": 7681 }, { "epoch": 0.0727179788150434, "grad_norm": 246.89181518554688, "learning_rate": 1.9904475814875638e-06, "loss": 26.8438, "step": 7682 }, { "epoch": 0.07272744483675846, "grad_norm": 245.27906799316406, "learning_rate": 1.9904433535481227e-06, "loss": 21.8594, "step": 7683 }, { "epoch": 0.0727369108584735, "grad_norm": 338.4667053222656, "learning_rate": 1.9904391246777298e-06, "loss": 18.8281, "step": 7684 }, { "epoch": 0.07274637688018856, "grad_norm": 512.8969116210938, "learning_rate": 1.990434894876388e-06, "loss": 28.457, "step": 7685 }, { "epoch": 0.07275584290190362, "grad_norm": 234.09576416015625, "learning_rate": 1.990430664144102e-06, "loss": 23.8594, "step": 7686 }, { "epoch": 0.07276530892361867, "grad_norm": 600.8047485351562, "learning_rate": 1.9904264324808756e-06, "loss": 34.7734, "step": 7687 }, { "epoch": 0.07277477494533373, "grad_norm": 280.4901428222656, "learning_rate": 1.990422199886713e-06, "loss": 23.2031, "step": 7688 }, { "epoch": 0.07278424096704877, "grad_norm": 292.658447265625, "learning_rate": 1.990417966361618e-06, "loss": 31.0703, "step": 7689 }, { "epoch": 0.07279370698876383, "grad_norm": 233.83648681640625, "learning_rate": 1.9904137319055942e-06, "loss": 18.8359, "step": 7690 }, { "epoch": 0.07280317301047888, "grad_norm": 249.70205688476562, "learning_rate": 1.990409496518646e-06, "loss": 39.2344, "step": 7691 }, { "epoch": 0.07281263903219394, "grad_norm": 254.65052795410156, "learning_rate": 1.9904052602007774e-06, "loss": 22.0859, "step": 7692 }, { "epoch": 0.072822105053909, "grad_norm": 156.40245056152344, "learning_rate": 1.9904010229519923e-06, "loss": 21.2812, "step": 7693 }, { "epoch": 0.07283157107562405, "grad_norm": 204.80722045898438, "learning_rate": 1.990396784772295e-06, "loss": 25.9844, "step": 7694 }, { "epoch": 0.0728410370973391, "grad_norm": 3.2590298652648926, "learning_rate": 1.9903925456616886e-06, "loss": 0.9326, "step": 7695 }, { "epoch": 0.07285050311905415, "grad_norm": 191.26771545410156, "learning_rate": 1.990388305620178e-06, "loss": 19.5469, "step": 7696 }, { "epoch": 0.07285996914076921, "grad_norm": 372.13824462890625, "learning_rate": 1.990384064647766e-06, "loss": 42.9453, "step": 7697 }, { "epoch": 0.07286943516248426, "grad_norm": 324.00885009765625, "learning_rate": 1.9903798227444584e-06, "loss": 31.75, "step": 7698 }, { "epoch": 0.07287890118419932, "grad_norm": 188.19778442382812, "learning_rate": 1.990375579910258e-06, "loss": 19.8828, "step": 7699 }, { "epoch": 0.07288836720591436, "grad_norm": 378.6837158203125, "learning_rate": 1.990371336145169e-06, "loss": 15.8125, "step": 7700 }, { "epoch": 0.07289783322762942, "grad_norm": 950.8076171875, "learning_rate": 1.990367091449195e-06, "loss": 64.5469, "step": 7701 }, { "epoch": 0.07290729924934448, "grad_norm": 701.5776977539062, "learning_rate": 1.990362845822341e-06, "loss": 21.3203, "step": 7702 }, { "epoch": 0.07291676527105953, "grad_norm": 526.21484375, "learning_rate": 1.9903585992646096e-06, "loss": 37.125, "step": 7703 }, { "epoch": 0.07292623129277459, "grad_norm": 258.97271728515625, "learning_rate": 1.990354351776006e-06, "loss": 28.2969, "step": 7704 }, { "epoch": 0.07293569731448964, "grad_norm": 485.4208068847656, "learning_rate": 1.9903501033565335e-06, "loss": 56.7031, "step": 7705 }, { "epoch": 0.0729451633362047, "grad_norm": 3.766530752182007, "learning_rate": 1.9903458540061964e-06, "loss": 0.813, "step": 7706 }, { "epoch": 0.07295462935791974, "grad_norm": 556.9954833984375, "learning_rate": 1.990341603724999e-06, "loss": 52.0234, "step": 7707 }, { "epoch": 0.0729640953796348, "grad_norm": 259.781982421875, "learning_rate": 1.9903373525129443e-06, "loss": 26.4688, "step": 7708 }, { "epoch": 0.07297356140134985, "grad_norm": 373.9740905761719, "learning_rate": 1.9903331003700374e-06, "loss": 26.2344, "step": 7709 }, { "epoch": 0.07298302742306491, "grad_norm": 448.77252197265625, "learning_rate": 1.9903288472962815e-06, "loss": 20.5, "step": 7710 }, { "epoch": 0.07299249344477997, "grad_norm": 317.4896545410156, "learning_rate": 1.990324593291681e-06, "loss": 26.125, "step": 7711 }, { "epoch": 0.07300195946649501, "grad_norm": 495.4125671386719, "learning_rate": 1.9903203383562397e-06, "loss": 58.125, "step": 7712 }, { "epoch": 0.07301142548821007, "grad_norm": 399.5958251953125, "learning_rate": 1.990316082489962e-06, "loss": 46.2812, "step": 7713 }, { "epoch": 0.07302089150992512, "grad_norm": 173.22763061523438, "learning_rate": 1.990311825692851e-06, "loss": 21.7422, "step": 7714 }, { "epoch": 0.07303035753164018, "grad_norm": 204.42916870117188, "learning_rate": 1.990307567964912e-06, "loss": 20.6875, "step": 7715 }, { "epoch": 0.07303982355335523, "grad_norm": 3.385699987411499, "learning_rate": 1.990303309306148e-06, "loss": 1.0723, "step": 7716 }, { "epoch": 0.07304928957507029, "grad_norm": 376.9508972167969, "learning_rate": 1.9902990497165637e-06, "loss": 34.6719, "step": 7717 }, { "epoch": 0.07305875559678533, "grad_norm": 3.503671169281006, "learning_rate": 1.990294789196162e-06, "loss": 0.9058, "step": 7718 }, { "epoch": 0.07306822161850039, "grad_norm": 602.90966796875, "learning_rate": 1.990290527744948e-06, "loss": 20.7734, "step": 7719 }, { "epoch": 0.07307768764021545, "grad_norm": 159.93524169921875, "learning_rate": 1.9902862653629255e-06, "loss": 16.0, "step": 7720 }, { "epoch": 0.0730871536619305, "grad_norm": 495.773193359375, "learning_rate": 1.990282002050098e-06, "loss": 20.2969, "step": 7721 }, { "epoch": 0.07309661968364556, "grad_norm": 191.4704132080078, "learning_rate": 1.99027773780647e-06, "loss": 22.4297, "step": 7722 }, { "epoch": 0.0731060857053606, "grad_norm": 385.458251953125, "learning_rate": 1.9902734726320455e-06, "loss": 44.7188, "step": 7723 }, { "epoch": 0.07311555172707566, "grad_norm": 495.4996643066406, "learning_rate": 1.990269206526828e-06, "loss": 21.8984, "step": 7724 }, { "epoch": 0.07312501774879071, "grad_norm": 999.8109130859375, "learning_rate": 1.9902649394908224e-06, "loss": 51.8047, "step": 7725 }, { "epoch": 0.07313448377050577, "grad_norm": 363.8932800292969, "learning_rate": 1.9902606715240317e-06, "loss": 30.5, "step": 7726 }, { "epoch": 0.07314394979222082, "grad_norm": 547.3767700195312, "learning_rate": 1.9902564026264608e-06, "loss": 22.5781, "step": 7727 }, { "epoch": 0.07315341581393588, "grad_norm": 486.9081115722656, "learning_rate": 1.990252132798113e-06, "loss": 25.5469, "step": 7728 }, { "epoch": 0.07316288183565094, "grad_norm": 169.06141662597656, "learning_rate": 1.9902478620389925e-06, "loss": 20.2422, "step": 7729 }, { "epoch": 0.07317234785736598, "grad_norm": 476.5834655761719, "learning_rate": 1.9902435903491037e-06, "loss": 26.3672, "step": 7730 }, { "epoch": 0.07318181387908104, "grad_norm": 326.8877868652344, "learning_rate": 1.99023931772845e-06, "loss": 33.2188, "step": 7731 }, { "epoch": 0.07319127990079609, "grad_norm": 408.9595642089844, "learning_rate": 1.9902350441770363e-06, "loss": 25.0938, "step": 7732 }, { "epoch": 0.07320074592251115, "grad_norm": 502.09423828125, "learning_rate": 1.9902307696948653e-06, "loss": 29.1172, "step": 7733 }, { "epoch": 0.0732102119442262, "grad_norm": 333.50738525390625, "learning_rate": 1.9902264942819427e-06, "loss": 18.7344, "step": 7734 }, { "epoch": 0.07321967796594125, "grad_norm": 286.2925109863281, "learning_rate": 1.990222217938271e-06, "loss": 22.6094, "step": 7735 }, { "epoch": 0.07322914398765631, "grad_norm": 3.576902389526367, "learning_rate": 1.990217940663855e-06, "loss": 0.978, "step": 7736 }, { "epoch": 0.07323861000937136, "grad_norm": 215.3629608154297, "learning_rate": 1.9902136624586987e-06, "loss": 20.8438, "step": 7737 }, { "epoch": 0.07324807603108642, "grad_norm": 609.0733032226562, "learning_rate": 1.990209383322806e-06, "loss": 23.543, "step": 7738 }, { "epoch": 0.07325754205280147, "grad_norm": 317.9944763183594, "learning_rate": 1.9902051032561805e-06, "loss": 25.6406, "step": 7739 }, { "epoch": 0.07326700807451653, "grad_norm": 252.14210510253906, "learning_rate": 1.990200822258827e-06, "loss": 17.8047, "step": 7740 }, { "epoch": 0.07327647409623157, "grad_norm": 1964.086181640625, "learning_rate": 1.990196540330749e-06, "loss": 83.9375, "step": 7741 }, { "epoch": 0.07328594011794663, "grad_norm": 308.60162353515625, "learning_rate": 1.990192257471951e-06, "loss": 20.875, "step": 7742 }, { "epoch": 0.07329540613966168, "grad_norm": 264.1639099121094, "learning_rate": 1.990187973682436e-06, "loss": 18.6484, "step": 7743 }, { "epoch": 0.07330487216137674, "grad_norm": 565.743896484375, "learning_rate": 1.990183688962209e-06, "loss": 22.2812, "step": 7744 }, { "epoch": 0.0733143381830918, "grad_norm": 327.44146728515625, "learning_rate": 1.990179403311274e-06, "loss": 45.7031, "step": 7745 }, { "epoch": 0.07332380420480684, "grad_norm": 907.5953979492188, "learning_rate": 1.9901751167296346e-06, "loss": 57.8828, "step": 7746 }, { "epoch": 0.0733332702265219, "grad_norm": 837.8818969726562, "learning_rate": 1.990170829217295e-06, "loss": 50.4688, "step": 7747 }, { "epoch": 0.07334273624823695, "grad_norm": 517.406982421875, "learning_rate": 1.990166540774259e-06, "loss": 32.2578, "step": 7748 }, { "epoch": 0.07335220226995201, "grad_norm": 212.1629638671875, "learning_rate": 1.9901622514005314e-06, "loss": 18.1562, "step": 7749 }, { "epoch": 0.07336166829166706, "grad_norm": 426.07757568359375, "learning_rate": 1.9901579610961153e-06, "loss": 21.0586, "step": 7750 }, { "epoch": 0.07337113431338212, "grad_norm": 316.0277404785156, "learning_rate": 1.9901536698610153e-06, "loss": 24.1797, "step": 7751 }, { "epoch": 0.07338060033509716, "grad_norm": 605.6529541015625, "learning_rate": 1.990149377695235e-06, "loss": 50.6875, "step": 7752 }, { "epoch": 0.07339006635681222, "grad_norm": 410.1715393066406, "learning_rate": 1.990145084598779e-06, "loss": 39.4922, "step": 7753 }, { "epoch": 0.07339953237852728, "grad_norm": 587.861328125, "learning_rate": 1.990140790571651e-06, "loss": 62.6094, "step": 7754 }, { "epoch": 0.07340899840024233, "grad_norm": 320.0467224121094, "learning_rate": 1.990136495613855e-06, "loss": 26.4531, "step": 7755 }, { "epoch": 0.07341846442195739, "grad_norm": 242.2895965576172, "learning_rate": 1.9901321997253954e-06, "loss": 26.0938, "step": 7756 }, { "epoch": 0.07342793044367243, "grad_norm": 518.316650390625, "learning_rate": 1.9901279029062757e-06, "loss": 30.0078, "step": 7757 }, { "epoch": 0.0734373964653875, "grad_norm": 240.3124237060547, "learning_rate": 1.9901236051565e-06, "loss": 32.2422, "step": 7758 }, { "epoch": 0.07344686248710254, "grad_norm": 385.7977294921875, "learning_rate": 1.9901193064760725e-06, "loss": 29.7969, "step": 7759 }, { "epoch": 0.0734563285088176, "grad_norm": 242.18356323242188, "learning_rate": 1.9901150068649975e-06, "loss": 25.3906, "step": 7760 }, { "epoch": 0.07346579453053265, "grad_norm": 506.98480224609375, "learning_rate": 1.990110706323279e-06, "loss": 48.8594, "step": 7761 }, { "epoch": 0.0734752605522477, "grad_norm": 1234.6990966796875, "learning_rate": 1.9901064048509206e-06, "loss": 53.8203, "step": 7762 }, { "epoch": 0.07348472657396277, "grad_norm": 283.6845703125, "learning_rate": 1.9901021024479263e-06, "loss": 28.7773, "step": 7763 }, { "epoch": 0.07349419259567781, "grad_norm": 587.0357055664062, "learning_rate": 1.9900977991143007e-06, "loss": 48.5625, "step": 7764 }, { "epoch": 0.07350365861739287, "grad_norm": 361.07623291015625, "learning_rate": 1.9900934948500477e-06, "loss": 19.1719, "step": 7765 }, { "epoch": 0.07351312463910792, "grad_norm": 389.46929931640625, "learning_rate": 1.9900891896551714e-06, "loss": 39.9375, "step": 7766 }, { "epoch": 0.07352259066082298, "grad_norm": 433.7288513183594, "learning_rate": 1.9900848835296752e-06, "loss": 41.7969, "step": 7767 }, { "epoch": 0.07353205668253802, "grad_norm": 195.10140991210938, "learning_rate": 1.990080576473564e-06, "loss": 20.5781, "step": 7768 }, { "epoch": 0.07354152270425308, "grad_norm": 374.8475646972656, "learning_rate": 1.9900762684868408e-06, "loss": 36.9688, "step": 7769 }, { "epoch": 0.07355098872596813, "grad_norm": 610.6051635742188, "learning_rate": 1.990071959569511e-06, "loss": 43.4219, "step": 7770 }, { "epoch": 0.07356045474768319, "grad_norm": 1073.405517578125, "learning_rate": 1.9900676497215777e-06, "loss": 52.7188, "step": 7771 }, { "epoch": 0.07356992076939825, "grad_norm": 406.59539794921875, "learning_rate": 1.9900633389430454e-06, "loss": 27.1836, "step": 7772 }, { "epoch": 0.0735793867911133, "grad_norm": 345.21392822265625, "learning_rate": 1.990059027233918e-06, "loss": 27.5781, "step": 7773 }, { "epoch": 0.07358885281282836, "grad_norm": 446.5315856933594, "learning_rate": 1.990054714594199e-06, "loss": 57.2656, "step": 7774 }, { "epoch": 0.0735983188345434, "grad_norm": 673.4780883789062, "learning_rate": 1.9900504010238933e-06, "loss": 52.4219, "step": 7775 }, { "epoch": 0.07360778485625846, "grad_norm": 507.7639465332031, "learning_rate": 1.990046086523005e-06, "loss": 24.5, "step": 7776 }, { "epoch": 0.07361725087797351, "grad_norm": 440.0870056152344, "learning_rate": 1.990041771091537e-06, "loss": 36.0, "step": 7777 }, { "epoch": 0.07362671689968857, "grad_norm": 484.5918884277344, "learning_rate": 1.990037454729495e-06, "loss": 25.4219, "step": 7778 }, { "epoch": 0.07363618292140363, "grad_norm": 460.08935546875, "learning_rate": 1.9900331374368816e-06, "loss": 63.4219, "step": 7779 }, { "epoch": 0.07364564894311867, "grad_norm": 141.5282440185547, "learning_rate": 1.990028819213702e-06, "loss": 20.0234, "step": 7780 }, { "epoch": 0.07365511496483373, "grad_norm": 671.6639404296875, "learning_rate": 1.9900245000599593e-06, "loss": 40.1953, "step": 7781 }, { "epoch": 0.07366458098654878, "grad_norm": 328.7576599121094, "learning_rate": 1.990020179975658e-06, "loss": 25.3438, "step": 7782 }, { "epoch": 0.07367404700826384, "grad_norm": 950.1121215820312, "learning_rate": 1.990015858960802e-06, "loss": 62.0156, "step": 7783 }, { "epoch": 0.07368351302997889, "grad_norm": 292.1514892578125, "learning_rate": 1.9900115370153955e-06, "loss": 14.6797, "step": 7784 }, { "epoch": 0.07369297905169395, "grad_norm": 263.5529479980469, "learning_rate": 1.990007214139443e-06, "loss": 19.6953, "step": 7785 }, { "epoch": 0.07370244507340899, "grad_norm": 714.4374389648438, "learning_rate": 1.9900028903329476e-06, "loss": 19.9297, "step": 7786 }, { "epoch": 0.07371191109512405, "grad_norm": 336.85546875, "learning_rate": 1.9899985655959146e-06, "loss": 26.0625, "step": 7787 }, { "epoch": 0.07372137711683911, "grad_norm": 3.020850419998169, "learning_rate": 1.989994239928347e-06, "loss": 0.9463, "step": 7788 }, { "epoch": 0.07373084313855416, "grad_norm": 201.15914916992188, "learning_rate": 1.9899899133302486e-06, "loss": 21.9688, "step": 7789 }, { "epoch": 0.07374030916026922, "grad_norm": 818.4331665039062, "learning_rate": 1.989985585801625e-06, "loss": 30.6562, "step": 7790 }, { "epoch": 0.07374977518198426, "grad_norm": 529.1079711914062, "learning_rate": 1.989981257342479e-06, "loss": 24.5, "step": 7791 }, { "epoch": 0.07375924120369932, "grad_norm": 517.1475830078125, "learning_rate": 1.989976927952815e-06, "loss": 45.1094, "step": 7792 }, { "epoch": 0.07376870722541437, "grad_norm": 2.9409310817718506, "learning_rate": 1.989972597632637e-06, "loss": 0.7515, "step": 7793 }, { "epoch": 0.07377817324712943, "grad_norm": 553.5172729492188, "learning_rate": 1.989968266381949e-06, "loss": 47.5625, "step": 7794 }, { "epoch": 0.07378763926884448, "grad_norm": 844.8604125976562, "learning_rate": 1.9899639342007555e-06, "loss": 62.9531, "step": 7795 }, { "epoch": 0.07379710529055954, "grad_norm": 421.3685607910156, "learning_rate": 1.9899596010890603e-06, "loss": 33.0156, "step": 7796 }, { "epoch": 0.0738065713122746, "grad_norm": 334.90863037109375, "learning_rate": 1.9899552670468672e-06, "loss": 15.5703, "step": 7797 }, { "epoch": 0.07381603733398964, "grad_norm": 182.38058471679688, "learning_rate": 1.989950932074181e-06, "loss": 21.1016, "step": 7798 }, { "epoch": 0.0738255033557047, "grad_norm": 569.1196899414062, "learning_rate": 1.989946596171005e-06, "loss": 16.7109, "step": 7799 }, { "epoch": 0.07383496937741975, "grad_norm": 250.05787658691406, "learning_rate": 1.989942259337344e-06, "loss": 19.2344, "step": 7800 }, { "epoch": 0.07384443539913481, "grad_norm": 443.2502746582031, "learning_rate": 1.989937921573201e-06, "loss": 17.5078, "step": 7801 }, { "epoch": 0.07385390142084985, "grad_norm": 776.9044189453125, "learning_rate": 1.9899335828785814e-06, "loss": 56.7266, "step": 7802 }, { "epoch": 0.07386336744256491, "grad_norm": 207.9147186279297, "learning_rate": 1.9899292432534883e-06, "loss": 15.3594, "step": 7803 }, { "epoch": 0.07387283346427996, "grad_norm": 286.68994140625, "learning_rate": 1.9899249026979265e-06, "loss": 25.2578, "step": 7804 }, { "epoch": 0.07388229948599502, "grad_norm": 3.0146186351776123, "learning_rate": 1.9899205612118994e-06, "loss": 0.8975, "step": 7805 }, { "epoch": 0.07389176550771008, "grad_norm": 581.9160766601562, "learning_rate": 1.989916218795411e-06, "loss": 46.6875, "step": 7806 }, { "epoch": 0.07390123152942513, "grad_norm": 3.771362543106079, "learning_rate": 1.9899118754484664e-06, "loss": 1.1382, "step": 7807 }, { "epoch": 0.07391069755114019, "grad_norm": 296.3580017089844, "learning_rate": 1.9899075311710686e-06, "loss": 26.75, "step": 7808 }, { "epoch": 0.07392016357285523, "grad_norm": 3.740834951400757, "learning_rate": 1.989903185963222e-06, "loss": 0.8984, "step": 7809 }, { "epoch": 0.07392962959457029, "grad_norm": 303.3360900878906, "learning_rate": 1.9898988398249312e-06, "loss": 42.1094, "step": 7810 }, { "epoch": 0.07393909561628534, "grad_norm": 547.008544921875, "learning_rate": 1.9898944927562e-06, "loss": 51.9688, "step": 7811 }, { "epoch": 0.0739485616380004, "grad_norm": 3.061126708984375, "learning_rate": 1.989890144757032e-06, "loss": 0.8687, "step": 7812 }, { "epoch": 0.07395802765971544, "grad_norm": 237.3316650390625, "learning_rate": 1.9898857958274314e-06, "loss": 23.2344, "step": 7813 }, { "epoch": 0.0739674936814305, "grad_norm": 250.72964477539062, "learning_rate": 1.9898814459674032e-06, "loss": 21.5391, "step": 7814 }, { "epoch": 0.07397695970314556, "grad_norm": 310.0326843261719, "learning_rate": 1.9898770951769505e-06, "loss": 29.1094, "step": 7815 }, { "epoch": 0.07398642572486061, "grad_norm": 186.43556213378906, "learning_rate": 1.989872743456078e-06, "loss": 17.4297, "step": 7816 }, { "epoch": 0.07399589174657567, "grad_norm": 517.5050659179688, "learning_rate": 1.989868390804789e-06, "loss": 60.25, "step": 7817 }, { "epoch": 0.07400535776829072, "grad_norm": 3.672152280807495, "learning_rate": 1.9898640372230882e-06, "loss": 0.9785, "step": 7818 }, { "epoch": 0.07401482379000578, "grad_norm": 396.2974548339844, "learning_rate": 1.9898596827109803e-06, "loss": 19.9609, "step": 7819 }, { "epoch": 0.07402428981172082, "grad_norm": 589.0237426757812, "learning_rate": 1.989855327268468e-06, "loss": 31.25, "step": 7820 }, { "epoch": 0.07403375583343588, "grad_norm": 3.281907320022583, "learning_rate": 1.9898509708955565e-06, "loss": 0.7905, "step": 7821 }, { "epoch": 0.07404322185515094, "grad_norm": 3.352717638015747, "learning_rate": 1.9898466135922492e-06, "loss": 0.8979, "step": 7822 }, { "epoch": 0.07405268787686599, "grad_norm": 260.89349365234375, "learning_rate": 1.9898422553585504e-06, "loss": 31.1875, "step": 7823 }, { "epoch": 0.07406215389858105, "grad_norm": 623.048828125, "learning_rate": 1.9898378961944646e-06, "loss": 60.7812, "step": 7824 }, { "epoch": 0.0740716199202961, "grad_norm": 429.704345703125, "learning_rate": 1.9898335360999953e-06, "loss": 23.6953, "step": 7825 }, { "epoch": 0.07408108594201115, "grad_norm": 429.3663330078125, "learning_rate": 1.989829175075147e-06, "loss": 29.3828, "step": 7826 }, { "epoch": 0.0740905519637262, "grad_norm": 603.03759765625, "learning_rate": 1.9898248131199235e-06, "loss": 43.8281, "step": 7827 }, { "epoch": 0.07410001798544126, "grad_norm": 3.305358648300171, "learning_rate": 1.9898204502343294e-06, "loss": 0.8311, "step": 7828 }, { "epoch": 0.0741094840071563, "grad_norm": 521.109619140625, "learning_rate": 1.9898160864183683e-06, "loss": 22.4922, "step": 7829 }, { "epoch": 0.07411895002887137, "grad_norm": 564.866943359375, "learning_rate": 1.9898117216720445e-06, "loss": 24.707, "step": 7830 }, { "epoch": 0.07412841605058643, "grad_norm": 265.8131103515625, "learning_rate": 1.989807355995362e-06, "loss": 20.6953, "step": 7831 }, { "epoch": 0.07413788207230147, "grad_norm": 176.88258361816406, "learning_rate": 1.989802989388325e-06, "loss": 15.9297, "step": 7832 }, { "epoch": 0.07414734809401653, "grad_norm": 2.2965095043182373, "learning_rate": 1.9897986218509373e-06, "loss": 0.8501, "step": 7833 }, { "epoch": 0.07415681411573158, "grad_norm": 211.15650939941406, "learning_rate": 1.9897942533832034e-06, "loss": 21.4062, "step": 7834 }, { "epoch": 0.07416628013744664, "grad_norm": 358.9505310058594, "learning_rate": 1.9897898839851275e-06, "loss": 48.4766, "step": 7835 }, { "epoch": 0.07417574615916168, "grad_norm": 261.1632385253906, "learning_rate": 1.989785513656714e-06, "loss": 25.0469, "step": 7836 }, { "epoch": 0.07418521218087674, "grad_norm": 427.7331237792969, "learning_rate": 1.9897811423979657e-06, "loss": 29.5703, "step": 7837 }, { "epoch": 0.07419467820259179, "grad_norm": 405.4223937988281, "learning_rate": 1.989776770208888e-06, "loss": 30.6016, "step": 7838 }, { "epoch": 0.07420414422430685, "grad_norm": 317.26702880859375, "learning_rate": 1.989772397089484e-06, "loss": 13.7031, "step": 7839 }, { "epoch": 0.07421361024602191, "grad_norm": 518.75, "learning_rate": 1.9897680230397586e-06, "loss": 64.2812, "step": 7840 }, { "epoch": 0.07422307626773696, "grad_norm": 238.99493408203125, "learning_rate": 1.9897636480597157e-06, "loss": 22.3047, "step": 7841 }, { "epoch": 0.07423254228945202, "grad_norm": 214.47442626953125, "learning_rate": 1.989759272149359e-06, "loss": 28.2031, "step": 7842 }, { "epoch": 0.07424200831116706, "grad_norm": 1156.9456787109375, "learning_rate": 1.9897548953086935e-06, "loss": 34.6953, "step": 7843 }, { "epoch": 0.07425147433288212, "grad_norm": 3.51088285446167, "learning_rate": 1.9897505175377227e-06, "loss": 0.9058, "step": 7844 }, { "epoch": 0.07426094035459717, "grad_norm": 301.859375, "learning_rate": 1.9897461388364506e-06, "loss": 26.3906, "step": 7845 }, { "epoch": 0.07427040637631223, "grad_norm": 258.5216369628906, "learning_rate": 1.9897417592048818e-06, "loss": 27.6406, "step": 7846 }, { "epoch": 0.07427987239802727, "grad_norm": 225.61228942871094, "learning_rate": 1.98973737864302e-06, "loss": 20.1406, "step": 7847 }, { "epoch": 0.07428933841974233, "grad_norm": 458.94354248046875, "learning_rate": 1.989732997150869e-06, "loss": 39.5703, "step": 7848 }, { "epoch": 0.0742988044414574, "grad_norm": 269.4731140136719, "learning_rate": 1.989728614728434e-06, "loss": 20.0469, "step": 7849 }, { "epoch": 0.07430827046317244, "grad_norm": 316.86767578125, "learning_rate": 1.9897242313757185e-06, "loss": 38.0, "step": 7850 }, { "epoch": 0.0743177364848875, "grad_norm": 208.6240997314453, "learning_rate": 1.9897198470927263e-06, "loss": 20.1719, "step": 7851 }, { "epoch": 0.07432720250660255, "grad_norm": 573.3817138671875, "learning_rate": 1.9897154618794614e-06, "loss": 56.7891, "step": 7852 }, { "epoch": 0.0743366685283176, "grad_norm": 331.05816650390625, "learning_rate": 1.989711075735929e-06, "loss": 17.4609, "step": 7853 }, { "epoch": 0.07434613455003265, "grad_norm": 853.7544555664062, "learning_rate": 1.9897066886621327e-06, "loss": 34.1016, "step": 7854 }, { "epoch": 0.07435560057174771, "grad_norm": 532.6224365234375, "learning_rate": 1.989702300658076e-06, "loss": 46.1758, "step": 7855 }, { "epoch": 0.07436506659346276, "grad_norm": 278.55242919921875, "learning_rate": 1.9896979117237637e-06, "loss": 25.2578, "step": 7856 }, { "epoch": 0.07437453261517782, "grad_norm": 621.77392578125, "learning_rate": 1.9896935218592e-06, "loss": 35.1172, "step": 7857 }, { "epoch": 0.07438399863689288, "grad_norm": 813.1824340820312, "learning_rate": 1.9896891310643883e-06, "loss": 51.2891, "step": 7858 }, { "epoch": 0.07439346465860792, "grad_norm": 896.2973022460938, "learning_rate": 1.9896847393393337e-06, "loss": 46.7578, "step": 7859 }, { "epoch": 0.07440293068032298, "grad_norm": 273.3180847167969, "learning_rate": 1.9896803466840395e-06, "loss": 28.5625, "step": 7860 }, { "epoch": 0.07441239670203803, "grad_norm": 321.5802917480469, "learning_rate": 1.9896759530985105e-06, "loss": 25.125, "step": 7861 }, { "epoch": 0.07442186272375309, "grad_norm": 380.7770080566406, "learning_rate": 1.9896715585827503e-06, "loss": 21.6016, "step": 7862 }, { "epoch": 0.07443132874546814, "grad_norm": 1042.14892578125, "learning_rate": 1.989667163136763e-06, "loss": 69.875, "step": 7863 }, { "epoch": 0.0744407947671832, "grad_norm": 3.2175018787384033, "learning_rate": 1.9896627667605526e-06, "loss": 0.7119, "step": 7864 }, { "epoch": 0.07445026078889826, "grad_norm": 252.19174194335938, "learning_rate": 1.9896583694541243e-06, "loss": 24.0781, "step": 7865 }, { "epoch": 0.0744597268106133, "grad_norm": 213.28167724609375, "learning_rate": 1.9896539712174814e-06, "loss": 18.9766, "step": 7866 }, { "epoch": 0.07446919283232836, "grad_norm": 238.86856079101562, "learning_rate": 1.9896495720506282e-06, "loss": 10.8086, "step": 7867 }, { "epoch": 0.07447865885404341, "grad_norm": 582.0234375, "learning_rate": 1.9896451719535686e-06, "loss": 37.4219, "step": 7868 }, { "epoch": 0.07448812487575847, "grad_norm": 269.0195617675781, "learning_rate": 1.9896407709263066e-06, "loss": 35.6562, "step": 7869 }, { "epoch": 0.07449759089747351, "grad_norm": 209.46844482421875, "learning_rate": 1.9896363689688475e-06, "loss": 21.6953, "step": 7870 }, { "epoch": 0.07450705691918857, "grad_norm": 313.2518615722656, "learning_rate": 1.9896319660811937e-06, "loss": 21.4922, "step": 7871 }, { "epoch": 0.07451652294090362, "grad_norm": 1030.201171875, "learning_rate": 1.989627562263351e-06, "loss": 38.3047, "step": 7872 }, { "epoch": 0.07452598896261868, "grad_norm": 225.66799926757812, "learning_rate": 1.989623157515322e-06, "loss": 23.875, "step": 7873 }, { "epoch": 0.07453545498433374, "grad_norm": 166.4659881591797, "learning_rate": 1.989618751837112e-06, "loss": 9.6055, "step": 7874 }, { "epoch": 0.07454492100604879, "grad_norm": 188.91075134277344, "learning_rate": 1.989614345228725e-06, "loss": 23.2969, "step": 7875 }, { "epoch": 0.07455438702776385, "grad_norm": 367.7387390136719, "learning_rate": 1.9896099376901645e-06, "loss": 20.3672, "step": 7876 }, { "epoch": 0.07456385304947889, "grad_norm": 361.4109802246094, "learning_rate": 1.989605529221435e-06, "loss": 35.8438, "step": 7877 }, { "epoch": 0.07457331907119395, "grad_norm": 528.1442260742188, "learning_rate": 1.989601119822541e-06, "loss": 22.6016, "step": 7878 }, { "epoch": 0.074582785092909, "grad_norm": 329.18511962890625, "learning_rate": 1.9895967094934863e-06, "loss": 39.5156, "step": 7879 }, { "epoch": 0.07459225111462406, "grad_norm": 548.5663452148438, "learning_rate": 1.989592298234275e-06, "loss": 32.3516, "step": 7880 }, { "epoch": 0.0746017171363391, "grad_norm": 267.7093505859375, "learning_rate": 1.9895878860449112e-06, "loss": 12.4648, "step": 7881 }, { "epoch": 0.07461118315805416, "grad_norm": 501.2581481933594, "learning_rate": 1.9895834729253993e-06, "loss": 41.3281, "step": 7882 }, { "epoch": 0.07462064917976922, "grad_norm": 294.9993896484375, "learning_rate": 1.989579058875743e-06, "loss": 41.7344, "step": 7883 }, { "epoch": 0.07463011520148427, "grad_norm": 273.8593444824219, "learning_rate": 1.9895746438959474e-06, "loss": 11.4375, "step": 7884 }, { "epoch": 0.07463958122319933, "grad_norm": 320.9474182128906, "learning_rate": 1.9895702279860155e-06, "loss": 26.0391, "step": 7885 }, { "epoch": 0.07464904724491438, "grad_norm": 280.3133544921875, "learning_rate": 1.989565811145952e-06, "loss": 17.0469, "step": 7886 }, { "epoch": 0.07465851326662944, "grad_norm": 170.58656311035156, "learning_rate": 1.989561393375761e-06, "loss": 19.8594, "step": 7887 }, { "epoch": 0.07466797928834448, "grad_norm": 2.653918981552124, "learning_rate": 1.989556974675447e-06, "loss": 0.873, "step": 7888 }, { "epoch": 0.07467744531005954, "grad_norm": 411.6569519042969, "learning_rate": 1.989552555045014e-06, "loss": 43.7656, "step": 7889 }, { "epoch": 0.07468691133177459, "grad_norm": 302.5626220703125, "learning_rate": 1.9895481344844652e-06, "loss": 42.3906, "step": 7890 }, { "epoch": 0.07469637735348965, "grad_norm": 196.2335968017578, "learning_rate": 1.989543712993806e-06, "loss": 21.375, "step": 7891 }, { "epoch": 0.07470584337520471, "grad_norm": 630.7767333984375, "learning_rate": 1.98953929057304e-06, "loss": 61.9375, "step": 7892 }, { "epoch": 0.07471530939691975, "grad_norm": 3.2717983722686768, "learning_rate": 1.9895348672221714e-06, "loss": 0.9365, "step": 7893 }, { "epoch": 0.07472477541863481, "grad_norm": 518.2737426757812, "learning_rate": 1.9895304429412042e-06, "loss": 39.6641, "step": 7894 }, { "epoch": 0.07473424144034986, "grad_norm": 762.7935180664062, "learning_rate": 1.989526017730143e-06, "loss": 65.4609, "step": 7895 }, { "epoch": 0.07474370746206492, "grad_norm": 296.8765563964844, "learning_rate": 1.989521591588992e-06, "loss": 19.1953, "step": 7896 }, { "epoch": 0.07475317348377997, "grad_norm": 317.1935119628906, "learning_rate": 1.9895171645177546e-06, "loss": 21.6797, "step": 7897 }, { "epoch": 0.07476263950549503, "grad_norm": 397.08502197265625, "learning_rate": 1.9895127365164354e-06, "loss": 33.4688, "step": 7898 }, { "epoch": 0.07477210552721007, "grad_norm": 600.0338745117188, "learning_rate": 1.989508307585039e-06, "loss": 54.1797, "step": 7899 }, { "epoch": 0.07478157154892513, "grad_norm": 569.1010131835938, "learning_rate": 1.9895038777235687e-06, "loss": 21.7188, "step": 7900 }, { "epoch": 0.07479103757064019, "grad_norm": 374.9268798828125, "learning_rate": 1.9894994469320294e-06, "loss": 47.125, "step": 7901 }, { "epoch": 0.07480050359235524, "grad_norm": 346.7905578613281, "learning_rate": 1.989495015210425e-06, "loss": 27.9297, "step": 7902 }, { "epoch": 0.0748099696140703, "grad_norm": 726.725341796875, "learning_rate": 1.98949058255876e-06, "loss": 27.8125, "step": 7903 }, { "epoch": 0.07481943563578534, "grad_norm": 1731.6297607421875, "learning_rate": 1.9894861489770376e-06, "loss": 35.2852, "step": 7904 }, { "epoch": 0.0748289016575004, "grad_norm": 902.7050170898438, "learning_rate": 1.989481714465263e-06, "loss": 41.6719, "step": 7905 }, { "epoch": 0.07483836767921545, "grad_norm": 423.06060791015625, "learning_rate": 1.98947727902344e-06, "loss": 25.375, "step": 7906 }, { "epoch": 0.07484783370093051, "grad_norm": 389.37371826171875, "learning_rate": 1.989472842651572e-06, "loss": 23.6875, "step": 7907 }, { "epoch": 0.07485729972264557, "grad_norm": 516.84130859375, "learning_rate": 1.9894684053496643e-06, "loss": 51.5859, "step": 7908 }, { "epoch": 0.07486676574436062, "grad_norm": 1127.72119140625, "learning_rate": 1.989463967117721e-06, "loss": 44.5703, "step": 7909 }, { "epoch": 0.07487623176607568, "grad_norm": 283.32952880859375, "learning_rate": 1.989459527955746e-06, "loss": 22.4766, "step": 7910 }, { "epoch": 0.07488569778779072, "grad_norm": 267.3606872558594, "learning_rate": 1.9894550878637428e-06, "loss": 23.1562, "step": 7911 }, { "epoch": 0.07489516380950578, "grad_norm": 479.6025085449219, "learning_rate": 1.9894506468417166e-06, "loss": 28.3711, "step": 7912 }, { "epoch": 0.07490462983122083, "grad_norm": 229.2274627685547, "learning_rate": 1.989446204889671e-06, "loss": 23.6328, "step": 7913 }, { "epoch": 0.07491409585293589, "grad_norm": 313.0173034667969, "learning_rate": 1.9894417620076106e-06, "loss": 22.2578, "step": 7914 }, { "epoch": 0.07492356187465093, "grad_norm": 205.5279083251953, "learning_rate": 1.9894373181955393e-06, "loss": 23.8984, "step": 7915 }, { "epoch": 0.074933027896366, "grad_norm": 223.147705078125, "learning_rate": 1.989432873453461e-06, "loss": 22.5625, "step": 7916 }, { "epoch": 0.07494249391808105, "grad_norm": 199.80368041992188, "learning_rate": 1.98942842778138e-06, "loss": 24.3828, "step": 7917 }, { "epoch": 0.0749519599397961, "grad_norm": 1625.9127197265625, "learning_rate": 1.9894239811793012e-06, "loss": 48.5391, "step": 7918 }, { "epoch": 0.07496142596151116, "grad_norm": 1237.7879638671875, "learning_rate": 1.989419533647228e-06, "loss": 52.0625, "step": 7919 }, { "epoch": 0.0749708919832262, "grad_norm": 331.85662841796875, "learning_rate": 1.9894150851851647e-06, "loss": 28.3828, "step": 7920 }, { "epoch": 0.07498035800494127, "grad_norm": 202.0017852783203, "learning_rate": 1.9894106357931156e-06, "loss": 20.2656, "step": 7921 }, { "epoch": 0.07498982402665631, "grad_norm": 462.9127502441406, "learning_rate": 1.9894061854710853e-06, "loss": 41.0078, "step": 7922 }, { "epoch": 0.07499929004837137, "grad_norm": 306.69476318359375, "learning_rate": 1.989401734219077e-06, "loss": 20.8594, "step": 7923 }, { "epoch": 0.07500875607008642, "grad_norm": 475.1324157714844, "learning_rate": 1.989397282037096e-06, "loss": 29.6172, "step": 7924 }, { "epoch": 0.07501822209180148, "grad_norm": 448.8993835449219, "learning_rate": 1.9893928289251453e-06, "loss": 53.3281, "step": 7925 }, { "epoch": 0.07502768811351654, "grad_norm": 2.9885900020599365, "learning_rate": 1.9893883748832296e-06, "loss": 0.939, "step": 7926 }, { "epoch": 0.07503715413523158, "grad_norm": 1192.4188232421875, "learning_rate": 1.989383919911354e-06, "loss": 57.9688, "step": 7927 }, { "epoch": 0.07504662015694664, "grad_norm": 256.52923583984375, "learning_rate": 1.989379464009521e-06, "loss": 24.1406, "step": 7928 }, { "epoch": 0.07505608617866169, "grad_norm": 729.3926391601562, "learning_rate": 1.989375007177736e-06, "loss": 50.1562, "step": 7929 }, { "epoch": 0.07506555220037675, "grad_norm": 279.54327392578125, "learning_rate": 1.989370549416003e-06, "loss": 26.2422, "step": 7930 }, { "epoch": 0.0750750182220918, "grad_norm": 418.2335510253906, "learning_rate": 1.9893660907243262e-06, "loss": 24.1758, "step": 7931 }, { "epoch": 0.07508448424380686, "grad_norm": 372.8615417480469, "learning_rate": 1.9893616311027093e-06, "loss": 25.8906, "step": 7932 }, { "epoch": 0.0750939502655219, "grad_norm": 334.2088623046875, "learning_rate": 1.989357170551157e-06, "loss": 23.3906, "step": 7933 }, { "epoch": 0.07510341628723696, "grad_norm": 734.1502075195312, "learning_rate": 1.9893527090696734e-06, "loss": 62.4375, "step": 7934 }, { "epoch": 0.07511288230895202, "grad_norm": 174.4520721435547, "learning_rate": 1.9893482466582624e-06, "loss": 18.0703, "step": 7935 }, { "epoch": 0.07512234833066707, "grad_norm": 181.28561401367188, "learning_rate": 1.9893437833169282e-06, "loss": 18.9922, "step": 7936 }, { "epoch": 0.07513181435238213, "grad_norm": 221.6005401611328, "learning_rate": 1.9893393190456756e-06, "loss": 22.7812, "step": 7937 }, { "epoch": 0.07514128037409717, "grad_norm": 649.4873657226562, "learning_rate": 1.9893348538445082e-06, "loss": 54.5312, "step": 7938 }, { "epoch": 0.07515074639581223, "grad_norm": 3.128175973892212, "learning_rate": 1.989330387713431e-06, "loss": 0.7725, "step": 7939 }, { "epoch": 0.07516021241752728, "grad_norm": 620.1820678710938, "learning_rate": 1.9893259206524468e-06, "loss": 35.5312, "step": 7940 }, { "epoch": 0.07516967843924234, "grad_norm": 250.74822998046875, "learning_rate": 1.9893214526615607e-06, "loss": 19.8359, "step": 7941 }, { "epoch": 0.07517914446095739, "grad_norm": 413.31646728515625, "learning_rate": 1.989316983740777e-06, "loss": 43.7344, "step": 7942 }, { "epoch": 0.07518861048267245, "grad_norm": 264.57452392578125, "learning_rate": 1.9893125138900996e-06, "loss": 10.2891, "step": 7943 }, { "epoch": 0.0751980765043875, "grad_norm": 386.077392578125, "learning_rate": 1.989308043109533e-06, "loss": 40.4219, "step": 7944 }, { "epoch": 0.07520754252610255, "grad_norm": 320.1135559082031, "learning_rate": 1.989303571399081e-06, "loss": 27.7031, "step": 7945 }, { "epoch": 0.07521700854781761, "grad_norm": 298.0207214355469, "learning_rate": 1.989299098758748e-06, "loss": 25.5156, "step": 7946 }, { "epoch": 0.07522647456953266, "grad_norm": 456.23162841796875, "learning_rate": 1.989294625188538e-06, "loss": 25.8047, "step": 7947 }, { "epoch": 0.07523594059124772, "grad_norm": 243.17108154296875, "learning_rate": 1.989290150688456e-06, "loss": 21.4531, "step": 7948 }, { "epoch": 0.07524540661296276, "grad_norm": 577.9053955078125, "learning_rate": 1.9892856752585055e-06, "loss": 67.2031, "step": 7949 }, { "epoch": 0.07525487263467782, "grad_norm": 516.8131103515625, "learning_rate": 1.9892811988986905e-06, "loss": 43.75, "step": 7950 }, { "epoch": 0.07526433865639288, "grad_norm": 282.9229736328125, "learning_rate": 1.9892767216090157e-06, "loss": 29.4609, "step": 7951 }, { "epoch": 0.07527380467810793, "grad_norm": 208.1815643310547, "learning_rate": 1.989272243389485e-06, "loss": 20.6016, "step": 7952 }, { "epoch": 0.07528327069982299, "grad_norm": 228.89849853515625, "learning_rate": 1.989267764240103e-06, "loss": 23.5, "step": 7953 }, { "epoch": 0.07529273672153804, "grad_norm": 256.4129333496094, "learning_rate": 1.9892632841608735e-06, "loss": 25.625, "step": 7954 }, { "epoch": 0.0753022027432531, "grad_norm": 276.1039733886719, "learning_rate": 1.989258803151801e-06, "loss": 25.7422, "step": 7955 }, { "epoch": 0.07531166876496814, "grad_norm": 238.52146911621094, "learning_rate": 1.989254321212889e-06, "loss": 21.8984, "step": 7956 }, { "epoch": 0.0753211347866832, "grad_norm": 286.8876037597656, "learning_rate": 1.989249838344143e-06, "loss": 37.5156, "step": 7957 }, { "epoch": 0.07533060080839825, "grad_norm": 654.6364135742188, "learning_rate": 1.9892453545455662e-06, "loss": 15.4023, "step": 7958 }, { "epoch": 0.07534006683011331, "grad_norm": 719.457275390625, "learning_rate": 1.989240869817163e-06, "loss": 39.125, "step": 7959 }, { "epoch": 0.07534953285182837, "grad_norm": 390.3125305175781, "learning_rate": 1.989236384158938e-06, "loss": 29.5078, "step": 7960 }, { "epoch": 0.07535899887354341, "grad_norm": 369.92578125, "learning_rate": 1.989231897570895e-06, "loss": 27.4766, "step": 7961 }, { "epoch": 0.07536846489525847, "grad_norm": 1748.8466796875, "learning_rate": 1.9892274100530387e-06, "loss": 32.2031, "step": 7962 }, { "epoch": 0.07537793091697352, "grad_norm": 269.979248046875, "learning_rate": 1.9892229216053727e-06, "loss": 25.7422, "step": 7963 }, { "epoch": 0.07538739693868858, "grad_norm": 1052.05126953125, "learning_rate": 1.9892184322279017e-06, "loss": 79.6953, "step": 7964 }, { "epoch": 0.07539686296040363, "grad_norm": 2.8921620845794678, "learning_rate": 1.989213941920629e-06, "loss": 0.939, "step": 7965 }, { "epoch": 0.07540632898211869, "grad_norm": 525.0552368164062, "learning_rate": 1.9892094506835604e-06, "loss": 54.0156, "step": 7966 }, { "epoch": 0.07541579500383373, "grad_norm": 228.26792907714844, "learning_rate": 1.989204958516699e-06, "loss": 20.2109, "step": 7967 }, { "epoch": 0.07542526102554879, "grad_norm": 937.9195556640625, "learning_rate": 1.989200465420049e-06, "loss": 45.25, "step": 7968 }, { "epoch": 0.07543472704726385, "grad_norm": 494.3483581542969, "learning_rate": 1.989195971393615e-06, "loss": 24.1484, "step": 7969 }, { "epoch": 0.0754441930689789, "grad_norm": 409.9561767578125, "learning_rate": 1.9891914764374014e-06, "loss": 36.6406, "step": 7970 }, { "epoch": 0.07545365909069396, "grad_norm": 1180.5028076171875, "learning_rate": 1.9891869805514122e-06, "loss": 30.2266, "step": 7971 }, { "epoch": 0.075463125112409, "grad_norm": 605.7735595703125, "learning_rate": 1.9891824837356515e-06, "loss": 44.0312, "step": 7972 }, { "epoch": 0.07547259113412406, "grad_norm": 438.3897705078125, "learning_rate": 1.9891779859901235e-06, "loss": 26.0781, "step": 7973 }, { "epoch": 0.07548205715583911, "grad_norm": 334.910400390625, "learning_rate": 1.989173487314833e-06, "loss": 22.0469, "step": 7974 }, { "epoch": 0.07549152317755417, "grad_norm": 328.6864013671875, "learning_rate": 1.989168987709783e-06, "loss": 20.8984, "step": 7975 }, { "epoch": 0.07550098919926922, "grad_norm": 380.2566833496094, "learning_rate": 1.989164487174979e-06, "loss": 9.4277, "step": 7976 }, { "epoch": 0.07551045522098428, "grad_norm": 285.6993713378906, "learning_rate": 1.9891599857104247e-06, "loss": 24.3359, "step": 7977 }, { "epoch": 0.07551992124269934, "grad_norm": 373.5867614746094, "learning_rate": 1.989155483316124e-06, "loss": 29.8398, "step": 7978 }, { "epoch": 0.07552938726441438, "grad_norm": 354.5078125, "learning_rate": 1.989150979992082e-06, "loss": 28.9375, "step": 7979 }, { "epoch": 0.07553885328612944, "grad_norm": 192.98170471191406, "learning_rate": 1.989146475738302e-06, "loss": 23.4141, "step": 7980 }, { "epoch": 0.07554831930784449, "grad_norm": 898.0112915039062, "learning_rate": 1.989141970554789e-06, "loss": 81.3438, "step": 7981 }, { "epoch": 0.07555778532955955, "grad_norm": 966.91748046875, "learning_rate": 1.989137464441547e-06, "loss": 27.8672, "step": 7982 }, { "epoch": 0.0755672513512746, "grad_norm": 354.9923400878906, "learning_rate": 1.98913295739858e-06, "loss": 30.5391, "step": 7983 }, { "epoch": 0.07557671737298965, "grad_norm": 519.3421020507812, "learning_rate": 1.989128449425892e-06, "loss": 27.6953, "step": 7984 }, { "epoch": 0.0755861833947047, "grad_norm": 221.0620880126953, "learning_rate": 1.9891239405234877e-06, "loss": 21.0859, "step": 7985 }, { "epoch": 0.07559564941641976, "grad_norm": 242.35231018066406, "learning_rate": 1.9891194306913713e-06, "loss": 23.9531, "step": 7986 }, { "epoch": 0.07560511543813482, "grad_norm": 479.2976379394531, "learning_rate": 1.9891149199295472e-06, "loss": 31.7812, "step": 7987 }, { "epoch": 0.07561458145984987, "grad_norm": 209.09242248535156, "learning_rate": 1.9891104082380194e-06, "loss": 20.5938, "step": 7988 }, { "epoch": 0.07562404748156493, "grad_norm": 376.4866943359375, "learning_rate": 1.989105895616792e-06, "loss": 43.0234, "step": 7989 }, { "epoch": 0.07563351350327997, "grad_norm": 406.54638671875, "learning_rate": 1.98910138206587e-06, "loss": 52.0, "step": 7990 }, { "epoch": 0.07564297952499503, "grad_norm": 407.5090637207031, "learning_rate": 1.989096867585256e-06, "loss": 29.4922, "step": 7991 }, { "epoch": 0.07565244554671008, "grad_norm": 224.98126220703125, "learning_rate": 1.9890923521749563e-06, "loss": 10.6914, "step": 7992 }, { "epoch": 0.07566191156842514, "grad_norm": 308.4093933105469, "learning_rate": 1.989087835834973e-06, "loss": 19.4609, "step": 7993 }, { "epoch": 0.0756713775901402, "grad_norm": 171.3807373046875, "learning_rate": 1.9890833185653123e-06, "loss": 22.5586, "step": 7994 }, { "epoch": 0.07568084361185524, "grad_norm": 255.1243438720703, "learning_rate": 1.9890788003659774e-06, "loss": 26.7109, "step": 7995 }, { "epoch": 0.0756903096335703, "grad_norm": 507.8326110839844, "learning_rate": 1.989074281236973e-06, "loss": 35.4609, "step": 7996 }, { "epoch": 0.07569977565528535, "grad_norm": 302.8341369628906, "learning_rate": 1.989069761178303e-06, "loss": 32.3516, "step": 7997 }, { "epoch": 0.07570924167700041, "grad_norm": 338.7254943847656, "learning_rate": 1.9890652401899715e-06, "loss": 9.1328, "step": 7998 }, { "epoch": 0.07571870769871546, "grad_norm": 663.545166015625, "learning_rate": 1.9890607182719833e-06, "loss": 33.1914, "step": 7999 }, { "epoch": 0.07572817372043052, "grad_norm": 592.913818359375, "learning_rate": 1.9890561954243425e-06, "loss": 51.4844, "step": 8000 }, { "epoch": 0.07573763974214556, "grad_norm": 441.1180419921875, "learning_rate": 1.989051671647053e-06, "loss": 49.8906, "step": 8001 }, { "epoch": 0.07574710576386062, "grad_norm": 379.790283203125, "learning_rate": 1.9890471469401194e-06, "loss": 22.9648, "step": 8002 }, { "epoch": 0.07575657178557568, "grad_norm": 728.5665893554688, "learning_rate": 1.9890426213035455e-06, "loss": 86.1875, "step": 8003 }, { "epoch": 0.07576603780729073, "grad_norm": 3.426126718521118, "learning_rate": 1.989038094737336e-06, "loss": 0.9629, "step": 8004 }, { "epoch": 0.07577550382900579, "grad_norm": 253.57945251464844, "learning_rate": 1.9890335672414953e-06, "loss": 24.5312, "step": 8005 }, { "epoch": 0.07578496985072083, "grad_norm": 282.7489318847656, "learning_rate": 1.9890290388160273e-06, "loss": 27.1641, "step": 8006 }, { "epoch": 0.0757944358724359, "grad_norm": 196.47738647460938, "learning_rate": 1.989024509460936e-06, "loss": 23.0703, "step": 8007 }, { "epoch": 0.07580390189415094, "grad_norm": 2.696985960006714, "learning_rate": 1.989019979176226e-06, "loss": 0.8706, "step": 8008 }, { "epoch": 0.075813367915866, "grad_norm": 398.02581787109375, "learning_rate": 1.9890154479619022e-06, "loss": 33.2266, "step": 8009 }, { "epoch": 0.07582283393758105, "grad_norm": 537.0755615234375, "learning_rate": 1.9890109158179677e-06, "loss": 56.0625, "step": 8010 }, { "epoch": 0.0758322999592961, "grad_norm": 282.6112365722656, "learning_rate": 1.9890063827444276e-06, "loss": 26.125, "step": 8011 }, { "epoch": 0.07584176598101117, "grad_norm": 294.68377685546875, "learning_rate": 1.989001848741286e-06, "loss": 27.8047, "step": 8012 }, { "epoch": 0.07585123200272621, "grad_norm": 370.8663635253906, "learning_rate": 1.988997313808546e-06, "loss": 27.9688, "step": 8013 }, { "epoch": 0.07586069802444127, "grad_norm": 355.0487976074219, "learning_rate": 1.9889927779462136e-06, "loss": 29.9219, "step": 8014 }, { "epoch": 0.07587016404615632, "grad_norm": 394.17413330078125, "learning_rate": 1.9889882411542925e-06, "loss": 25.1953, "step": 8015 }, { "epoch": 0.07587963006787138, "grad_norm": 470.8172607421875, "learning_rate": 1.988983703432786e-06, "loss": 51.9531, "step": 8016 }, { "epoch": 0.07588909608958642, "grad_norm": 581.8789672851562, "learning_rate": 1.9889791647817e-06, "loss": 71.0938, "step": 8017 }, { "epoch": 0.07589856211130148, "grad_norm": 574.8593139648438, "learning_rate": 1.988974625201038e-06, "loss": 48.6562, "step": 8018 }, { "epoch": 0.07590802813301653, "grad_norm": 792.6998901367188, "learning_rate": 1.9889700846908035e-06, "loss": 13.5391, "step": 8019 }, { "epoch": 0.07591749415473159, "grad_norm": 232.10244750976562, "learning_rate": 1.9889655432510017e-06, "loss": 18.3281, "step": 8020 }, { "epoch": 0.07592696017644665, "grad_norm": 208.07510375976562, "learning_rate": 1.9889610008816367e-06, "loss": 15.5898, "step": 8021 }, { "epoch": 0.0759364261981617, "grad_norm": 188.01132202148438, "learning_rate": 1.9889564575827128e-06, "loss": 16.3008, "step": 8022 }, { "epoch": 0.07594589221987676, "grad_norm": 148.05401611328125, "learning_rate": 1.988951913354234e-06, "loss": 17.9062, "step": 8023 }, { "epoch": 0.0759553582415918, "grad_norm": 764.9488525390625, "learning_rate": 1.9889473681962046e-06, "loss": 61.6562, "step": 8024 }, { "epoch": 0.07596482426330686, "grad_norm": 3.507826805114746, "learning_rate": 1.9889428221086293e-06, "loss": 0.8862, "step": 8025 }, { "epoch": 0.07597429028502191, "grad_norm": 194.1053924560547, "learning_rate": 1.988938275091512e-06, "loss": 22.9961, "step": 8026 }, { "epoch": 0.07598375630673697, "grad_norm": 415.47894287109375, "learning_rate": 1.988933727144857e-06, "loss": 27.8281, "step": 8027 }, { "epoch": 0.07599322232845201, "grad_norm": 622.7335205078125, "learning_rate": 1.988929178268669e-06, "loss": 60.8008, "step": 8028 }, { "epoch": 0.07600268835016707, "grad_norm": 823.0458374023438, "learning_rate": 1.9889246284629515e-06, "loss": 48.7422, "step": 8029 }, { "epoch": 0.07601215437188213, "grad_norm": 3.231740951538086, "learning_rate": 1.988920077727709e-06, "loss": 1.0044, "step": 8030 }, { "epoch": 0.07602162039359718, "grad_norm": 752.1704711914062, "learning_rate": 1.9889155260629463e-06, "loss": 46.4062, "step": 8031 }, { "epoch": 0.07603108641531224, "grad_norm": 247.99343872070312, "learning_rate": 1.988910973468667e-06, "loss": 25.7344, "step": 8032 }, { "epoch": 0.07604055243702729, "grad_norm": 632.8496704101562, "learning_rate": 1.988906419944876e-06, "loss": 36.0312, "step": 8033 }, { "epoch": 0.07605001845874235, "grad_norm": 283.29510498046875, "learning_rate": 1.9889018654915773e-06, "loss": 27.4922, "step": 8034 }, { "epoch": 0.07605948448045739, "grad_norm": 152.42681884765625, "learning_rate": 1.988897310108775e-06, "loss": 22.2344, "step": 8035 }, { "epoch": 0.07606895050217245, "grad_norm": 523.8509521484375, "learning_rate": 1.988892753796474e-06, "loss": 31.5781, "step": 8036 }, { "epoch": 0.07607841652388751, "grad_norm": 296.7117004394531, "learning_rate": 1.988888196554678e-06, "loss": 23.4297, "step": 8037 }, { "epoch": 0.07608788254560256, "grad_norm": 3.181264877319336, "learning_rate": 1.988883638383391e-06, "loss": 0.9409, "step": 8038 }, { "epoch": 0.07609734856731762, "grad_norm": 233.75035095214844, "learning_rate": 1.988879079282618e-06, "loss": 23.1797, "step": 8039 }, { "epoch": 0.07610681458903266, "grad_norm": 350.29534912109375, "learning_rate": 1.988874519252363e-06, "loss": 25.8281, "step": 8040 }, { "epoch": 0.07611628061074772, "grad_norm": 712.0346069335938, "learning_rate": 1.98886995829263e-06, "loss": 39.4219, "step": 8041 }, { "epoch": 0.07612574663246277, "grad_norm": 244.23980712890625, "learning_rate": 1.988865396403424e-06, "loss": 32.9062, "step": 8042 }, { "epoch": 0.07613521265417783, "grad_norm": 487.6708679199219, "learning_rate": 1.9888608335847486e-06, "loss": 27.4961, "step": 8043 }, { "epoch": 0.07614467867589288, "grad_norm": 380.0262756347656, "learning_rate": 1.9888562698366087e-06, "loss": 29.3594, "step": 8044 }, { "epoch": 0.07615414469760794, "grad_norm": 255.29080200195312, "learning_rate": 1.9888517051590077e-06, "loss": 20.2734, "step": 8045 }, { "epoch": 0.076163610719323, "grad_norm": 285.7001037597656, "learning_rate": 1.9888471395519507e-06, "loss": 12.5352, "step": 8046 }, { "epoch": 0.07617307674103804, "grad_norm": 243.9690399169922, "learning_rate": 1.988842573015442e-06, "loss": 19.2109, "step": 8047 }, { "epoch": 0.0761825427627531, "grad_norm": 686.4725952148438, "learning_rate": 1.988838005549485e-06, "loss": 28.4609, "step": 8048 }, { "epoch": 0.07619200878446815, "grad_norm": 355.2594909667969, "learning_rate": 1.988833437154085e-06, "loss": 32.6562, "step": 8049 }, { "epoch": 0.07620147480618321, "grad_norm": 390.173583984375, "learning_rate": 1.988828867829246e-06, "loss": 19.2188, "step": 8050 }, { "epoch": 0.07621094082789825, "grad_norm": 166.8349151611328, "learning_rate": 1.9888242975749717e-06, "loss": 21.3516, "step": 8051 }, { "epoch": 0.07622040684961331, "grad_norm": 368.67181396484375, "learning_rate": 1.9888197263912675e-06, "loss": 37.8281, "step": 8052 }, { "epoch": 0.07622987287132836, "grad_norm": 445.1274719238281, "learning_rate": 1.9888151542781364e-06, "loss": 14.6445, "step": 8053 }, { "epoch": 0.07623933889304342, "grad_norm": 238.21665954589844, "learning_rate": 1.988810581235584e-06, "loss": 22.8438, "step": 8054 }, { "epoch": 0.07624880491475848, "grad_norm": 644.42626953125, "learning_rate": 1.988806007263614e-06, "loss": 18.1367, "step": 8055 }, { "epoch": 0.07625827093647353, "grad_norm": 455.9514465332031, "learning_rate": 1.9888014323622303e-06, "loss": 46.5156, "step": 8056 }, { "epoch": 0.07626773695818859, "grad_norm": 309.8287048339844, "learning_rate": 1.9887968565314377e-06, "loss": 21.6719, "step": 8057 }, { "epoch": 0.07627720297990363, "grad_norm": 278.5891418457031, "learning_rate": 1.9887922797712404e-06, "loss": 15.7109, "step": 8058 }, { "epoch": 0.07628666900161869, "grad_norm": 687.9639892578125, "learning_rate": 1.988787702081643e-06, "loss": 60.8828, "step": 8059 }, { "epoch": 0.07629613502333374, "grad_norm": 298.4606628417969, "learning_rate": 1.9887831234626488e-06, "loss": 29.3047, "step": 8060 }, { "epoch": 0.0763056010450488, "grad_norm": 192.7290802001953, "learning_rate": 1.9887785439142633e-06, "loss": 23.6562, "step": 8061 }, { "epoch": 0.07631506706676384, "grad_norm": 526.62255859375, "learning_rate": 1.98877396343649e-06, "loss": 38.8438, "step": 8062 }, { "epoch": 0.0763245330884789, "grad_norm": 331.08990478515625, "learning_rate": 1.9887693820293336e-06, "loss": 17.25, "step": 8063 }, { "epoch": 0.07633399911019396, "grad_norm": 706.9811401367188, "learning_rate": 1.9887647996927982e-06, "loss": 25.2656, "step": 8064 }, { "epoch": 0.07634346513190901, "grad_norm": 164.03897094726562, "learning_rate": 1.9887602164268885e-06, "loss": 24.8594, "step": 8065 }, { "epoch": 0.07635293115362407, "grad_norm": 351.5257263183594, "learning_rate": 1.9887556322316084e-06, "loss": 55.5625, "step": 8066 }, { "epoch": 0.07636239717533912, "grad_norm": 4.074217796325684, "learning_rate": 1.9887510471069624e-06, "loss": 0.8545, "step": 8067 }, { "epoch": 0.07637186319705418, "grad_norm": 363.5827941894531, "learning_rate": 1.9887464610529544e-06, "loss": 21.6406, "step": 8068 }, { "epoch": 0.07638132921876922, "grad_norm": 592.734130859375, "learning_rate": 1.9887418740695894e-06, "loss": 34.9844, "step": 8069 }, { "epoch": 0.07639079524048428, "grad_norm": 163.94606018066406, "learning_rate": 1.9887372861568714e-06, "loss": 22.375, "step": 8070 }, { "epoch": 0.07640026126219933, "grad_norm": 3.527019500732422, "learning_rate": 1.9887326973148045e-06, "loss": 0.9639, "step": 8071 }, { "epoch": 0.07640972728391439, "grad_norm": 759.0111083984375, "learning_rate": 1.988728107543393e-06, "loss": 29.0312, "step": 8072 }, { "epoch": 0.07641919330562945, "grad_norm": 537.6572875976562, "learning_rate": 1.988723516842642e-06, "loss": 35.0781, "step": 8073 }, { "epoch": 0.0764286593273445, "grad_norm": 191.9508819580078, "learning_rate": 1.9887189252125545e-06, "loss": 22.0547, "step": 8074 }, { "epoch": 0.07643812534905955, "grad_norm": 3.1401946544647217, "learning_rate": 1.988714332653136e-06, "loss": 0.9133, "step": 8075 }, { "epoch": 0.0764475913707746, "grad_norm": 3.565115213394165, "learning_rate": 1.98870973916439e-06, "loss": 0.9814, "step": 8076 }, { "epoch": 0.07645705739248966, "grad_norm": 172.58181762695312, "learning_rate": 1.9887051447463213e-06, "loss": 18.75, "step": 8077 }, { "epoch": 0.0764665234142047, "grad_norm": 494.6904602050781, "learning_rate": 1.988700549398934e-06, "loss": 25.9688, "step": 8078 }, { "epoch": 0.07647598943591977, "grad_norm": 922.2372436523438, "learning_rate": 1.988695953122233e-06, "loss": 40.7656, "step": 8079 }, { "epoch": 0.07648545545763483, "grad_norm": 304.0807800292969, "learning_rate": 1.9886913559162218e-06, "loss": 30.5, "step": 8080 }, { "epoch": 0.07649492147934987, "grad_norm": 150.90155029296875, "learning_rate": 1.988686757780905e-06, "loss": 20.875, "step": 8081 }, { "epoch": 0.07650438750106493, "grad_norm": 614.43701171875, "learning_rate": 1.988682158716287e-06, "loss": 25.5078, "step": 8082 }, { "epoch": 0.07651385352277998, "grad_norm": 385.2694396972656, "learning_rate": 1.988677558722372e-06, "loss": 48.6562, "step": 8083 }, { "epoch": 0.07652331954449504, "grad_norm": 594.9563598632812, "learning_rate": 1.9886729577991647e-06, "loss": 27.5938, "step": 8084 }, { "epoch": 0.07653278556621008, "grad_norm": 524.158203125, "learning_rate": 1.988668355946669e-06, "loss": 18.5547, "step": 8085 }, { "epoch": 0.07654225158792514, "grad_norm": 337.279052734375, "learning_rate": 1.9886637531648888e-06, "loss": 22.1406, "step": 8086 }, { "epoch": 0.07655171760964019, "grad_norm": 183.38528442382812, "learning_rate": 1.9886591494538297e-06, "loss": 20.4375, "step": 8087 }, { "epoch": 0.07656118363135525, "grad_norm": 257.4671936035156, "learning_rate": 1.988654544813495e-06, "loss": 21.7188, "step": 8088 }, { "epoch": 0.07657064965307031, "grad_norm": 262.4556884765625, "learning_rate": 1.9886499392438895e-06, "loss": 15.1094, "step": 8089 }, { "epoch": 0.07658011567478536, "grad_norm": 196.37632751464844, "learning_rate": 1.988645332745017e-06, "loss": 24.2812, "step": 8090 }, { "epoch": 0.07658958169650042, "grad_norm": 478.4145812988281, "learning_rate": 1.988640725316883e-06, "loss": 34.8672, "step": 8091 }, { "epoch": 0.07659904771821546, "grad_norm": 216.36412048339844, "learning_rate": 1.98863611695949e-06, "loss": 22.7031, "step": 8092 }, { "epoch": 0.07660851373993052, "grad_norm": 520.9180908203125, "learning_rate": 1.9886315076728444e-06, "loss": 46.75, "step": 8093 }, { "epoch": 0.07661797976164557, "grad_norm": 387.60687255859375, "learning_rate": 1.988626897456949e-06, "loss": 52.8125, "step": 8094 }, { "epoch": 0.07662744578336063, "grad_norm": 727.5215454101562, "learning_rate": 1.9886222863118086e-06, "loss": 19.0312, "step": 8095 }, { "epoch": 0.07663691180507567, "grad_norm": 149.83523559570312, "learning_rate": 1.9886176742374274e-06, "loss": 24.0, "step": 8096 }, { "epoch": 0.07664637782679073, "grad_norm": 866.6432495117188, "learning_rate": 1.98861306123381e-06, "loss": 58.4062, "step": 8097 }, { "epoch": 0.0766558438485058, "grad_norm": 340.18206787109375, "learning_rate": 1.9886084473009607e-06, "loss": 47.9219, "step": 8098 }, { "epoch": 0.07666530987022084, "grad_norm": 279.0538330078125, "learning_rate": 1.988603832438884e-06, "loss": 17.7109, "step": 8099 }, { "epoch": 0.0766747758919359, "grad_norm": 243.5879364013672, "learning_rate": 1.988599216647584e-06, "loss": 26.1562, "step": 8100 }, { "epoch": 0.07668424191365095, "grad_norm": 200.87120056152344, "learning_rate": 1.9885945999270648e-06, "loss": 27.5156, "step": 8101 }, { "epoch": 0.076693707935366, "grad_norm": 314.3636779785156, "learning_rate": 1.988589982277331e-06, "loss": 26.5781, "step": 8102 }, { "epoch": 0.07670317395708105, "grad_norm": 861.4793701171875, "learning_rate": 1.988585363698387e-06, "loss": 40.9141, "step": 8103 }, { "epoch": 0.07671263997879611, "grad_norm": 173.4169464111328, "learning_rate": 1.988580744190237e-06, "loss": 21.2109, "step": 8104 }, { "epoch": 0.07672210600051116, "grad_norm": 593.4721069335938, "learning_rate": 1.9885761237528854e-06, "loss": 36.2031, "step": 8105 }, { "epoch": 0.07673157202222622, "grad_norm": 416.0846252441406, "learning_rate": 1.988571502386337e-06, "loss": 51.7812, "step": 8106 }, { "epoch": 0.07674103804394128, "grad_norm": 367.0791015625, "learning_rate": 1.988566880090595e-06, "loss": 19.7891, "step": 8107 }, { "epoch": 0.07675050406565632, "grad_norm": 554.1012573242188, "learning_rate": 1.988562256865665e-06, "loss": 65.8906, "step": 8108 }, { "epoch": 0.07675997008737138, "grad_norm": 813.490966796875, "learning_rate": 1.9885576327115504e-06, "loss": 23.1094, "step": 8109 }, { "epoch": 0.07676943610908643, "grad_norm": 387.534423828125, "learning_rate": 1.988553007628256e-06, "loss": 23.4844, "step": 8110 }, { "epoch": 0.07677890213080149, "grad_norm": 349.6477355957031, "learning_rate": 1.9885483816157863e-06, "loss": 21.9648, "step": 8111 }, { "epoch": 0.07678836815251654, "grad_norm": 485.4428405761719, "learning_rate": 1.9885437546741453e-06, "loss": 27.4688, "step": 8112 }, { "epoch": 0.0767978341742316, "grad_norm": 3.2769696712493896, "learning_rate": 1.9885391268033374e-06, "loss": 0.9946, "step": 8113 }, { "epoch": 0.07680730019594664, "grad_norm": 3.4930591583251953, "learning_rate": 1.988534498003367e-06, "loss": 0.877, "step": 8114 }, { "epoch": 0.0768167662176617, "grad_norm": 426.26171875, "learning_rate": 1.988529868274239e-06, "loss": 44.0938, "step": 8115 }, { "epoch": 0.07682623223937676, "grad_norm": 334.76898193359375, "learning_rate": 1.9885252376159565e-06, "loss": 18.0, "step": 8116 }, { "epoch": 0.07683569826109181, "grad_norm": 808.2674560546875, "learning_rate": 1.9885206060285248e-06, "loss": 32.9375, "step": 8117 }, { "epoch": 0.07684516428280687, "grad_norm": 439.7249755859375, "learning_rate": 1.9885159735119483e-06, "loss": 29.1406, "step": 8118 }, { "epoch": 0.07685463030452191, "grad_norm": 248.310791015625, "learning_rate": 1.988511340066231e-06, "loss": 22.2109, "step": 8119 }, { "epoch": 0.07686409632623697, "grad_norm": 712.6346435546875, "learning_rate": 1.9885067056913773e-06, "loss": 21.3594, "step": 8120 }, { "epoch": 0.07687356234795202, "grad_norm": 362.2654724121094, "learning_rate": 1.9885020703873916e-06, "loss": 30.2812, "step": 8121 }, { "epoch": 0.07688302836966708, "grad_norm": 178.7076416015625, "learning_rate": 1.988497434154278e-06, "loss": 25.4844, "step": 8122 }, { "epoch": 0.07689249439138214, "grad_norm": 1033.59375, "learning_rate": 1.9884927969920414e-06, "loss": 39.1562, "step": 8123 }, { "epoch": 0.07690196041309719, "grad_norm": 263.85577392578125, "learning_rate": 1.988488158900686e-06, "loss": 17.8359, "step": 8124 }, { "epoch": 0.07691142643481225, "grad_norm": 272.1051330566406, "learning_rate": 1.9884835198802156e-06, "loss": 27.4688, "step": 8125 }, { "epoch": 0.07692089245652729, "grad_norm": 259.28668212890625, "learning_rate": 1.9884788799306358e-06, "loss": 17.5938, "step": 8126 }, { "epoch": 0.07693035847824235, "grad_norm": 211.7530059814453, "learning_rate": 1.9884742390519493e-06, "loss": 18.9453, "step": 8127 }, { "epoch": 0.0769398244999574, "grad_norm": 460.2309265136719, "learning_rate": 1.9884695972441617e-06, "loss": 34.0078, "step": 8128 }, { "epoch": 0.07694929052167246, "grad_norm": 528.303955078125, "learning_rate": 1.988464954507277e-06, "loss": 47.8438, "step": 8129 }, { "epoch": 0.0769587565433875, "grad_norm": 508.80364990234375, "learning_rate": 1.9884603108412993e-06, "loss": 42.0, "step": 8130 }, { "epoch": 0.07696822256510256, "grad_norm": 616.1954956054688, "learning_rate": 1.9884556662462333e-06, "loss": 20.3672, "step": 8131 }, { "epoch": 0.07697768858681762, "grad_norm": 486.8760681152344, "learning_rate": 1.9884510207220836e-06, "loss": 44.4844, "step": 8132 }, { "epoch": 0.07698715460853267, "grad_norm": 326.483154296875, "learning_rate": 1.988446374268854e-06, "loss": 29.6172, "step": 8133 }, { "epoch": 0.07699662063024773, "grad_norm": 3.2750985622406006, "learning_rate": 1.988441726886549e-06, "loss": 0.8804, "step": 8134 }, { "epoch": 0.07700608665196278, "grad_norm": 631.4591064453125, "learning_rate": 1.988437078575173e-06, "loss": 48.2422, "step": 8135 }, { "epoch": 0.07701555267367784, "grad_norm": 203.1705780029297, "learning_rate": 1.988432429334731e-06, "loss": 28.5938, "step": 8136 }, { "epoch": 0.07702501869539288, "grad_norm": 316.113525390625, "learning_rate": 1.9884277791652264e-06, "loss": 30.0312, "step": 8137 }, { "epoch": 0.07703448471710794, "grad_norm": 204.3355255126953, "learning_rate": 1.988423128066664e-06, "loss": 24.4141, "step": 8138 }, { "epoch": 0.07704395073882299, "grad_norm": 305.6619873046875, "learning_rate": 1.988418476039048e-06, "loss": 21.9219, "step": 8139 }, { "epoch": 0.07705341676053805, "grad_norm": 352.6118469238281, "learning_rate": 1.9884138230823834e-06, "loss": 27.1641, "step": 8140 }, { "epoch": 0.07706288278225311, "grad_norm": 418.453125, "learning_rate": 1.988409169196674e-06, "loss": 59.6094, "step": 8141 }, { "epoch": 0.07707234880396815, "grad_norm": 796.5441284179688, "learning_rate": 1.988404514381924e-06, "loss": 40.2812, "step": 8142 }, { "epoch": 0.07708181482568321, "grad_norm": 349.6156005859375, "learning_rate": 1.988399858638138e-06, "loss": 23.3984, "step": 8143 }, { "epoch": 0.07709128084739826, "grad_norm": 305.52545166015625, "learning_rate": 1.988395201965321e-06, "loss": 22.9062, "step": 8144 }, { "epoch": 0.07710074686911332, "grad_norm": 187.94371032714844, "learning_rate": 1.988390544363476e-06, "loss": 14.7891, "step": 8145 }, { "epoch": 0.07711021289082837, "grad_norm": 472.0030212402344, "learning_rate": 1.988385885832609e-06, "loss": 52.1094, "step": 8146 }, { "epoch": 0.07711967891254343, "grad_norm": 353.2944641113281, "learning_rate": 1.988381226372723e-06, "loss": 18.0938, "step": 8147 }, { "epoch": 0.07712914493425847, "grad_norm": 384.3602600097656, "learning_rate": 1.9883765659838236e-06, "loss": 36.0938, "step": 8148 }, { "epoch": 0.07713861095597353, "grad_norm": 987.880126953125, "learning_rate": 1.9883719046659136e-06, "loss": 45.7812, "step": 8149 }, { "epoch": 0.07714807697768859, "grad_norm": 239.6569061279297, "learning_rate": 1.988367242418999e-06, "loss": 28.75, "step": 8150 }, { "epoch": 0.07715754299940364, "grad_norm": 764.9644165039062, "learning_rate": 1.988362579243083e-06, "loss": 39.9453, "step": 8151 }, { "epoch": 0.0771670090211187, "grad_norm": 669.8430786132812, "learning_rate": 1.988357915138171e-06, "loss": 35.4453, "step": 8152 }, { "epoch": 0.07717647504283374, "grad_norm": 277.8525695800781, "learning_rate": 1.9883532501042667e-06, "loss": 13.0625, "step": 8153 }, { "epoch": 0.0771859410645488, "grad_norm": 3.6928205490112305, "learning_rate": 1.9883485841413746e-06, "loss": 1.0688, "step": 8154 }, { "epoch": 0.07719540708626385, "grad_norm": 322.11395263671875, "learning_rate": 1.988343917249499e-06, "loss": 30.1797, "step": 8155 }, { "epoch": 0.07720487310797891, "grad_norm": 716.9779052734375, "learning_rate": 1.988339249428644e-06, "loss": 35.9141, "step": 8156 }, { "epoch": 0.07721433912969396, "grad_norm": 260.8818664550781, "learning_rate": 1.9883345806788153e-06, "loss": 29.2188, "step": 8157 }, { "epoch": 0.07722380515140902, "grad_norm": 730.1019897460938, "learning_rate": 1.988329911000016e-06, "loss": 24.2344, "step": 8158 }, { "epoch": 0.07723327117312408, "grad_norm": 361.70440673828125, "learning_rate": 1.9883252403922505e-06, "loss": 21.4844, "step": 8159 }, { "epoch": 0.07724273719483912, "grad_norm": 264.6295471191406, "learning_rate": 1.988320568855524e-06, "loss": 21.5625, "step": 8160 }, { "epoch": 0.07725220321655418, "grad_norm": 290.17889404296875, "learning_rate": 1.9883158963898407e-06, "loss": 21.5781, "step": 8161 }, { "epoch": 0.07726166923826923, "grad_norm": 166.3005828857422, "learning_rate": 1.9883112229952043e-06, "loss": 18.8516, "step": 8162 }, { "epoch": 0.07727113525998429, "grad_norm": 378.1297912597656, "learning_rate": 1.9883065486716197e-06, "loss": 45.3281, "step": 8163 }, { "epoch": 0.07728060128169933, "grad_norm": 2.7304036617279053, "learning_rate": 1.988301873419091e-06, "loss": 0.8999, "step": 8164 }, { "epoch": 0.0772900673034144, "grad_norm": 1109.22900390625, "learning_rate": 1.9882971972376236e-06, "loss": 35.0547, "step": 8165 }, { "epoch": 0.07729953332512945, "grad_norm": 347.8614807128906, "learning_rate": 1.9882925201272206e-06, "loss": 34.9219, "step": 8166 }, { "epoch": 0.0773089993468445, "grad_norm": 190.9130859375, "learning_rate": 1.988287842087887e-06, "loss": 22.4297, "step": 8167 }, { "epoch": 0.07731846536855956, "grad_norm": 694.0296020507812, "learning_rate": 1.988283163119627e-06, "loss": 55.4609, "step": 8168 }, { "epoch": 0.0773279313902746, "grad_norm": 190.91978454589844, "learning_rate": 1.9882784832224453e-06, "loss": 19.0234, "step": 8169 }, { "epoch": 0.07733739741198967, "grad_norm": 427.32794189453125, "learning_rate": 1.9882738023963463e-06, "loss": 22.4297, "step": 8170 }, { "epoch": 0.07734686343370471, "grad_norm": 500.5888977050781, "learning_rate": 1.9882691206413334e-06, "loss": 24.7109, "step": 8171 }, { "epoch": 0.07735632945541977, "grad_norm": 1659.0382080078125, "learning_rate": 1.9882644379574125e-06, "loss": 59.875, "step": 8172 }, { "epoch": 0.07736579547713482, "grad_norm": 464.40777587890625, "learning_rate": 1.9882597543445874e-06, "loss": 28.7969, "step": 8173 }, { "epoch": 0.07737526149884988, "grad_norm": 887.5704956054688, "learning_rate": 1.9882550698028623e-06, "loss": 35.9688, "step": 8174 }, { "epoch": 0.07738472752056494, "grad_norm": 321.1134338378906, "learning_rate": 1.988250384332241e-06, "loss": 16.8398, "step": 8175 }, { "epoch": 0.07739419354227998, "grad_norm": 372.7459411621094, "learning_rate": 1.9882456979327297e-06, "loss": 46.1719, "step": 8176 }, { "epoch": 0.07740365956399504, "grad_norm": 331.54815673828125, "learning_rate": 1.9882410106043315e-06, "loss": 40.1094, "step": 8177 }, { "epoch": 0.07741312558571009, "grad_norm": 195.5301055908203, "learning_rate": 1.9882363223470507e-06, "loss": 20.2344, "step": 8178 }, { "epoch": 0.07742259160742515, "grad_norm": 299.5621032714844, "learning_rate": 1.9882316331608924e-06, "loss": 21.2617, "step": 8179 }, { "epoch": 0.0774320576291402, "grad_norm": 450.919189453125, "learning_rate": 1.9882269430458604e-06, "loss": 23.4844, "step": 8180 }, { "epoch": 0.07744152365085526, "grad_norm": 3.2459018230438232, "learning_rate": 1.9882222520019593e-06, "loss": 0.9629, "step": 8181 }, { "epoch": 0.0774509896725703, "grad_norm": 252.8109130859375, "learning_rate": 1.988217560029194e-06, "loss": 21.5938, "step": 8182 }, { "epoch": 0.07746045569428536, "grad_norm": 498.695068359375, "learning_rate": 1.988212867127568e-06, "loss": 36.2188, "step": 8183 }, { "epoch": 0.07746992171600042, "grad_norm": 517.5072021484375, "learning_rate": 1.988208173297086e-06, "loss": 40.5625, "step": 8184 }, { "epoch": 0.07747938773771547, "grad_norm": 1702.565673828125, "learning_rate": 1.9882034785377533e-06, "loss": 16.6172, "step": 8185 }, { "epoch": 0.07748885375943053, "grad_norm": 195.5110626220703, "learning_rate": 1.988198782849573e-06, "loss": 22.7578, "step": 8186 }, { "epoch": 0.07749831978114557, "grad_norm": 525.4268798828125, "learning_rate": 1.9881940862325506e-06, "loss": 44.5625, "step": 8187 }, { "epoch": 0.07750778580286063, "grad_norm": 474.8701171875, "learning_rate": 1.98818938868669e-06, "loss": 42.375, "step": 8188 }, { "epoch": 0.07751725182457568, "grad_norm": 551.1829223632812, "learning_rate": 1.9881846902119954e-06, "loss": 48.375, "step": 8189 }, { "epoch": 0.07752671784629074, "grad_norm": 447.24420166015625, "learning_rate": 1.9881799908084717e-06, "loss": 35.4375, "step": 8190 }, { "epoch": 0.07753618386800579, "grad_norm": 2.679389238357544, "learning_rate": 1.9881752904761225e-06, "loss": 0.9023, "step": 8191 }, { "epoch": 0.07754564988972085, "grad_norm": 688.9622802734375, "learning_rate": 1.9881705892149535e-06, "loss": 53.8672, "step": 8192 }, { "epoch": 0.0775551159114359, "grad_norm": 518.5081176757812, "learning_rate": 1.9881658870249683e-06, "loss": 38.5703, "step": 8193 }, { "epoch": 0.07756458193315095, "grad_norm": 896.4820556640625, "learning_rate": 1.9881611839061713e-06, "loss": 76.0781, "step": 8194 }, { "epoch": 0.07757404795486601, "grad_norm": 218.35986328125, "learning_rate": 1.988156479858567e-06, "loss": 20.0547, "step": 8195 }, { "epoch": 0.07758351397658106, "grad_norm": 541.495849609375, "learning_rate": 1.9881517748821603e-06, "loss": 23.7422, "step": 8196 }, { "epoch": 0.07759297999829612, "grad_norm": 687.8442993164062, "learning_rate": 1.988147068976955e-06, "loss": 44.125, "step": 8197 }, { "epoch": 0.07760244602001116, "grad_norm": 225.2308349609375, "learning_rate": 1.9881423621429557e-06, "loss": 23.9297, "step": 8198 }, { "epoch": 0.07761191204172622, "grad_norm": 275.5953369140625, "learning_rate": 1.988137654380167e-06, "loss": 31.625, "step": 8199 }, { "epoch": 0.07762137806344127, "grad_norm": 820.475341796875, "learning_rate": 1.988132945688593e-06, "loss": 59.7109, "step": 8200 }, { "epoch": 0.07763084408515633, "grad_norm": 675.160400390625, "learning_rate": 1.9881282360682383e-06, "loss": 23.2578, "step": 8201 }, { "epoch": 0.07764031010687139, "grad_norm": 563.4215698242188, "learning_rate": 1.9881235255191073e-06, "loss": 46.6641, "step": 8202 }, { "epoch": 0.07764977612858644, "grad_norm": 1079.9742431640625, "learning_rate": 1.9881188140412047e-06, "loss": 57.2734, "step": 8203 }, { "epoch": 0.0776592421503015, "grad_norm": 235.62661743164062, "learning_rate": 1.9881141016345343e-06, "loss": 19.6484, "step": 8204 }, { "epoch": 0.07766870817201654, "grad_norm": 445.4721984863281, "learning_rate": 1.9881093882991016e-06, "loss": 53.1562, "step": 8205 }, { "epoch": 0.0776781741937316, "grad_norm": 382.2731628417969, "learning_rate": 1.9881046740349095e-06, "loss": 54.4844, "step": 8206 }, { "epoch": 0.07768764021544665, "grad_norm": 295.4429931640625, "learning_rate": 1.988099958841964e-06, "loss": 19.8359, "step": 8207 }, { "epoch": 0.07769710623716171, "grad_norm": 423.2117614746094, "learning_rate": 1.9880952427202685e-06, "loss": 44.6523, "step": 8208 }, { "epoch": 0.07770657225887677, "grad_norm": 661.9795532226562, "learning_rate": 1.9880905256698276e-06, "loss": 24.7812, "step": 8209 }, { "epoch": 0.07771603828059181, "grad_norm": 222.7840118408203, "learning_rate": 1.988085807690646e-06, "loss": 22.875, "step": 8210 }, { "epoch": 0.07772550430230687, "grad_norm": 210.26768493652344, "learning_rate": 1.9880810887827284e-06, "loss": 19.1406, "step": 8211 }, { "epoch": 0.07773497032402192, "grad_norm": 189.1550750732422, "learning_rate": 1.9880763689460785e-06, "loss": 24.9844, "step": 8212 }, { "epoch": 0.07774443634573698, "grad_norm": 464.6105041503906, "learning_rate": 1.988071648180701e-06, "loss": 34.2109, "step": 8213 }, { "epoch": 0.07775390236745203, "grad_norm": 460.5853576660156, "learning_rate": 1.9880669264866007e-06, "loss": 31.9688, "step": 8214 }, { "epoch": 0.07776336838916709, "grad_norm": 397.88507080078125, "learning_rate": 1.9880622038637816e-06, "loss": 25.9844, "step": 8215 }, { "epoch": 0.07777283441088213, "grad_norm": 397.10394287109375, "learning_rate": 1.9880574803122485e-06, "loss": 23.5078, "step": 8216 }, { "epoch": 0.07778230043259719, "grad_norm": 3.3290598392486572, "learning_rate": 1.9880527558320056e-06, "loss": 0.8882, "step": 8217 }, { "epoch": 0.07779176645431225, "grad_norm": 457.680908203125, "learning_rate": 1.988048030423057e-06, "loss": 34.0781, "step": 8218 }, { "epoch": 0.0778012324760273, "grad_norm": 438.16156005859375, "learning_rate": 1.9880433040854083e-06, "loss": 16.332, "step": 8219 }, { "epoch": 0.07781069849774236, "grad_norm": 650.6170654296875, "learning_rate": 1.9880385768190623e-06, "loss": 62.2188, "step": 8220 }, { "epoch": 0.0778201645194574, "grad_norm": 339.4599304199219, "learning_rate": 1.988033848624025e-06, "loss": 23.6562, "step": 8221 }, { "epoch": 0.07782963054117246, "grad_norm": 870.8787231445312, "learning_rate": 1.9880291195002997e-06, "loss": 47.5742, "step": 8222 }, { "epoch": 0.07783909656288751, "grad_norm": 596.0487670898438, "learning_rate": 1.9880243894478912e-06, "loss": 21.4844, "step": 8223 }, { "epoch": 0.07784856258460257, "grad_norm": 283.40576171875, "learning_rate": 1.9880196584668043e-06, "loss": 21.2812, "step": 8224 }, { "epoch": 0.07785802860631762, "grad_norm": 547.9681396484375, "learning_rate": 1.9880149265570436e-06, "loss": 26.9141, "step": 8225 }, { "epoch": 0.07786749462803268, "grad_norm": 518.9537963867188, "learning_rate": 1.9880101937186125e-06, "loss": 12.9297, "step": 8226 }, { "epoch": 0.07787696064974774, "grad_norm": 911.3694458007812, "learning_rate": 1.9880054599515164e-06, "loss": 74.6562, "step": 8227 }, { "epoch": 0.07788642667146278, "grad_norm": 961.8978881835938, "learning_rate": 1.9880007252557593e-06, "loss": 68.1172, "step": 8228 }, { "epoch": 0.07789589269317784, "grad_norm": 437.2579650878906, "learning_rate": 1.987995989631346e-06, "loss": 35.1875, "step": 8229 }, { "epoch": 0.07790535871489289, "grad_norm": 318.5867004394531, "learning_rate": 1.9879912530782805e-06, "loss": 26.1406, "step": 8230 }, { "epoch": 0.07791482473660795, "grad_norm": 313.99542236328125, "learning_rate": 1.987986515596568e-06, "loss": 26.1797, "step": 8231 }, { "epoch": 0.077924290758323, "grad_norm": 3.7438199520111084, "learning_rate": 1.987981777186212e-06, "loss": 0.8804, "step": 8232 }, { "epoch": 0.07793375678003805, "grad_norm": 2.867356061935425, "learning_rate": 1.987977037847217e-06, "loss": 0.8647, "step": 8233 }, { "epoch": 0.0779432228017531, "grad_norm": 358.2580261230469, "learning_rate": 1.9879722975795887e-06, "loss": 19.6641, "step": 8234 }, { "epoch": 0.07795268882346816, "grad_norm": 549.95361328125, "learning_rate": 1.98796755638333e-06, "loss": 36.4141, "step": 8235 }, { "epoch": 0.07796215484518322, "grad_norm": 509.61083984375, "learning_rate": 1.987962814258447e-06, "loss": 54.6406, "step": 8236 }, { "epoch": 0.07797162086689827, "grad_norm": 453.89654541015625, "learning_rate": 1.987958071204942e-06, "loss": 27.4375, "step": 8237 }, { "epoch": 0.07798108688861333, "grad_norm": 2.6008594036102295, "learning_rate": 1.987953327222822e-06, "loss": 0.8257, "step": 8238 }, { "epoch": 0.07799055291032837, "grad_norm": 166.3528289794922, "learning_rate": 1.9879485823120888e-06, "loss": 20.0391, "step": 8239 }, { "epoch": 0.07800001893204343, "grad_norm": 258.9320373535156, "learning_rate": 1.9879438364727492e-06, "loss": 17.9492, "step": 8240 }, { "epoch": 0.07800948495375848, "grad_norm": 274.36846923828125, "learning_rate": 1.9879390897048062e-06, "loss": 22.0547, "step": 8241 }, { "epoch": 0.07801895097547354, "grad_norm": 397.2615966796875, "learning_rate": 1.987934342008265e-06, "loss": 46.6406, "step": 8242 }, { "epoch": 0.07802841699718858, "grad_norm": 263.95281982421875, "learning_rate": 1.9879295933831297e-06, "loss": 20.1641, "step": 8243 }, { "epoch": 0.07803788301890364, "grad_norm": 458.8432922363281, "learning_rate": 1.9879248438294046e-06, "loss": 24.7031, "step": 8244 }, { "epoch": 0.0780473490406187, "grad_norm": 645.1764526367188, "learning_rate": 1.9879200933470946e-06, "loss": 28.875, "step": 8245 }, { "epoch": 0.07805681506233375, "grad_norm": 601.5708618164062, "learning_rate": 1.987915341936204e-06, "loss": 30.75, "step": 8246 }, { "epoch": 0.07806628108404881, "grad_norm": 335.3727722167969, "learning_rate": 1.9879105895967375e-06, "loss": 14.625, "step": 8247 }, { "epoch": 0.07807574710576386, "grad_norm": 452.1387634277344, "learning_rate": 1.987905836328699e-06, "loss": 20.6523, "step": 8248 }, { "epoch": 0.07808521312747892, "grad_norm": 507.8369445800781, "learning_rate": 1.987901082132093e-06, "loss": 14.3438, "step": 8249 }, { "epoch": 0.07809467914919396, "grad_norm": 361.8849182128906, "learning_rate": 1.9878963270069245e-06, "loss": 21.0469, "step": 8250 }, { "epoch": 0.07810414517090902, "grad_norm": 209.7782745361328, "learning_rate": 1.987891570953198e-06, "loss": 15.1133, "step": 8251 }, { "epoch": 0.07811361119262408, "grad_norm": 506.6585693359375, "learning_rate": 1.9878868139709174e-06, "loss": 52.75, "step": 8252 }, { "epoch": 0.07812307721433913, "grad_norm": 737.6630249023438, "learning_rate": 1.9878820560600874e-06, "loss": 32.4453, "step": 8253 }, { "epoch": 0.07813254323605419, "grad_norm": 216.17642211914062, "learning_rate": 1.987877297220713e-06, "loss": 27.2422, "step": 8254 }, { "epoch": 0.07814200925776923, "grad_norm": 465.1225891113281, "learning_rate": 1.987872537452798e-06, "loss": 38.3125, "step": 8255 }, { "epoch": 0.0781514752794843, "grad_norm": 245.48377990722656, "learning_rate": 1.9878677767563467e-06, "loss": 15.9375, "step": 8256 }, { "epoch": 0.07816094130119934, "grad_norm": 241.1619110107422, "learning_rate": 1.9878630151313645e-06, "loss": 19.4219, "step": 8257 }, { "epoch": 0.0781704073229144, "grad_norm": 3.9483375549316406, "learning_rate": 1.987858252577855e-06, "loss": 1.0554, "step": 8258 }, { "epoch": 0.07817987334462945, "grad_norm": 327.32440185546875, "learning_rate": 1.987853489095823e-06, "loss": 22.0, "step": 8259 }, { "epoch": 0.0781893393663445, "grad_norm": 303.9170837402344, "learning_rate": 1.987848724685273e-06, "loss": 21.9141, "step": 8260 }, { "epoch": 0.07819880538805957, "grad_norm": 309.4626770019531, "learning_rate": 1.9878439593462096e-06, "loss": 24.0312, "step": 8261 }, { "epoch": 0.07820827140977461, "grad_norm": 3.0832345485687256, "learning_rate": 1.987839193078637e-06, "loss": 1.0459, "step": 8262 }, { "epoch": 0.07821773743148967, "grad_norm": 167.67681884765625, "learning_rate": 1.9878344258825597e-06, "loss": 23.8438, "step": 8263 }, { "epoch": 0.07822720345320472, "grad_norm": 199.7349853515625, "learning_rate": 1.987829657757983e-06, "loss": 19.6484, "step": 8264 }, { "epoch": 0.07823666947491978, "grad_norm": 2.7938594818115234, "learning_rate": 1.9878248887049096e-06, "loss": 0.8325, "step": 8265 }, { "epoch": 0.07824613549663482, "grad_norm": 336.162353515625, "learning_rate": 1.987820118723346e-06, "loss": 21.1562, "step": 8266 }, { "epoch": 0.07825560151834988, "grad_norm": 287.31170654296875, "learning_rate": 1.9878153478132953e-06, "loss": 17.1484, "step": 8267 }, { "epoch": 0.07826506754006493, "grad_norm": 206.2054443359375, "learning_rate": 1.9878105759747623e-06, "loss": 16.707, "step": 8268 }, { "epoch": 0.07827453356177999, "grad_norm": 3.7213282585144043, "learning_rate": 1.987805803207752e-06, "loss": 0.9614, "step": 8269 }, { "epoch": 0.07828399958349505, "grad_norm": 366.2005310058594, "learning_rate": 1.9878010295122682e-06, "loss": 30.9453, "step": 8270 }, { "epoch": 0.0782934656052101, "grad_norm": 476.2554016113281, "learning_rate": 1.9877962548883157e-06, "loss": 21.3047, "step": 8271 }, { "epoch": 0.07830293162692516, "grad_norm": 421.9985656738281, "learning_rate": 1.9877914793358993e-06, "loss": 53.5469, "step": 8272 }, { "epoch": 0.0783123976486402, "grad_norm": 681.7072143554688, "learning_rate": 1.9877867028550227e-06, "loss": 57.4844, "step": 8273 }, { "epoch": 0.07832186367035526, "grad_norm": 664.4384155273438, "learning_rate": 1.987781925445691e-06, "loss": 43.5625, "step": 8274 }, { "epoch": 0.07833132969207031, "grad_norm": 2.41931414604187, "learning_rate": 1.987777147107909e-06, "loss": 0.8208, "step": 8275 }, { "epoch": 0.07834079571378537, "grad_norm": 471.935302734375, "learning_rate": 1.9877723678416806e-06, "loss": 34.6992, "step": 8276 }, { "epoch": 0.07835026173550041, "grad_norm": 213.3160858154297, "learning_rate": 1.98776758764701e-06, "loss": 17.9844, "step": 8277 }, { "epoch": 0.07835972775721547, "grad_norm": 271.3631896972656, "learning_rate": 1.9877628065239024e-06, "loss": 23.8672, "step": 8278 }, { "epoch": 0.07836919377893053, "grad_norm": 883.251220703125, "learning_rate": 1.9877580244723624e-06, "loss": 37.0547, "step": 8279 }, { "epoch": 0.07837865980064558, "grad_norm": 700.646484375, "learning_rate": 1.9877532414923933e-06, "loss": 32.0, "step": 8280 }, { "epoch": 0.07838812582236064, "grad_norm": 471.791015625, "learning_rate": 1.9877484575840012e-06, "loss": 43.1953, "step": 8281 }, { "epoch": 0.07839759184407569, "grad_norm": 247.42515563964844, "learning_rate": 1.9877436727471894e-06, "loss": 22.1953, "step": 8282 }, { "epoch": 0.07840705786579075, "grad_norm": 342.30517578125, "learning_rate": 1.9877388869819626e-06, "loss": 21.6641, "step": 8283 }, { "epoch": 0.07841652388750579, "grad_norm": 596.1990356445312, "learning_rate": 1.987734100288326e-06, "loss": 37.9805, "step": 8284 }, { "epoch": 0.07842598990922085, "grad_norm": 3.197108507156372, "learning_rate": 1.9877293126662834e-06, "loss": 1.0454, "step": 8285 }, { "epoch": 0.0784354559309359, "grad_norm": 332.29400634765625, "learning_rate": 1.987724524115839e-06, "loss": 39.7031, "step": 8286 }, { "epoch": 0.07844492195265096, "grad_norm": 1673.3929443359375, "learning_rate": 1.9877197346369987e-06, "loss": 54.25, "step": 8287 }, { "epoch": 0.07845438797436602, "grad_norm": 529.2880249023438, "learning_rate": 1.9877149442297654e-06, "loss": 33.125, "step": 8288 }, { "epoch": 0.07846385399608106, "grad_norm": 1018.2173461914062, "learning_rate": 1.9877101528941445e-06, "loss": 61.1562, "step": 8289 }, { "epoch": 0.07847332001779612, "grad_norm": 869.5347290039062, "learning_rate": 1.9877053606301404e-06, "loss": 35.0703, "step": 8290 }, { "epoch": 0.07848278603951117, "grad_norm": 386.2419738769531, "learning_rate": 1.9877005674377577e-06, "loss": 23.4844, "step": 8291 }, { "epoch": 0.07849225206122623, "grad_norm": 203.1743927001953, "learning_rate": 1.9876957733170007e-06, "loss": 17.6797, "step": 8292 }, { "epoch": 0.07850171808294128, "grad_norm": 398.6024169921875, "learning_rate": 1.9876909782678735e-06, "loss": 41.3594, "step": 8293 }, { "epoch": 0.07851118410465634, "grad_norm": 498.1827697753906, "learning_rate": 1.987686182290381e-06, "loss": 42.0, "step": 8294 }, { "epoch": 0.07852065012637138, "grad_norm": 370.4280090332031, "learning_rate": 1.987681385384528e-06, "loss": 20.6172, "step": 8295 }, { "epoch": 0.07853011614808644, "grad_norm": 212.22488403320312, "learning_rate": 1.987676587550319e-06, "loss": 21.0391, "step": 8296 }, { "epoch": 0.0785395821698015, "grad_norm": 427.5256652832031, "learning_rate": 1.9876717887877578e-06, "loss": 24.2656, "step": 8297 }, { "epoch": 0.07854904819151655, "grad_norm": 652.5330810546875, "learning_rate": 1.98766698909685e-06, "loss": 26.1953, "step": 8298 }, { "epoch": 0.07855851421323161, "grad_norm": 2.4632163047790527, "learning_rate": 1.987662188477599e-06, "loss": 0.8325, "step": 8299 }, { "epoch": 0.07856798023494665, "grad_norm": 276.2518310546875, "learning_rate": 1.98765738693001e-06, "loss": 26.6875, "step": 8300 }, { "epoch": 0.07857744625666171, "grad_norm": 503.56390380859375, "learning_rate": 1.987652584454087e-06, "loss": 27.0801, "step": 8301 }, { "epoch": 0.07858691227837676, "grad_norm": 757.4452514648438, "learning_rate": 1.987647781049835e-06, "loss": 38.9141, "step": 8302 }, { "epoch": 0.07859637830009182, "grad_norm": 402.4341125488281, "learning_rate": 1.987642976717258e-06, "loss": 20.2578, "step": 8303 }, { "epoch": 0.07860584432180688, "grad_norm": 297.1852722167969, "learning_rate": 1.9876381714563614e-06, "loss": 19.3203, "step": 8304 }, { "epoch": 0.07861531034352193, "grad_norm": 489.5880432128906, "learning_rate": 1.987633365267149e-06, "loss": 58.625, "step": 8305 }, { "epoch": 0.07862477636523699, "grad_norm": 530.9896240234375, "learning_rate": 1.9876285581496257e-06, "loss": 54.3438, "step": 8306 }, { "epoch": 0.07863424238695203, "grad_norm": 312.6273498535156, "learning_rate": 1.987623750103795e-06, "loss": 33.5234, "step": 8307 }, { "epoch": 0.07864370840866709, "grad_norm": 288.46527099609375, "learning_rate": 1.987618941129663e-06, "loss": 40.625, "step": 8308 }, { "epoch": 0.07865317443038214, "grad_norm": 542.2213745117188, "learning_rate": 1.9876141312272335e-06, "loss": 24.0156, "step": 8309 }, { "epoch": 0.0786626404520972, "grad_norm": 428.4734802246094, "learning_rate": 1.9876093203965107e-06, "loss": 13.9453, "step": 8310 }, { "epoch": 0.07867210647381224, "grad_norm": 213.30455017089844, "learning_rate": 1.987604508637499e-06, "loss": 19.0781, "step": 8311 }, { "epoch": 0.0786815724955273, "grad_norm": 238.19482421875, "learning_rate": 1.9875996959502038e-06, "loss": 21.9453, "step": 8312 }, { "epoch": 0.07869103851724236, "grad_norm": 281.98016357421875, "learning_rate": 1.987594882334629e-06, "loss": 23.6875, "step": 8313 }, { "epoch": 0.07870050453895741, "grad_norm": 2208.97021484375, "learning_rate": 1.9875900677907797e-06, "loss": 56.6875, "step": 8314 }, { "epoch": 0.07870997056067247, "grad_norm": 1503.8074951171875, "learning_rate": 1.9875852523186594e-06, "loss": 57.4844, "step": 8315 }, { "epoch": 0.07871943658238752, "grad_norm": 3.2314400672912598, "learning_rate": 1.987580435918273e-06, "loss": 0.9517, "step": 8316 }, { "epoch": 0.07872890260410258, "grad_norm": 629.7354736328125, "learning_rate": 1.9875756185896257e-06, "loss": 34.5, "step": 8317 }, { "epoch": 0.07873836862581762, "grad_norm": 726.44189453125, "learning_rate": 1.9875708003327215e-06, "loss": 50.2188, "step": 8318 }, { "epoch": 0.07874783464753268, "grad_norm": 857.875732421875, "learning_rate": 1.987565981147565e-06, "loss": 48.5547, "step": 8319 }, { "epoch": 0.07875730066924773, "grad_norm": 442.4071350097656, "learning_rate": 1.987561161034161e-06, "loss": 25.3125, "step": 8320 }, { "epoch": 0.07876676669096279, "grad_norm": 401.05023193359375, "learning_rate": 1.9875563399925133e-06, "loss": 22.7734, "step": 8321 }, { "epoch": 0.07877623271267785, "grad_norm": 128.1033935546875, "learning_rate": 1.987551518022627e-06, "loss": 16.9062, "step": 8322 }, { "epoch": 0.0787856987343929, "grad_norm": 695.3598022460938, "learning_rate": 1.9875466951245068e-06, "loss": 28.6016, "step": 8323 }, { "epoch": 0.07879516475610795, "grad_norm": 334.0130615234375, "learning_rate": 1.9875418712981564e-06, "loss": 34.1406, "step": 8324 }, { "epoch": 0.078804630777823, "grad_norm": 338.6497802734375, "learning_rate": 1.9875370465435812e-06, "loss": 32.4375, "step": 8325 }, { "epoch": 0.07881409679953806, "grad_norm": 3.753122568130493, "learning_rate": 1.9875322208607854e-06, "loss": 0.9658, "step": 8326 }, { "epoch": 0.07882356282125311, "grad_norm": 284.7171630859375, "learning_rate": 1.9875273942497736e-06, "loss": 31.375, "step": 8327 }, { "epoch": 0.07883302884296817, "grad_norm": 179.79754638671875, "learning_rate": 1.98752256671055e-06, "loss": 17.8516, "step": 8328 }, { "epoch": 0.07884249486468321, "grad_norm": 1200.295654296875, "learning_rate": 1.98751773824312e-06, "loss": 48.5, "step": 8329 }, { "epoch": 0.07885196088639827, "grad_norm": 700.0030517578125, "learning_rate": 1.9875129088474872e-06, "loss": 50.0469, "step": 8330 }, { "epoch": 0.07886142690811333, "grad_norm": 299.6881408691406, "learning_rate": 1.9875080785236564e-06, "loss": 21.8828, "step": 8331 }, { "epoch": 0.07887089292982838, "grad_norm": 175.5735321044922, "learning_rate": 1.9875032472716325e-06, "loss": 11.5312, "step": 8332 }, { "epoch": 0.07888035895154344, "grad_norm": 356.0053405761719, "learning_rate": 1.9874984150914197e-06, "loss": 23.4297, "step": 8333 }, { "epoch": 0.07888982497325848, "grad_norm": 374.185546875, "learning_rate": 1.9874935819830227e-06, "loss": 25.6875, "step": 8334 }, { "epoch": 0.07889929099497354, "grad_norm": 614.9449462890625, "learning_rate": 1.987488747946446e-06, "loss": 62.4219, "step": 8335 }, { "epoch": 0.07890875701668859, "grad_norm": 335.2027587890625, "learning_rate": 1.987483912981694e-06, "loss": 24.1172, "step": 8336 }, { "epoch": 0.07891822303840365, "grad_norm": 472.56689453125, "learning_rate": 1.9874790770887714e-06, "loss": 28.8984, "step": 8337 }, { "epoch": 0.0789276890601187, "grad_norm": 277.458251953125, "learning_rate": 1.9874742402676825e-06, "loss": 31.1719, "step": 8338 }, { "epoch": 0.07893715508183376, "grad_norm": 178.8313751220703, "learning_rate": 1.9874694025184322e-06, "loss": 22.9531, "step": 8339 }, { "epoch": 0.07894662110354882, "grad_norm": 512.3998413085938, "learning_rate": 1.987464563841025e-06, "loss": 56.4844, "step": 8340 }, { "epoch": 0.07895608712526386, "grad_norm": 308.81036376953125, "learning_rate": 1.987459724235465e-06, "loss": 33.9062, "step": 8341 }, { "epoch": 0.07896555314697892, "grad_norm": 208.32667541503906, "learning_rate": 1.9874548837017574e-06, "loss": 17.4688, "step": 8342 }, { "epoch": 0.07897501916869397, "grad_norm": 261.7816162109375, "learning_rate": 1.9874500422399067e-06, "loss": 29.125, "step": 8343 }, { "epoch": 0.07898448519040903, "grad_norm": 751.1848754882812, "learning_rate": 1.987445199849917e-06, "loss": 56.3828, "step": 8344 }, { "epoch": 0.07899395121212408, "grad_norm": 527.1466064453125, "learning_rate": 1.9874403565317928e-06, "loss": 33.375, "step": 8345 }, { "epoch": 0.07900341723383913, "grad_norm": 260.6401062011719, "learning_rate": 1.987435512285539e-06, "loss": 20.8438, "step": 8346 }, { "epoch": 0.0790128832555542, "grad_norm": 258.20599365234375, "learning_rate": 1.98743066711116e-06, "loss": 19.5547, "step": 8347 }, { "epoch": 0.07902234927726924, "grad_norm": 293.3506774902344, "learning_rate": 1.9874258210086607e-06, "loss": 17.1953, "step": 8348 }, { "epoch": 0.0790318152989843, "grad_norm": 683.8775634765625, "learning_rate": 1.9874209739780456e-06, "loss": 47.7656, "step": 8349 }, { "epoch": 0.07904128132069935, "grad_norm": 269.09063720703125, "learning_rate": 1.9874161260193184e-06, "loss": 20.8203, "step": 8350 }, { "epoch": 0.0790507473424144, "grad_norm": 219.78627014160156, "learning_rate": 1.9874112771324844e-06, "loss": 19.9297, "step": 8351 }, { "epoch": 0.07906021336412945, "grad_norm": 228.31610107421875, "learning_rate": 1.9874064273175484e-06, "loss": 17.375, "step": 8352 }, { "epoch": 0.07906967938584451, "grad_norm": 353.06597900390625, "learning_rate": 1.987401576574514e-06, "loss": 19.5703, "step": 8353 }, { "epoch": 0.07907914540755956, "grad_norm": 175.94122314453125, "learning_rate": 1.987396724903387e-06, "loss": 25.2305, "step": 8354 }, { "epoch": 0.07908861142927462, "grad_norm": 416.22613525390625, "learning_rate": 1.9873918723041708e-06, "loss": 59.4297, "step": 8355 }, { "epoch": 0.07909807745098968, "grad_norm": 693.0244750976562, "learning_rate": 1.9873870187768706e-06, "loss": 46.0625, "step": 8356 }, { "epoch": 0.07910754347270472, "grad_norm": 408.0664367675781, "learning_rate": 1.987382164321491e-06, "loss": 35.1562, "step": 8357 }, { "epoch": 0.07911700949441978, "grad_norm": 716.0042724609375, "learning_rate": 1.9873773089380366e-06, "loss": 49.1719, "step": 8358 }, { "epoch": 0.07912647551613483, "grad_norm": 189.87136840820312, "learning_rate": 1.987372452626511e-06, "loss": 16.9453, "step": 8359 }, { "epoch": 0.07913594153784989, "grad_norm": 299.8789978027344, "learning_rate": 1.98736759538692e-06, "loss": 22.1641, "step": 8360 }, { "epoch": 0.07914540755956494, "grad_norm": 221.10508728027344, "learning_rate": 1.9873627372192677e-06, "loss": 27.8438, "step": 8361 }, { "epoch": 0.07915487358128, "grad_norm": 2.7189688682556152, "learning_rate": 1.987357878123559e-06, "loss": 0.7998, "step": 8362 }, { "epoch": 0.07916433960299504, "grad_norm": 274.54913330078125, "learning_rate": 1.9873530180997977e-06, "loss": 13.7734, "step": 8363 }, { "epoch": 0.0791738056247101, "grad_norm": 451.6448059082031, "learning_rate": 1.9873481571479886e-06, "loss": 50.2031, "step": 8364 }, { "epoch": 0.07918327164642516, "grad_norm": 2.893922805786133, "learning_rate": 1.9873432952681372e-06, "loss": 0.8262, "step": 8365 }, { "epoch": 0.07919273766814021, "grad_norm": 340.35577392578125, "learning_rate": 1.9873384324602465e-06, "loss": 35.875, "step": 8366 }, { "epoch": 0.07920220368985527, "grad_norm": 172.25277709960938, "learning_rate": 1.9873335687243222e-06, "loss": 18.9297, "step": 8367 }, { "epoch": 0.07921166971157032, "grad_norm": 3.4133033752441406, "learning_rate": 1.987328704060369e-06, "loss": 0.9478, "step": 8368 }, { "epoch": 0.07922113573328537, "grad_norm": 627.8491821289062, "learning_rate": 1.9873238384683905e-06, "loss": 26.7734, "step": 8369 }, { "epoch": 0.07923060175500042, "grad_norm": 220.4805908203125, "learning_rate": 1.987318971948392e-06, "loss": 17.5703, "step": 8370 }, { "epoch": 0.07924006777671548, "grad_norm": 236.9629669189453, "learning_rate": 1.987314104500378e-06, "loss": 26.3906, "step": 8371 }, { "epoch": 0.07924953379843053, "grad_norm": 953.8373413085938, "learning_rate": 1.9873092361243525e-06, "loss": 23.2969, "step": 8372 }, { "epoch": 0.07925899982014559, "grad_norm": 583.4243774414062, "learning_rate": 1.9873043668203213e-06, "loss": 49.5391, "step": 8373 }, { "epoch": 0.07926846584186065, "grad_norm": 299.8168640136719, "learning_rate": 1.987299496588288e-06, "loss": 43.4844, "step": 8374 }, { "epoch": 0.07927793186357569, "grad_norm": 444.7717590332031, "learning_rate": 1.9872946254282568e-06, "loss": 49.5039, "step": 8375 }, { "epoch": 0.07928739788529075, "grad_norm": 508.0465087890625, "learning_rate": 1.9872897533402337e-06, "loss": 30.5703, "step": 8376 }, { "epoch": 0.0792968639070058, "grad_norm": 230.66229248046875, "learning_rate": 1.987284880324222e-06, "loss": 30.8906, "step": 8377 }, { "epoch": 0.07930632992872086, "grad_norm": 563.3212280273438, "learning_rate": 1.987280006380227e-06, "loss": 52.5469, "step": 8378 }, { "epoch": 0.0793157959504359, "grad_norm": 411.8109130859375, "learning_rate": 1.9872751315082525e-06, "loss": 24.3281, "step": 8379 }, { "epoch": 0.07932526197215096, "grad_norm": 647.2501220703125, "learning_rate": 1.987270255708304e-06, "loss": 49.5625, "step": 8380 }, { "epoch": 0.07933472799386601, "grad_norm": 534.6000366210938, "learning_rate": 1.987265378980386e-06, "loss": 45.5469, "step": 8381 }, { "epoch": 0.07934419401558107, "grad_norm": 234.41265869140625, "learning_rate": 1.9872605013245023e-06, "loss": 33.5781, "step": 8382 }, { "epoch": 0.07935366003729613, "grad_norm": 470.29754638671875, "learning_rate": 1.9872556227406576e-06, "loss": 57.3438, "step": 8383 }, { "epoch": 0.07936312605901118, "grad_norm": 207.7190399169922, "learning_rate": 1.9872507432288575e-06, "loss": 22.3203, "step": 8384 }, { "epoch": 0.07937259208072624, "grad_norm": 271.7596130371094, "learning_rate": 1.987245862789106e-06, "loss": 25.2344, "step": 8385 }, { "epoch": 0.07938205810244128, "grad_norm": 2.5706088542938232, "learning_rate": 1.9872409814214075e-06, "loss": 0.8109, "step": 8386 }, { "epoch": 0.07939152412415634, "grad_norm": 191.98509216308594, "learning_rate": 1.9872360991257666e-06, "loss": 24.1797, "step": 8387 }, { "epoch": 0.07940099014587139, "grad_norm": 503.81475830078125, "learning_rate": 1.9872312159021876e-06, "loss": 26.5859, "step": 8388 }, { "epoch": 0.07941045616758645, "grad_norm": 494.9764709472656, "learning_rate": 1.987226331750676e-06, "loss": 68.3594, "step": 8389 }, { "epoch": 0.07941992218930151, "grad_norm": 373.74884033203125, "learning_rate": 1.987221446671236e-06, "loss": 12.3125, "step": 8390 }, { "epoch": 0.07942938821101655, "grad_norm": 287.5040588378906, "learning_rate": 1.9872165606638715e-06, "loss": 28.1172, "step": 8391 }, { "epoch": 0.07943885423273161, "grad_norm": 578.14990234375, "learning_rate": 1.9872116737285878e-06, "loss": 28.3047, "step": 8392 }, { "epoch": 0.07944832025444666, "grad_norm": 175.36109924316406, "learning_rate": 1.987206785865389e-06, "loss": 22.6875, "step": 8393 }, { "epoch": 0.07945778627616172, "grad_norm": 512.2462768554688, "learning_rate": 1.9872018970742808e-06, "loss": 54.5, "step": 8394 }, { "epoch": 0.07946725229787677, "grad_norm": 476.07867431640625, "learning_rate": 1.987197007355267e-06, "loss": 42.0156, "step": 8395 }, { "epoch": 0.07947671831959183, "grad_norm": 645.4096069335938, "learning_rate": 1.987192116708352e-06, "loss": 50.2539, "step": 8396 }, { "epoch": 0.07948618434130687, "grad_norm": 944.76123046875, "learning_rate": 1.9871872251335406e-06, "loss": 43.5078, "step": 8397 }, { "epoch": 0.07949565036302193, "grad_norm": 3.3956398963928223, "learning_rate": 1.987182332630837e-06, "loss": 0.9771, "step": 8398 }, { "epoch": 0.07950511638473699, "grad_norm": 348.6272888183594, "learning_rate": 1.987177439200247e-06, "loss": 35.8594, "step": 8399 }, { "epoch": 0.07951458240645204, "grad_norm": 918.3283081054688, "learning_rate": 1.987172544841774e-06, "loss": 26.5156, "step": 8400 }, { "epoch": 0.0795240484281671, "grad_norm": 509.1302490234375, "learning_rate": 1.987167649555423e-06, "loss": 44.6094, "step": 8401 }, { "epoch": 0.07953351444988215, "grad_norm": 376.84381103515625, "learning_rate": 1.987162753341199e-06, "loss": 17.0273, "step": 8402 }, { "epoch": 0.0795429804715972, "grad_norm": 274.1568603515625, "learning_rate": 1.987157856199106e-06, "loss": 22.5234, "step": 8403 }, { "epoch": 0.07955244649331225, "grad_norm": 456.652587890625, "learning_rate": 1.987152958129149e-06, "loss": 31.2969, "step": 8404 }, { "epoch": 0.07956191251502731, "grad_norm": 201.11021423339844, "learning_rate": 1.987148059131332e-06, "loss": 18.875, "step": 8405 }, { "epoch": 0.07957137853674236, "grad_norm": 240.8824920654297, "learning_rate": 1.9871431592056604e-06, "loss": 22.5547, "step": 8406 }, { "epoch": 0.07958084455845742, "grad_norm": 1208.202392578125, "learning_rate": 1.987138258352138e-06, "loss": 48.9531, "step": 8407 }, { "epoch": 0.07959031058017248, "grad_norm": 666.98974609375, "learning_rate": 1.9871333565707705e-06, "loss": 23.9531, "step": 8408 }, { "epoch": 0.07959977660188752, "grad_norm": 240.38890075683594, "learning_rate": 1.9871284538615614e-06, "loss": 17.7578, "step": 8409 }, { "epoch": 0.07960924262360258, "grad_norm": 318.36688232421875, "learning_rate": 1.987123550224516e-06, "loss": 25.7969, "step": 8410 }, { "epoch": 0.07961870864531763, "grad_norm": 239.87820434570312, "learning_rate": 1.9871186456596385e-06, "loss": 32.2812, "step": 8411 }, { "epoch": 0.07962817466703269, "grad_norm": 540.6273803710938, "learning_rate": 1.987113740166934e-06, "loss": 55.0938, "step": 8412 }, { "epoch": 0.07963764068874774, "grad_norm": 379.4848937988281, "learning_rate": 1.9871088337464065e-06, "loss": 22.6445, "step": 8413 }, { "epoch": 0.0796471067104628, "grad_norm": 213.6057586669922, "learning_rate": 1.987103926398061e-06, "loss": 24.4844, "step": 8414 }, { "epoch": 0.07965657273217784, "grad_norm": 573.684326171875, "learning_rate": 1.987099018121902e-06, "loss": 17.9648, "step": 8415 }, { "epoch": 0.0796660387538929, "grad_norm": 1031.9822998046875, "learning_rate": 1.987094108917934e-06, "loss": 60.6562, "step": 8416 }, { "epoch": 0.07967550477560796, "grad_norm": 642.7047119140625, "learning_rate": 1.987089198786162e-06, "loss": 48.2344, "step": 8417 }, { "epoch": 0.07968497079732301, "grad_norm": 430.5849304199219, "learning_rate": 1.9870842877265906e-06, "loss": 16.8242, "step": 8418 }, { "epoch": 0.07969443681903807, "grad_norm": 211.2175750732422, "learning_rate": 1.9870793757392237e-06, "loss": 22.9844, "step": 8419 }, { "epoch": 0.07970390284075311, "grad_norm": 326.170654296875, "learning_rate": 1.987074462824067e-06, "loss": 23.8516, "step": 8420 }, { "epoch": 0.07971336886246817, "grad_norm": 340.36199951171875, "learning_rate": 1.987069548981124e-06, "loss": 23.9922, "step": 8421 }, { "epoch": 0.07972283488418322, "grad_norm": 309.5887145996094, "learning_rate": 1.9870646342104e-06, "loss": 18.2422, "step": 8422 }, { "epoch": 0.07973230090589828, "grad_norm": 304.1363830566406, "learning_rate": 1.987059718511899e-06, "loss": 23.4492, "step": 8423 }, { "epoch": 0.07974176692761333, "grad_norm": 439.6177978515625, "learning_rate": 1.9870548018856266e-06, "loss": 58.7031, "step": 8424 }, { "epoch": 0.07975123294932839, "grad_norm": 1390.617919921875, "learning_rate": 1.987049884331587e-06, "loss": 53.5312, "step": 8425 }, { "epoch": 0.07976069897104344, "grad_norm": 514.1192016601562, "learning_rate": 1.9870449658497843e-06, "loss": 35.6406, "step": 8426 }, { "epoch": 0.07977016499275849, "grad_norm": 418.88751220703125, "learning_rate": 1.9870400464402233e-06, "loss": 34.0078, "step": 8427 }, { "epoch": 0.07977963101447355, "grad_norm": 647.6624755859375, "learning_rate": 1.9870351261029098e-06, "loss": 33.0078, "step": 8428 }, { "epoch": 0.0797890970361886, "grad_norm": 3.0237152576446533, "learning_rate": 1.987030204837847e-06, "loss": 0.8574, "step": 8429 }, { "epoch": 0.07979856305790366, "grad_norm": 421.3977966308594, "learning_rate": 1.98702528264504e-06, "loss": 48.7969, "step": 8430 }, { "epoch": 0.0798080290796187, "grad_norm": 234.77847290039062, "learning_rate": 1.9870203595244934e-06, "loss": 12.7383, "step": 8431 }, { "epoch": 0.07981749510133376, "grad_norm": 233.3870391845703, "learning_rate": 1.987015435476212e-06, "loss": 20.4609, "step": 8432 }, { "epoch": 0.07982696112304882, "grad_norm": 1149.430419921875, "learning_rate": 1.9870105105002e-06, "loss": 37.3164, "step": 8433 }, { "epoch": 0.07983642714476387, "grad_norm": 188.66136169433594, "learning_rate": 1.9870055845964625e-06, "loss": 25.2656, "step": 8434 }, { "epoch": 0.07984589316647893, "grad_norm": 444.0614318847656, "learning_rate": 1.987000657765004e-06, "loss": 22.1211, "step": 8435 }, { "epoch": 0.07985535918819398, "grad_norm": 264.362548828125, "learning_rate": 1.9869957300058295e-06, "loss": 21.4766, "step": 8436 }, { "epoch": 0.07986482520990903, "grad_norm": 390.21307373046875, "learning_rate": 1.986990801318943e-06, "loss": 42.6875, "step": 8437 }, { "epoch": 0.07987429123162408, "grad_norm": 292.9210205078125, "learning_rate": 1.986985871704349e-06, "loss": 25.3125, "step": 8438 }, { "epoch": 0.07988375725333914, "grad_norm": 280.2536315917969, "learning_rate": 1.9869809411620522e-06, "loss": 31.5, "step": 8439 }, { "epoch": 0.07989322327505419, "grad_norm": 573.0096435546875, "learning_rate": 1.9869760096920584e-06, "loss": 61.1875, "step": 8440 }, { "epoch": 0.07990268929676925, "grad_norm": 222.3168182373047, "learning_rate": 1.986971077294371e-06, "loss": 18.1562, "step": 8441 }, { "epoch": 0.07991215531848431, "grad_norm": 557.6691284179688, "learning_rate": 1.9869661439689946e-06, "loss": 51.8281, "step": 8442 }, { "epoch": 0.07992162134019935, "grad_norm": 335.64801025390625, "learning_rate": 1.9869612097159347e-06, "loss": 24.8828, "step": 8443 }, { "epoch": 0.07993108736191441, "grad_norm": 212.6304931640625, "learning_rate": 1.9869562745351954e-06, "loss": 15.7695, "step": 8444 }, { "epoch": 0.07994055338362946, "grad_norm": 208.56507873535156, "learning_rate": 1.9869513384267814e-06, "loss": 25.1953, "step": 8445 }, { "epoch": 0.07995001940534452, "grad_norm": 489.3790588378906, "learning_rate": 1.986946401390697e-06, "loss": 29.7578, "step": 8446 }, { "epoch": 0.07995948542705957, "grad_norm": 818.2174682617188, "learning_rate": 1.9869414634269475e-06, "loss": 9.4492, "step": 8447 }, { "epoch": 0.07996895144877463, "grad_norm": 315.31536865234375, "learning_rate": 1.9869365245355373e-06, "loss": 22.9141, "step": 8448 }, { "epoch": 0.07997841747048967, "grad_norm": 3.259554862976074, "learning_rate": 1.986931584716471e-06, "loss": 0.9932, "step": 8449 }, { "epoch": 0.07998788349220473, "grad_norm": 711.010498046875, "learning_rate": 1.986926643969753e-06, "loss": 43.7188, "step": 8450 }, { "epoch": 0.07999734951391979, "grad_norm": 2.8715198040008545, "learning_rate": 1.9869217022953885e-06, "loss": 0.8027, "step": 8451 }, { "epoch": 0.08000681553563484, "grad_norm": 232.66920471191406, "learning_rate": 1.9869167596933815e-06, "loss": 22.1406, "step": 8452 }, { "epoch": 0.0800162815573499, "grad_norm": 455.4469909667969, "learning_rate": 1.986911816163737e-06, "loss": 35.625, "step": 8453 }, { "epoch": 0.08002574757906494, "grad_norm": 336.6967468261719, "learning_rate": 1.98690687170646e-06, "loss": 23.4297, "step": 8454 }, { "epoch": 0.08003521360078, "grad_norm": 278.6434326171875, "learning_rate": 1.986901926321554e-06, "loss": 20.2188, "step": 8455 }, { "epoch": 0.08004467962249505, "grad_norm": 247.19412231445312, "learning_rate": 1.9868969800090248e-06, "loss": 20.1328, "step": 8456 }, { "epoch": 0.08005414564421011, "grad_norm": 434.378173828125, "learning_rate": 1.9868920327688767e-06, "loss": 38.3594, "step": 8457 }, { "epoch": 0.08006361166592516, "grad_norm": 185.87579345703125, "learning_rate": 1.986887084601114e-06, "loss": 21.3594, "step": 8458 }, { "epoch": 0.08007307768764022, "grad_norm": 415.3404846191406, "learning_rate": 1.986882135505742e-06, "loss": 36.8906, "step": 8459 }, { "epoch": 0.08008254370935527, "grad_norm": 529.0100708007812, "learning_rate": 1.9868771854827644e-06, "loss": 43.5938, "step": 8460 }, { "epoch": 0.08009200973107032, "grad_norm": 737.5526123046875, "learning_rate": 1.986872234532187e-06, "loss": 34.0781, "step": 8461 }, { "epoch": 0.08010147575278538, "grad_norm": 562.6326293945312, "learning_rate": 1.9868672826540137e-06, "loss": 54.2031, "step": 8462 }, { "epoch": 0.08011094177450043, "grad_norm": 293.9028625488281, "learning_rate": 1.9868623298482493e-06, "loss": 27.3672, "step": 8463 }, { "epoch": 0.08012040779621549, "grad_norm": 489.36895751953125, "learning_rate": 1.9868573761148987e-06, "loss": 50.0938, "step": 8464 }, { "epoch": 0.08012987381793053, "grad_norm": 417.83502197265625, "learning_rate": 1.9868524214539665e-06, "loss": 31.0469, "step": 8465 }, { "epoch": 0.08013933983964559, "grad_norm": 339.16229248046875, "learning_rate": 1.9868474658654567e-06, "loss": 21.3477, "step": 8466 }, { "epoch": 0.08014880586136064, "grad_norm": 360.8052978515625, "learning_rate": 1.986842509349375e-06, "loss": 13.4414, "step": 8467 }, { "epoch": 0.0801582718830757, "grad_norm": 430.62493896484375, "learning_rate": 1.9868375519057253e-06, "loss": 29.2031, "step": 8468 }, { "epoch": 0.08016773790479076, "grad_norm": 455.3436279296875, "learning_rate": 1.9868325935345127e-06, "loss": 46.4531, "step": 8469 }, { "epoch": 0.0801772039265058, "grad_norm": 390.69305419921875, "learning_rate": 1.9868276342357414e-06, "loss": 33.7969, "step": 8470 }, { "epoch": 0.08018666994822087, "grad_norm": 199.37109375, "learning_rate": 1.986822674009416e-06, "loss": 19.9297, "step": 8471 }, { "epoch": 0.08019613596993591, "grad_norm": 311.1738586425781, "learning_rate": 1.9868177128555423e-06, "loss": 11.4922, "step": 8472 }, { "epoch": 0.08020560199165097, "grad_norm": 263.65753173828125, "learning_rate": 1.986812750774124e-06, "loss": 32.0547, "step": 8473 }, { "epoch": 0.08021506801336602, "grad_norm": 798.34716796875, "learning_rate": 1.9868077877651653e-06, "loss": 29.6641, "step": 8474 }, { "epoch": 0.08022453403508108, "grad_norm": 298.0019836425781, "learning_rate": 1.9868028238286718e-06, "loss": 30.5938, "step": 8475 }, { "epoch": 0.08023400005679614, "grad_norm": 213.15919494628906, "learning_rate": 1.986797858964648e-06, "loss": 22.4531, "step": 8476 }, { "epoch": 0.08024346607851118, "grad_norm": 320.39013671875, "learning_rate": 1.986792893173098e-06, "loss": 27.625, "step": 8477 }, { "epoch": 0.08025293210022624, "grad_norm": 307.4263610839844, "learning_rate": 1.9867879264540272e-06, "loss": 19.1016, "step": 8478 }, { "epoch": 0.08026239812194129, "grad_norm": 289.7598571777344, "learning_rate": 1.98678295880744e-06, "loss": 20.9531, "step": 8479 }, { "epoch": 0.08027186414365635, "grad_norm": 501.16448974609375, "learning_rate": 1.986777990233341e-06, "loss": 29.6875, "step": 8480 }, { "epoch": 0.0802813301653714, "grad_norm": 232.2662353515625, "learning_rate": 1.986773020731735e-06, "loss": 19.3594, "step": 8481 }, { "epoch": 0.08029079618708646, "grad_norm": 388.5505065917969, "learning_rate": 1.986768050302626e-06, "loss": 33.8438, "step": 8482 }, { "epoch": 0.0803002622088015, "grad_norm": 227.3174285888672, "learning_rate": 1.9867630789460196e-06, "loss": 27.25, "step": 8483 }, { "epoch": 0.08030972823051656, "grad_norm": 268.02130126953125, "learning_rate": 1.9867581066619203e-06, "loss": 20.3906, "step": 8484 }, { "epoch": 0.08031919425223162, "grad_norm": 516.3768310546875, "learning_rate": 1.986753133450332e-06, "loss": 43.5156, "step": 8485 }, { "epoch": 0.08032866027394667, "grad_norm": 632.3856811523438, "learning_rate": 1.9867481593112603e-06, "loss": 48.2578, "step": 8486 }, { "epoch": 0.08033812629566173, "grad_norm": 397.4874572753906, "learning_rate": 1.98674318424471e-06, "loss": 26.7656, "step": 8487 }, { "epoch": 0.08034759231737677, "grad_norm": 360.2332763671875, "learning_rate": 1.9867382082506846e-06, "loss": 21.8516, "step": 8488 }, { "epoch": 0.08035705833909183, "grad_norm": 456.57318115234375, "learning_rate": 1.9867332313291895e-06, "loss": 54.0469, "step": 8489 }, { "epoch": 0.08036652436080688, "grad_norm": 397.6147155761719, "learning_rate": 1.98672825348023e-06, "loss": 23.9023, "step": 8490 }, { "epoch": 0.08037599038252194, "grad_norm": 414.3427734375, "learning_rate": 1.9867232747038093e-06, "loss": 31.6172, "step": 8491 }, { "epoch": 0.08038545640423699, "grad_norm": 267.4054870605469, "learning_rate": 1.9867182949999335e-06, "loss": 27.9062, "step": 8492 }, { "epoch": 0.08039492242595205, "grad_norm": 204.1593475341797, "learning_rate": 1.986713314368606e-06, "loss": 13.5547, "step": 8493 }, { "epoch": 0.0804043884476671, "grad_norm": 215.73699951171875, "learning_rate": 1.986708332809833e-06, "loss": 20.0156, "step": 8494 }, { "epoch": 0.08041385446938215, "grad_norm": 262.4432678222656, "learning_rate": 1.986703350323618e-06, "loss": 20.4844, "step": 8495 }, { "epoch": 0.08042332049109721, "grad_norm": 497.19744873046875, "learning_rate": 1.986698366909966e-06, "loss": 45.6484, "step": 8496 }, { "epoch": 0.08043278651281226, "grad_norm": 437.8172607421875, "learning_rate": 1.9866933825688816e-06, "loss": 29.3828, "step": 8497 }, { "epoch": 0.08044225253452732, "grad_norm": 3.3869142532348633, "learning_rate": 1.98668839730037e-06, "loss": 0.9375, "step": 8498 }, { "epoch": 0.08045171855624236, "grad_norm": 710.2008056640625, "learning_rate": 1.9866834111044354e-06, "loss": 27.1641, "step": 8499 }, { "epoch": 0.08046118457795742, "grad_norm": 198.98416137695312, "learning_rate": 1.9866784239810824e-06, "loss": 19.8828, "step": 8500 }, { "epoch": 0.08047065059967247, "grad_norm": 398.0941162109375, "learning_rate": 1.986673435930316e-06, "loss": 23.3281, "step": 8501 }, { "epoch": 0.08048011662138753, "grad_norm": 803.37451171875, "learning_rate": 1.9866684469521405e-06, "loss": 64.5469, "step": 8502 }, { "epoch": 0.08048958264310259, "grad_norm": 210.36546325683594, "learning_rate": 1.986663457046561e-06, "loss": 10.7734, "step": 8503 }, { "epoch": 0.08049904866481764, "grad_norm": 344.4302673339844, "learning_rate": 1.986658466213582e-06, "loss": 22.6094, "step": 8504 }, { "epoch": 0.0805085146865327, "grad_norm": 255.50486755371094, "learning_rate": 1.9866534744532083e-06, "loss": 23.9766, "step": 8505 }, { "epoch": 0.08051798070824774, "grad_norm": 309.3825378417969, "learning_rate": 1.9866484817654444e-06, "loss": 27.4062, "step": 8506 }, { "epoch": 0.0805274467299628, "grad_norm": 474.3784484863281, "learning_rate": 1.9866434881502958e-06, "loss": 45.4844, "step": 8507 }, { "epoch": 0.08053691275167785, "grad_norm": 582.953125, "learning_rate": 1.9866384936077656e-06, "loss": 40.0234, "step": 8508 }, { "epoch": 0.08054637877339291, "grad_norm": 562.1674194335938, "learning_rate": 1.9866334981378596e-06, "loss": 36.5234, "step": 8509 }, { "epoch": 0.08055584479510795, "grad_norm": 635.6570434570312, "learning_rate": 1.9866285017405826e-06, "loss": 53.3281, "step": 8510 }, { "epoch": 0.08056531081682301, "grad_norm": 347.3316345214844, "learning_rate": 1.986623504415939e-06, "loss": 25.1797, "step": 8511 }, { "epoch": 0.08057477683853807, "grad_norm": 3.4086503982543945, "learning_rate": 1.9866185061639332e-06, "loss": 1.0352, "step": 8512 }, { "epoch": 0.08058424286025312, "grad_norm": 552.4366455078125, "learning_rate": 1.9866135069845706e-06, "loss": 37.7344, "step": 8513 }, { "epoch": 0.08059370888196818, "grad_norm": 924.2072143554688, "learning_rate": 1.9866085068778547e-06, "loss": 24.5625, "step": 8514 }, { "epoch": 0.08060317490368323, "grad_norm": 212.4166717529297, "learning_rate": 1.986603505843792e-06, "loss": 13.5078, "step": 8515 }, { "epoch": 0.08061264092539829, "grad_norm": 331.066650390625, "learning_rate": 1.9865985038823855e-06, "loss": 38.0156, "step": 8516 }, { "epoch": 0.08062210694711333, "grad_norm": 659.16943359375, "learning_rate": 1.9865935009936407e-06, "loss": 82.6641, "step": 8517 }, { "epoch": 0.08063157296882839, "grad_norm": 472.3448181152344, "learning_rate": 1.986588497177562e-06, "loss": 50.9844, "step": 8518 }, { "epoch": 0.08064103899054345, "grad_norm": 528.5516967773438, "learning_rate": 1.986583492434155e-06, "loss": 42.5312, "step": 8519 }, { "epoch": 0.0806505050122585, "grad_norm": 496.7639465332031, "learning_rate": 1.986578486763423e-06, "loss": 52.6875, "step": 8520 }, { "epoch": 0.08065997103397356, "grad_norm": 300.6073913574219, "learning_rate": 1.9865734801653714e-06, "loss": 22.6602, "step": 8521 }, { "epoch": 0.0806694370556886, "grad_norm": 583.0403442382812, "learning_rate": 1.9865684726400055e-06, "loss": 25.082, "step": 8522 }, { "epoch": 0.08067890307740366, "grad_norm": 300.95819091796875, "learning_rate": 1.9865634641873287e-06, "loss": 26.0547, "step": 8523 }, { "epoch": 0.08068836909911871, "grad_norm": 574.1019897460938, "learning_rate": 1.986558454807347e-06, "loss": 35.4062, "step": 8524 }, { "epoch": 0.08069783512083377, "grad_norm": 249.85816955566406, "learning_rate": 1.9865534445000644e-06, "loss": 19.1523, "step": 8525 }, { "epoch": 0.08070730114254882, "grad_norm": 200.61672973632812, "learning_rate": 1.9865484332654857e-06, "loss": 18.1875, "step": 8526 }, { "epoch": 0.08071676716426388, "grad_norm": 245.45443725585938, "learning_rate": 1.9865434211036157e-06, "loss": 22.6641, "step": 8527 }, { "epoch": 0.08072623318597894, "grad_norm": 277.1518859863281, "learning_rate": 1.986538408014459e-06, "loss": 19.3828, "step": 8528 }, { "epoch": 0.08073569920769398, "grad_norm": 334.88250732421875, "learning_rate": 1.98653339399802e-06, "loss": 23.2188, "step": 8529 }, { "epoch": 0.08074516522940904, "grad_norm": 929.8727416992188, "learning_rate": 1.9865283790543042e-06, "loss": 69.25, "step": 8530 }, { "epoch": 0.08075463125112409, "grad_norm": 353.0323181152344, "learning_rate": 1.986523363183316e-06, "loss": 19.9688, "step": 8531 }, { "epoch": 0.08076409727283915, "grad_norm": 282.87738037109375, "learning_rate": 1.9865183463850597e-06, "loss": 22.1953, "step": 8532 }, { "epoch": 0.0807735632945542, "grad_norm": 440.9579772949219, "learning_rate": 1.9865133286595405e-06, "loss": 44.4688, "step": 8533 }, { "epoch": 0.08078302931626925, "grad_norm": 700.0255126953125, "learning_rate": 1.986508310006763e-06, "loss": 50.1875, "step": 8534 }, { "epoch": 0.0807924953379843, "grad_norm": 268.7870788574219, "learning_rate": 1.9865032904267316e-06, "loss": 11.332, "step": 8535 }, { "epoch": 0.08080196135969936, "grad_norm": 435.78216552734375, "learning_rate": 1.9864982699194516e-06, "loss": 28.25, "step": 8536 }, { "epoch": 0.08081142738141442, "grad_norm": 353.9810485839844, "learning_rate": 1.9864932484849277e-06, "loss": 42.6875, "step": 8537 }, { "epoch": 0.08082089340312947, "grad_norm": 197.31436157226562, "learning_rate": 1.9864882261231635e-06, "loss": 20.1484, "step": 8538 }, { "epoch": 0.08083035942484453, "grad_norm": 771.5092163085938, "learning_rate": 1.986483202834165e-06, "loss": 35.2031, "step": 8539 }, { "epoch": 0.08083982544655957, "grad_norm": 543.054443359375, "learning_rate": 1.9864781786179366e-06, "loss": 30.3594, "step": 8540 }, { "epoch": 0.08084929146827463, "grad_norm": 402.3676452636719, "learning_rate": 1.9864731534744827e-06, "loss": 17.6523, "step": 8541 }, { "epoch": 0.08085875748998968, "grad_norm": 301.1455078125, "learning_rate": 1.986468127403808e-06, "loss": 20.5195, "step": 8542 }, { "epoch": 0.08086822351170474, "grad_norm": 955.1190795898438, "learning_rate": 1.9864631004059176e-06, "loss": 47.9531, "step": 8543 }, { "epoch": 0.08087768953341978, "grad_norm": 421.8614501953125, "learning_rate": 1.9864580724808165e-06, "loss": 16.9766, "step": 8544 }, { "epoch": 0.08088715555513484, "grad_norm": 707.968017578125, "learning_rate": 1.9864530436285086e-06, "loss": 37.4531, "step": 8545 }, { "epoch": 0.0808966215768499, "grad_norm": 2.808866262435913, "learning_rate": 1.986448013848999e-06, "loss": 0.8706, "step": 8546 }, { "epoch": 0.08090608759856495, "grad_norm": 187.32730102539062, "learning_rate": 1.986442983142292e-06, "loss": 22.2188, "step": 8547 }, { "epoch": 0.08091555362028001, "grad_norm": 541.3775024414062, "learning_rate": 1.9864379515083937e-06, "loss": 30.3359, "step": 8548 }, { "epoch": 0.08092501964199506, "grad_norm": 347.1914978027344, "learning_rate": 1.986432918947307e-06, "loss": 28.8281, "step": 8549 }, { "epoch": 0.08093448566371012, "grad_norm": 306.2086181640625, "learning_rate": 1.9864278854590385e-06, "loss": 24.7422, "step": 8550 }, { "epoch": 0.08094395168542516, "grad_norm": 379.7132568359375, "learning_rate": 1.986422851043591e-06, "loss": 34.5, "step": 8551 }, { "epoch": 0.08095341770714022, "grad_norm": 534.699462890625, "learning_rate": 1.9864178157009707e-06, "loss": 38.4219, "step": 8552 }, { "epoch": 0.08096288372885527, "grad_norm": 257.14288330078125, "learning_rate": 1.9864127794311817e-06, "loss": 19.6641, "step": 8553 }, { "epoch": 0.08097234975057033, "grad_norm": 503.1828918457031, "learning_rate": 1.986407742234229e-06, "loss": 26.3281, "step": 8554 }, { "epoch": 0.08098181577228539, "grad_norm": 551.9478149414062, "learning_rate": 1.986402704110117e-06, "loss": 56.6562, "step": 8555 }, { "epoch": 0.08099128179400043, "grad_norm": 372.1324462890625, "learning_rate": 1.9863976650588506e-06, "loss": 22.9766, "step": 8556 }, { "epoch": 0.0810007478157155, "grad_norm": 782.4595336914062, "learning_rate": 1.9863926250804347e-06, "loss": 29.1641, "step": 8557 }, { "epoch": 0.08101021383743054, "grad_norm": 438.630615234375, "learning_rate": 1.9863875841748738e-06, "loss": 25.2578, "step": 8558 }, { "epoch": 0.0810196798591456, "grad_norm": 421.8316345214844, "learning_rate": 1.986382542342173e-06, "loss": 44.25, "step": 8559 }, { "epoch": 0.08102914588086065, "grad_norm": 730.1160278320312, "learning_rate": 1.9863774995823364e-06, "loss": 67.4375, "step": 8560 }, { "epoch": 0.0810386119025757, "grad_norm": 629.3380737304688, "learning_rate": 1.9863724558953696e-06, "loss": 33.9844, "step": 8561 }, { "epoch": 0.08104807792429077, "grad_norm": 374.15399169921875, "learning_rate": 1.9863674112812766e-06, "loss": 17.3594, "step": 8562 }, { "epoch": 0.08105754394600581, "grad_norm": 239.8258056640625, "learning_rate": 1.9863623657400623e-06, "loss": 20.4922, "step": 8563 }, { "epoch": 0.08106700996772087, "grad_norm": 427.66839599609375, "learning_rate": 1.9863573192717316e-06, "loss": 20.6953, "step": 8564 }, { "epoch": 0.08107647598943592, "grad_norm": 769.4589233398438, "learning_rate": 1.986352271876289e-06, "loss": 47.4219, "step": 8565 }, { "epoch": 0.08108594201115098, "grad_norm": 469.4601745605469, "learning_rate": 1.9863472235537396e-06, "loss": 38.5781, "step": 8566 }, { "epoch": 0.08109540803286602, "grad_norm": 257.622802734375, "learning_rate": 1.986342174304088e-06, "loss": 19.6328, "step": 8567 }, { "epoch": 0.08110487405458108, "grad_norm": 480.2428894042969, "learning_rate": 1.986337124127339e-06, "loss": 34.625, "step": 8568 }, { "epoch": 0.08111434007629613, "grad_norm": 1131.1986083984375, "learning_rate": 1.9863320730234973e-06, "loss": 50.9375, "step": 8569 }, { "epoch": 0.08112380609801119, "grad_norm": 402.59967041015625, "learning_rate": 1.9863270209925673e-06, "loss": 35.2109, "step": 8570 }, { "epoch": 0.08113327211972625, "grad_norm": 308.095703125, "learning_rate": 1.9863219680345543e-06, "loss": 21.9609, "step": 8571 }, { "epoch": 0.0811427381414413, "grad_norm": 385.0146179199219, "learning_rate": 1.986316914149463e-06, "loss": 32.4688, "step": 8572 }, { "epoch": 0.08115220416315636, "grad_norm": 845.9793701171875, "learning_rate": 1.9863118593372977e-06, "loss": 44.9688, "step": 8573 }, { "epoch": 0.0811616701848714, "grad_norm": 289.05633544921875, "learning_rate": 1.9863068035980634e-06, "loss": 23.7422, "step": 8574 }, { "epoch": 0.08117113620658646, "grad_norm": 823.8607788085938, "learning_rate": 1.9863017469317647e-06, "loss": 16.5703, "step": 8575 }, { "epoch": 0.08118060222830151, "grad_norm": 338.7046813964844, "learning_rate": 1.986296689338407e-06, "loss": 21.8281, "step": 8576 }, { "epoch": 0.08119006825001657, "grad_norm": 409.7878112792969, "learning_rate": 1.9862916308179944e-06, "loss": 39.7656, "step": 8577 }, { "epoch": 0.08119953427173161, "grad_norm": 719.274169921875, "learning_rate": 1.9862865713705316e-06, "loss": 45.125, "step": 8578 }, { "epoch": 0.08120900029344667, "grad_norm": 945.4529418945312, "learning_rate": 1.986281510996024e-06, "loss": 34.5, "step": 8579 }, { "epoch": 0.08121846631516173, "grad_norm": 248.1065673828125, "learning_rate": 1.9862764496944755e-06, "loss": 18.2188, "step": 8580 }, { "epoch": 0.08122793233687678, "grad_norm": 892.1857299804688, "learning_rate": 1.9862713874658914e-06, "loss": 41.0781, "step": 8581 }, { "epoch": 0.08123739835859184, "grad_norm": 700.2037963867188, "learning_rate": 1.9862663243102764e-06, "loss": 24.7031, "step": 8582 }, { "epoch": 0.08124686438030689, "grad_norm": 197.10028076171875, "learning_rate": 1.9862612602276355e-06, "loss": 18.3516, "step": 8583 }, { "epoch": 0.08125633040202195, "grad_norm": 565.071044921875, "learning_rate": 1.986256195217973e-06, "loss": 50.4062, "step": 8584 }, { "epoch": 0.08126579642373699, "grad_norm": 482.6943054199219, "learning_rate": 1.9862511292812937e-06, "loss": 48.6562, "step": 8585 }, { "epoch": 0.08127526244545205, "grad_norm": 374.1911315917969, "learning_rate": 1.9862460624176026e-06, "loss": 29.2188, "step": 8586 }, { "epoch": 0.0812847284671671, "grad_norm": 323.5276184082031, "learning_rate": 1.9862409946269043e-06, "loss": 20.7812, "step": 8587 }, { "epoch": 0.08129419448888216, "grad_norm": 363.32513427734375, "learning_rate": 1.986235925909204e-06, "loss": 38.8125, "step": 8588 }, { "epoch": 0.08130366051059722, "grad_norm": 264.4052429199219, "learning_rate": 1.9862308562645054e-06, "loss": 25.8125, "step": 8589 }, { "epoch": 0.08131312653231226, "grad_norm": 400.31927490234375, "learning_rate": 1.986225785692814e-06, "loss": 36.0, "step": 8590 }, { "epoch": 0.08132259255402732, "grad_norm": 271.249755859375, "learning_rate": 1.986220714194135e-06, "loss": 24.3125, "step": 8591 }, { "epoch": 0.08133205857574237, "grad_norm": 432.8563232421875, "learning_rate": 1.9862156417684724e-06, "loss": 31.1953, "step": 8592 }, { "epoch": 0.08134152459745743, "grad_norm": 529.2146606445312, "learning_rate": 1.9862105684158313e-06, "loss": 40.9844, "step": 8593 }, { "epoch": 0.08135099061917248, "grad_norm": 450.27862548828125, "learning_rate": 1.986205494136217e-06, "loss": 31.3672, "step": 8594 }, { "epoch": 0.08136045664088754, "grad_norm": 208.42037963867188, "learning_rate": 1.986200418929633e-06, "loss": 16.8516, "step": 8595 }, { "epoch": 0.08136992266260258, "grad_norm": 549.4236450195312, "learning_rate": 1.9861953427960847e-06, "loss": 56.8125, "step": 8596 }, { "epoch": 0.08137938868431764, "grad_norm": 697.4417114257812, "learning_rate": 1.9861902657355773e-06, "loss": 66.9141, "step": 8597 }, { "epoch": 0.0813888547060327, "grad_norm": 2.8923346996307373, "learning_rate": 1.986185187748115e-06, "loss": 0.8467, "step": 8598 }, { "epoch": 0.08139832072774775, "grad_norm": 525.2405395507812, "learning_rate": 1.9861801088337027e-06, "loss": 19.8984, "step": 8599 }, { "epoch": 0.08140778674946281, "grad_norm": 593.7041625976562, "learning_rate": 1.9861750289923455e-06, "loss": 51.9844, "step": 8600 }, { "epoch": 0.08141725277117785, "grad_norm": 176.997314453125, "learning_rate": 1.986169948224048e-06, "loss": 19.3359, "step": 8601 }, { "epoch": 0.08142671879289291, "grad_norm": 541.769775390625, "learning_rate": 1.9861648665288145e-06, "loss": 30.75, "step": 8602 }, { "epoch": 0.08143618481460796, "grad_norm": 555.3895874023438, "learning_rate": 1.9861597839066506e-06, "loss": 60.5781, "step": 8603 }, { "epoch": 0.08144565083632302, "grad_norm": 431.91522216796875, "learning_rate": 1.9861547003575603e-06, "loss": 37.4531, "step": 8604 }, { "epoch": 0.08145511685803808, "grad_norm": 564.4188842773438, "learning_rate": 1.986149615881549e-06, "loss": 35.3359, "step": 8605 }, { "epoch": 0.08146458287975313, "grad_norm": 436.56304931640625, "learning_rate": 1.9861445304786214e-06, "loss": 47.0781, "step": 8606 }, { "epoch": 0.08147404890146819, "grad_norm": 587.87158203125, "learning_rate": 1.9861394441487816e-06, "loss": 54.2891, "step": 8607 }, { "epoch": 0.08148351492318323, "grad_norm": 392.10302734375, "learning_rate": 1.9861343568920354e-06, "loss": 13.1953, "step": 8608 }, { "epoch": 0.08149298094489829, "grad_norm": 367.22003173828125, "learning_rate": 1.9861292687083866e-06, "loss": 11.7383, "step": 8609 }, { "epoch": 0.08150244696661334, "grad_norm": 482.1783447265625, "learning_rate": 1.98612417959784e-06, "loss": 45.1797, "step": 8610 }, { "epoch": 0.0815119129883284, "grad_norm": 294.2837829589844, "learning_rate": 1.9861190895604017e-06, "loss": 34.6719, "step": 8611 }, { "epoch": 0.08152137901004344, "grad_norm": 382.6028137207031, "learning_rate": 1.9861139985960754e-06, "loss": 29.7969, "step": 8612 }, { "epoch": 0.0815308450317585, "grad_norm": 528.97265625, "learning_rate": 1.986108906704866e-06, "loss": 42.1875, "step": 8613 }, { "epoch": 0.08154031105347356, "grad_norm": 596.6533813476562, "learning_rate": 1.9861038138867784e-06, "loss": 65.5938, "step": 8614 }, { "epoch": 0.08154977707518861, "grad_norm": 397.8375244140625, "learning_rate": 1.9860987201418174e-06, "loss": 63.0781, "step": 8615 }, { "epoch": 0.08155924309690367, "grad_norm": 283.2540588378906, "learning_rate": 1.9860936254699877e-06, "loss": 19.4219, "step": 8616 }, { "epoch": 0.08156870911861872, "grad_norm": 421.1691589355469, "learning_rate": 1.9860885298712942e-06, "loss": 51.2812, "step": 8617 }, { "epoch": 0.08157817514033378, "grad_norm": 684.3056640625, "learning_rate": 1.9860834333457418e-06, "loss": 15.6992, "step": 8618 }, { "epoch": 0.08158764116204882, "grad_norm": 564.8286743164062, "learning_rate": 1.9860783358933345e-06, "loss": 58.5625, "step": 8619 }, { "epoch": 0.08159710718376388, "grad_norm": 467.4150085449219, "learning_rate": 1.9860732375140784e-06, "loss": 38.0625, "step": 8620 }, { "epoch": 0.08160657320547893, "grad_norm": 553.298828125, "learning_rate": 1.9860681382079773e-06, "loss": 43.4844, "step": 8621 }, { "epoch": 0.08161603922719399, "grad_norm": 673.1853637695312, "learning_rate": 1.986063037975036e-06, "loss": 57.5625, "step": 8622 }, { "epoch": 0.08162550524890905, "grad_norm": 520.4962768554688, "learning_rate": 1.98605793681526e-06, "loss": 49.25, "step": 8623 }, { "epoch": 0.0816349712706241, "grad_norm": 317.63165283203125, "learning_rate": 1.9860528347286535e-06, "loss": 19.3594, "step": 8624 }, { "epoch": 0.08164443729233915, "grad_norm": 811.7960205078125, "learning_rate": 1.9860477317152217e-06, "loss": 28.7109, "step": 8625 }, { "epoch": 0.0816539033140542, "grad_norm": 592.7666625976562, "learning_rate": 1.986042627774969e-06, "loss": 36.5469, "step": 8626 }, { "epoch": 0.08166336933576926, "grad_norm": 589.6677856445312, "learning_rate": 1.9860375229079004e-06, "loss": 13.7734, "step": 8627 }, { "epoch": 0.0816728353574843, "grad_norm": 303.36669921875, "learning_rate": 1.9860324171140207e-06, "loss": 35.1562, "step": 8628 }, { "epoch": 0.08168230137919937, "grad_norm": 219.7354736328125, "learning_rate": 1.986027310393335e-06, "loss": 16.7734, "step": 8629 }, { "epoch": 0.08169176740091441, "grad_norm": 966.5437622070312, "learning_rate": 1.9860222027458472e-06, "loss": 44.1094, "step": 8630 }, { "epoch": 0.08170123342262947, "grad_norm": 215.709228515625, "learning_rate": 1.9860170941715632e-06, "loss": 18.0, "step": 8631 }, { "epoch": 0.08171069944434453, "grad_norm": 399.2390441894531, "learning_rate": 1.9860119846704867e-06, "loss": 23.5078, "step": 8632 }, { "epoch": 0.08172016546605958, "grad_norm": 168.9862518310547, "learning_rate": 1.9860068742426236e-06, "loss": 17.5859, "step": 8633 }, { "epoch": 0.08172963148777464, "grad_norm": 366.90325927734375, "learning_rate": 1.986001762887978e-06, "loss": 44.0156, "step": 8634 }, { "epoch": 0.08173909750948968, "grad_norm": 853.6576538085938, "learning_rate": 1.985996650606555e-06, "loss": 52.4375, "step": 8635 }, { "epoch": 0.08174856353120474, "grad_norm": 310.4335021972656, "learning_rate": 1.985991537398359e-06, "loss": 23.9375, "step": 8636 }, { "epoch": 0.08175802955291979, "grad_norm": 383.2647399902344, "learning_rate": 1.9859864232633956e-06, "loss": 27.4844, "step": 8637 }, { "epoch": 0.08176749557463485, "grad_norm": 246.62628173828125, "learning_rate": 1.9859813082016685e-06, "loss": 25.5312, "step": 8638 }, { "epoch": 0.0817769615963499, "grad_norm": 155.03610229492188, "learning_rate": 1.985976192213183e-06, "loss": 17.6406, "step": 8639 }, { "epoch": 0.08178642761806496, "grad_norm": 252.03302001953125, "learning_rate": 1.9859710752979446e-06, "loss": 26.3125, "step": 8640 }, { "epoch": 0.08179589363978002, "grad_norm": 520.2264404296875, "learning_rate": 1.985965957455957e-06, "loss": 63.4219, "step": 8641 }, { "epoch": 0.08180535966149506, "grad_norm": 226.48153686523438, "learning_rate": 1.9859608386872264e-06, "loss": 24.1406, "step": 8642 }, { "epoch": 0.08181482568321012, "grad_norm": 286.6160888671875, "learning_rate": 1.985955718991756e-06, "loss": 20.6484, "step": 8643 }, { "epoch": 0.08182429170492517, "grad_norm": 271.9776916503906, "learning_rate": 1.9859505983695514e-06, "loss": 21.6562, "step": 8644 }, { "epoch": 0.08183375772664023, "grad_norm": 243.08273315429688, "learning_rate": 1.9859454768206175e-06, "loss": 10.6992, "step": 8645 }, { "epoch": 0.08184322374835527, "grad_norm": 322.5129699707031, "learning_rate": 1.9859403543449592e-06, "loss": 33.2031, "step": 8646 }, { "epoch": 0.08185268977007033, "grad_norm": 450.21771240234375, "learning_rate": 1.985935230942581e-06, "loss": 26.4219, "step": 8647 }, { "epoch": 0.0818621557917854, "grad_norm": 653.526611328125, "learning_rate": 1.985930106613488e-06, "loss": 26.7969, "step": 8648 }, { "epoch": 0.08187162181350044, "grad_norm": 2.959918737411499, "learning_rate": 1.9859249813576844e-06, "loss": 0.9072, "step": 8649 }, { "epoch": 0.0818810878352155, "grad_norm": 173.8944549560547, "learning_rate": 1.9859198551751758e-06, "loss": 18.0938, "step": 8650 }, { "epoch": 0.08189055385693055, "grad_norm": 206.4402313232422, "learning_rate": 1.9859147280659665e-06, "loss": 23.5, "step": 8651 }, { "epoch": 0.0819000198786456, "grad_norm": 289.3849792480469, "learning_rate": 1.9859096000300616e-06, "loss": 21.375, "step": 8652 }, { "epoch": 0.08190948590036065, "grad_norm": 869.9110717773438, "learning_rate": 1.9859044710674655e-06, "loss": 71.2188, "step": 8653 }, { "epoch": 0.08191895192207571, "grad_norm": 365.70062255859375, "learning_rate": 1.9858993411781835e-06, "loss": 26.4688, "step": 8654 }, { "epoch": 0.08192841794379076, "grad_norm": 186.9750213623047, "learning_rate": 1.9858942103622204e-06, "loss": 22.2188, "step": 8655 }, { "epoch": 0.08193788396550582, "grad_norm": 755.2274780273438, "learning_rate": 1.985889078619581e-06, "loss": 21.9219, "step": 8656 }, { "epoch": 0.08194734998722088, "grad_norm": 277.4668273925781, "learning_rate": 1.9858839459502698e-06, "loss": 28.6562, "step": 8657 }, { "epoch": 0.08195681600893592, "grad_norm": 477.6401672363281, "learning_rate": 1.985878812354292e-06, "loss": 13.2344, "step": 8658 }, { "epoch": 0.08196628203065098, "grad_norm": 465.13360595703125, "learning_rate": 1.9858736778316517e-06, "loss": 20.1172, "step": 8659 }, { "epoch": 0.08197574805236603, "grad_norm": 260.86407470703125, "learning_rate": 1.985868542382355e-06, "loss": 9.3984, "step": 8660 }, { "epoch": 0.08198521407408109, "grad_norm": 822.0299072265625, "learning_rate": 1.9858634060064056e-06, "loss": 52.9688, "step": 8661 }, { "epoch": 0.08199468009579614, "grad_norm": 1221.094482421875, "learning_rate": 1.9858582687038087e-06, "loss": 30.7422, "step": 8662 }, { "epoch": 0.0820041461175112, "grad_norm": 278.4847717285156, "learning_rate": 1.985853130474569e-06, "loss": 24.3281, "step": 8663 }, { "epoch": 0.08201361213922624, "grad_norm": 351.7635192871094, "learning_rate": 1.985847991318692e-06, "loss": 14.4062, "step": 8664 }, { "epoch": 0.0820230781609413, "grad_norm": 453.0286560058594, "learning_rate": 1.985842851236182e-06, "loss": 50.375, "step": 8665 }, { "epoch": 0.08203254418265636, "grad_norm": 3.2739827632904053, "learning_rate": 1.9858377102270437e-06, "loss": 0.9756, "step": 8666 }, { "epoch": 0.08204201020437141, "grad_norm": 2.6491310596466064, "learning_rate": 1.985832568291282e-06, "loss": 0.7944, "step": 8667 }, { "epoch": 0.08205147622608647, "grad_norm": 399.88348388671875, "learning_rate": 1.985827425428902e-06, "loss": 35.3594, "step": 8668 }, { "epoch": 0.08206094224780151, "grad_norm": 414.0464172363281, "learning_rate": 1.985822281639908e-06, "loss": 37.7344, "step": 8669 }, { "epoch": 0.08207040826951657, "grad_norm": 390.7677001953125, "learning_rate": 1.9858171369243057e-06, "loss": 49.5312, "step": 8670 }, { "epoch": 0.08207987429123162, "grad_norm": 521.9815673828125, "learning_rate": 1.985811991282099e-06, "loss": 29.4766, "step": 8671 }, { "epoch": 0.08208934031294668, "grad_norm": 277.93011474609375, "learning_rate": 1.985806844713293e-06, "loss": 23.3438, "step": 8672 }, { "epoch": 0.08209880633466173, "grad_norm": 284.7210693359375, "learning_rate": 1.985801697217893e-06, "loss": 31.5859, "step": 8673 }, { "epoch": 0.08210827235637679, "grad_norm": 511.8755798339844, "learning_rate": 1.9857965487959034e-06, "loss": 23.9609, "step": 8674 }, { "epoch": 0.08211773837809185, "grad_norm": 325.8238220214844, "learning_rate": 1.9857913994473295e-06, "loss": 39.1406, "step": 8675 }, { "epoch": 0.08212720439980689, "grad_norm": 266.0287170410156, "learning_rate": 1.9857862491721756e-06, "loss": 19.3906, "step": 8676 }, { "epoch": 0.08213667042152195, "grad_norm": 450.7208251953125, "learning_rate": 1.9857810979704465e-06, "loss": 12.3164, "step": 8677 }, { "epoch": 0.082146136443237, "grad_norm": 194.34597778320312, "learning_rate": 1.9857759458421477e-06, "loss": 23.0625, "step": 8678 }, { "epoch": 0.08215560246495206, "grad_norm": 349.4842529296875, "learning_rate": 1.9857707927872833e-06, "loss": 36.1406, "step": 8679 }, { "epoch": 0.0821650684866671, "grad_norm": 469.8100280761719, "learning_rate": 1.985765638805859e-06, "loss": 12.0234, "step": 8680 }, { "epoch": 0.08217453450838216, "grad_norm": 287.5538330078125, "learning_rate": 1.9857604838978787e-06, "loss": 23.4141, "step": 8681 }, { "epoch": 0.08218400053009721, "grad_norm": 199.02735900878906, "learning_rate": 1.9857553280633477e-06, "loss": 20.8203, "step": 8682 }, { "epoch": 0.08219346655181227, "grad_norm": 472.733154296875, "learning_rate": 1.985750171302271e-06, "loss": 52.7578, "step": 8683 }, { "epoch": 0.08220293257352733, "grad_norm": 368.31414794921875, "learning_rate": 1.985745013614653e-06, "loss": 24.0312, "step": 8684 }, { "epoch": 0.08221239859524238, "grad_norm": 389.3880615234375, "learning_rate": 1.985739855000499e-06, "loss": 41.2969, "step": 8685 }, { "epoch": 0.08222186461695744, "grad_norm": 562.7379760742188, "learning_rate": 1.9857346954598136e-06, "loss": 54.8828, "step": 8686 }, { "epoch": 0.08223133063867248, "grad_norm": 244.9026641845703, "learning_rate": 1.9857295349926016e-06, "loss": 12.1133, "step": 8687 }, { "epoch": 0.08224079666038754, "grad_norm": 155.0160369873047, "learning_rate": 1.9857243735988685e-06, "loss": 18.3516, "step": 8688 }, { "epoch": 0.08225026268210259, "grad_norm": 531.6895141601562, "learning_rate": 1.985719211278618e-06, "loss": 45.8594, "step": 8689 }, { "epoch": 0.08225972870381765, "grad_norm": 386.4433288574219, "learning_rate": 1.985714048031856e-06, "loss": 23.5547, "step": 8690 }, { "epoch": 0.08226919472553271, "grad_norm": 166.97007751464844, "learning_rate": 1.9857088838585865e-06, "loss": 15.4531, "step": 8691 }, { "epoch": 0.08227866074724775, "grad_norm": 541.2481689453125, "learning_rate": 1.985703718758815e-06, "loss": 53.3438, "step": 8692 }, { "epoch": 0.08228812676896281, "grad_norm": 269.0350646972656, "learning_rate": 1.985698552732546e-06, "loss": 19.0547, "step": 8693 }, { "epoch": 0.08229759279067786, "grad_norm": 317.9525146484375, "learning_rate": 1.985693385779785e-06, "loss": 24.5078, "step": 8694 }, { "epoch": 0.08230705881239292, "grad_norm": 336.6326599121094, "learning_rate": 1.9856882179005356e-06, "loss": 26.0938, "step": 8695 }, { "epoch": 0.08231652483410797, "grad_norm": 298.5812683105469, "learning_rate": 1.985683049094804e-06, "loss": 23.9531, "step": 8696 }, { "epoch": 0.08232599085582303, "grad_norm": 1261.5892333984375, "learning_rate": 1.9856778793625943e-06, "loss": 34.9062, "step": 8697 }, { "epoch": 0.08233545687753807, "grad_norm": 941.9432983398438, "learning_rate": 1.985672708703911e-06, "loss": 42.8906, "step": 8698 }, { "epoch": 0.08234492289925313, "grad_norm": 650.5338134765625, "learning_rate": 1.9856675371187597e-06, "loss": 18.1094, "step": 8699 }, { "epoch": 0.08235438892096819, "grad_norm": 216.5482177734375, "learning_rate": 1.9856623646071453e-06, "loss": 23.3047, "step": 8700 }, { "epoch": 0.08236385494268324, "grad_norm": 482.5940246582031, "learning_rate": 1.9856571911690726e-06, "loss": 23.4219, "step": 8701 }, { "epoch": 0.0823733209643983, "grad_norm": 257.6396789550781, "learning_rate": 1.9856520168045457e-06, "loss": 25.8672, "step": 8702 }, { "epoch": 0.08238278698611334, "grad_norm": 335.8690185546875, "learning_rate": 1.9856468415135706e-06, "loss": 30.1641, "step": 8703 }, { "epoch": 0.0823922530078284, "grad_norm": 175.90028381347656, "learning_rate": 1.985641665296151e-06, "loss": 23.2461, "step": 8704 }, { "epoch": 0.08240171902954345, "grad_norm": 219.3831329345703, "learning_rate": 1.985636488152293e-06, "loss": 16.7891, "step": 8705 }, { "epoch": 0.08241118505125851, "grad_norm": 3.5476040840148926, "learning_rate": 1.9856313100820004e-06, "loss": 0.927, "step": 8706 }, { "epoch": 0.08242065107297356, "grad_norm": 279.5444030761719, "learning_rate": 1.9856261310852786e-06, "loss": 37.4844, "step": 8707 }, { "epoch": 0.08243011709468862, "grad_norm": 315.3930969238281, "learning_rate": 1.9856209511621322e-06, "loss": 16.2812, "step": 8708 }, { "epoch": 0.08243958311640368, "grad_norm": 453.61431884765625, "learning_rate": 1.985615770312566e-06, "loss": 52.8594, "step": 8709 }, { "epoch": 0.08244904913811872, "grad_norm": 598.2427978515625, "learning_rate": 1.985610588536586e-06, "loss": 42.4297, "step": 8710 }, { "epoch": 0.08245851515983378, "grad_norm": 730.796142578125, "learning_rate": 1.9856054058341954e-06, "loss": 38.0586, "step": 8711 }, { "epoch": 0.08246798118154883, "grad_norm": 565.1240234375, "learning_rate": 1.9856002222054e-06, "loss": 33.1172, "step": 8712 }, { "epoch": 0.08247744720326389, "grad_norm": 304.0792541503906, "learning_rate": 1.9855950376502047e-06, "loss": 29.9062, "step": 8713 }, { "epoch": 0.08248691322497893, "grad_norm": 167.6880340576172, "learning_rate": 1.985589852168614e-06, "loss": 16.5781, "step": 8714 }, { "epoch": 0.082496379246694, "grad_norm": 563.47216796875, "learning_rate": 1.985584665760633e-06, "loss": 31.0781, "step": 8715 }, { "epoch": 0.08250584526840904, "grad_norm": 3.488931655883789, "learning_rate": 1.9855794784262663e-06, "loss": 1.0991, "step": 8716 }, { "epoch": 0.0825153112901241, "grad_norm": 410.6516418457031, "learning_rate": 1.9855742901655194e-06, "loss": 49.7266, "step": 8717 }, { "epoch": 0.08252477731183916, "grad_norm": 304.539794921875, "learning_rate": 1.9855691009783966e-06, "loss": 18.7383, "step": 8718 }, { "epoch": 0.0825342433335542, "grad_norm": 240.3153839111328, "learning_rate": 1.985563910864903e-06, "loss": 22.375, "step": 8719 }, { "epoch": 0.08254370935526927, "grad_norm": 364.3797302246094, "learning_rate": 1.985558719825043e-06, "loss": 19.8125, "step": 8720 }, { "epoch": 0.08255317537698431, "grad_norm": 422.76177978515625, "learning_rate": 1.9855535278588226e-06, "loss": 40.3594, "step": 8721 }, { "epoch": 0.08256264139869937, "grad_norm": 254.8544921875, "learning_rate": 1.9855483349662457e-06, "loss": 18.9375, "step": 8722 }, { "epoch": 0.08257210742041442, "grad_norm": 514.790283203125, "learning_rate": 1.9855431411473177e-06, "loss": 32.6562, "step": 8723 }, { "epoch": 0.08258157344212948, "grad_norm": 259.16943359375, "learning_rate": 1.9855379464020426e-06, "loss": 27.9922, "step": 8724 }, { "epoch": 0.08259103946384452, "grad_norm": 739.2986450195312, "learning_rate": 1.985532750730427e-06, "loss": 31.6016, "step": 8725 }, { "epoch": 0.08260050548555958, "grad_norm": 316.5402526855469, "learning_rate": 1.985527554132474e-06, "loss": 30.0781, "step": 8726 }, { "epoch": 0.08260997150727464, "grad_norm": 1177.6724853515625, "learning_rate": 1.9855223566081892e-06, "loss": 55.9062, "step": 8727 }, { "epoch": 0.08261943752898969, "grad_norm": 723.8480834960938, "learning_rate": 1.9855171581575774e-06, "loss": 51.8281, "step": 8728 }, { "epoch": 0.08262890355070475, "grad_norm": 438.4127197265625, "learning_rate": 1.985511958780644e-06, "loss": 37.3906, "step": 8729 }, { "epoch": 0.0826383695724198, "grad_norm": 460.58709716796875, "learning_rate": 1.9855067584773933e-06, "loss": 14.0859, "step": 8730 }, { "epoch": 0.08264783559413486, "grad_norm": 333.8709716796875, "learning_rate": 1.9855015572478303e-06, "loss": 16.8438, "step": 8731 }, { "epoch": 0.0826573016158499, "grad_norm": 293.8708801269531, "learning_rate": 1.98549635509196e-06, "loss": 31.4492, "step": 8732 }, { "epoch": 0.08266676763756496, "grad_norm": 454.6141662597656, "learning_rate": 1.9854911520097874e-06, "loss": 26.2031, "step": 8733 }, { "epoch": 0.08267623365928002, "grad_norm": 378.40753173828125, "learning_rate": 1.9854859480013173e-06, "loss": 28.9219, "step": 8734 }, { "epoch": 0.08268569968099507, "grad_norm": 620.6176147460938, "learning_rate": 1.985480743066554e-06, "loss": 42.7852, "step": 8735 }, { "epoch": 0.08269516570271013, "grad_norm": 226.80517578125, "learning_rate": 1.9854755372055033e-06, "loss": 21.2109, "step": 8736 }, { "epoch": 0.08270463172442517, "grad_norm": 231.06625366210938, "learning_rate": 1.9854703304181696e-06, "loss": 9.6914, "step": 8737 }, { "epoch": 0.08271409774614023, "grad_norm": 242.5263671875, "learning_rate": 1.985465122704558e-06, "loss": 23.1562, "step": 8738 }, { "epoch": 0.08272356376785528, "grad_norm": 228.9398956298828, "learning_rate": 1.985459914064673e-06, "loss": 33.3125, "step": 8739 }, { "epoch": 0.08273302978957034, "grad_norm": 3.227261543273926, "learning_rate": 1.98545470449852e-06, "loss": 0.9126, "step": 8740 }, { "epoch": 0.08274249581128539, "grad_norm": 499.432861328125, "learning_rate": 1.9854494940061036e-06, "loss": 46.9531, "step": 8741 }, { "epoch": 0.08275196183300045, "grad_norm": 389.5716552734375, "learning_rate": 1.9854442825874288e-06, "loss": 41.25, "step": 8742 }, { "epoch": 0.0827614278547155, "grad_norm": 409.2669372558594, "learning_rate": 1.9854390702425006e-06, "loss": 33.3984, "step": 8743 }, { "epoch": 0.08277089387643055, "grad_norm": 229.82664489746094, "learning_rate": 1.9854338569713237e-06, "loss": 16.5469, "step": 8744 }, { "epoch": 0.08278035989814561, "grad_norm": 403.3621520996094, "learning_rate": 1.985428642773903e-06, "loss": 22.7344, "step": 8745 }, { "epoch": 0.08278982591986066, "grad_norm": 266.61468505859375, "learning_rate": 1.9854234276502435e-06, "loss": 25.4922, "step": 8746 }, { "epoch": 0.08279929194157572, "grad_norm": 233.8787841796875, "learning_rate": 1.98541821160035e-06, "loss": 9.7852, "step": 8747 }, { "epoch": 0.08280875796329076, "grad_norm": 260.4866027832031, "learning_rate": 1.985412994624228e-06, "loss": 20.6406, "step": 8748 }, { "epoch": 0.08281822398500582, "grad_norm": 237.79815673828125, "learning_rate": 1.9854077767218816e-06, "loss": 18.9297, "step": 8749 }, { "epoch": 0.08282769000672087, "grad_norm": 578.6331176757812, "learning_rate": 1.9854025578933156e-06, "loss": 24.0312, "step": 8750 }, { "epoch": 0.08283715602843593, "grad_norm": 426.75726318359375, "learning_rate": 1.985397338138536e-06, "loss": 27.7734, "step": 8751 }, { "epoch": 0.08284662205015099, "grad_norm": 245.75962829589844, "learning_rate": 1.9853921174575466e-06, "loss": 27.3125, "step": 8752 }, { "epoch": 0.08285608807186604, "grad_norm": 271.35333251953125, "learning_rate": 1.9853868958503526e-06, "loss": 21.1172, "step": 8753 }, { "epoch": 0.0828655540935811, "grad_norm": 521.1118774414062, "learning_rate": 1.9853816733169593e-06, "loss": 13.1523, "step": 8754 }, { "epoch": 0.08287502011529614, "grad_norm": 227.997802734375, "learning_rate": 1.9853764498573715e-06, "loss": 21.8516, "step": 8755 }, { "epoch": 0.0828844861370112, "grad_norm": 316.5507507324219, "learning_rate": 1.9853712254715934e-06, "loss": 17.8516, "step": 8756 }, { "epoch": 0.08289395215872625, "grad_norm": 448.6460876464844, "learning_rate": 1.9853660001596306e-06, "loss": 27.1406, "step": 8757 }, { "epoch": 0.08290341818044131, "grad_norm": 410.67022705078125, "learning_rate": 1.985360773921488e-06, "loss": 25.0938, "step": 8758 }, { "epoch": 0.08291288420215635, "grad_norm": 334.8908996582031, "learning_rate": 1.9853555467571703e-06, "loss": 23.0156, "step": 8759 }, { "epoch": 0.08292235022387141, "grad_norm": 370.65576171875, "learning_rate": 1.9853503186666823e-06, "loss": 20.1562, "step": 8760 }, { "epoch": 0.08293181624558647, "grad_norm": 476.39459228515625, "learning_rate": 1.98534508965003e-06, "loss": 26.0273, "step": 8761 }, { "epoch": 0.08294128226730152, "grad_norm": 237.46949768066406, "learning_rate": 1.9853398597072163e-06, "loss": 21.1641, "step": 8762 }, { "epoch": 0.08295074828901658, "grad_norm": 564.3192138671875, "learning_rate": 1.9853346288382476e-06, "loss": 47.3125, "step": 8763 }, { "epoch": 0.08296021431073163, "grad_norm": 363.9349670410156, "learning_rate": 1.9853293970431285e-06, "loss": 39.9844, "step": 8764 }, { "epoch": 0.08296968033244669, "grad_norm": 230.90232849121094, "learning_rate": 1.985324164321864e-06, "loss": 21.0156, "step": 8765 }, { "epoch": 0.08297914635416173, "grad_norm": 182.3005828857422, "learning_rate": 1.9853189306744586e-06, "loss": 23.6484, "step": 8766 }, { "epoch": 0.08298861237587679, "grad_norm": 279.4573669433594, "learning_rate": 1.9853136961009176e-06, "loss": 19.8359, "step": 8767 }, { "epoch": 0.08299807839759184, "grad_norm": 465.7752990722656, "learning_rate": 1.985308460601246e-06, "loss": 38.2188, "step": 8768 }, { "epoch": 0.0830075444193069, "grad_norm": 309.4772033691406, "learning_rate": 1.985303224175448e-06, "loss": 39.3125, "step": 8769 }, { "epoch": 0.08301701044102196, "grad_norm": 524.9190673828125, "learning_rate": 1.98529798682353e-06, "loss": 40.9961, "step": 8770 }, { "epoch": 0.083026476462737, "grad_norm": 1024.4036865234375, "learning_rate": 1.9852927485454954e-06, "loss": 55.2969, "step": 8771 }, { "epoch": 0.08303594248445206, "grad_norm": 451.4525451660156, "learning_rate": 1.98528750934135e-06, "loss": 21.7422, "step": 8772 }, { "epoch": 0.08304540850616711, "grad_norm": 361.4324951171875, "learning_rate": 1.985282269211098e-06, "loss": 41.9531, "step": 8773 }, { "epoch": 0.08305487452788217, "grad_norm": 773.0471801757812, "learning_rate": 1.9852770281547453e-06, "loss": 43.1172, "step": 8774 }, { "epoch": 0.08306434054959722, "grad_norm": 297.3982238769531, "learning_rate": 1.985271786172296e-06, "loss": 22.0, "step": 8775 }, { "epoch": 0.08307380657131228, "grad_norm": 416.8920593261719, "learning_rate": 1.9852665432637556e-06, "loss": 31.6406, "step": 8776 }, { "epoch": 0.08308327259302734, "grad_norm": 447.1200866699219, "learning_rate": 1.985261299429128e-06, "loss": 48.1094, "step": 8777 }, { "epoch": 0.08309273861474238, "grad_norm": 3.161116123199463, "learning_rate": 1.9852560546684194e-06, "loss": 0.8359, "step": 8778 }, { "epoch": 0.08310220463645744, "grad_norm": 232.520263671875, "learning_rate": 1.9852508089816343e-06, "loss": 22.75, "step": 8779 }, { "epoch": 0.08311167065817249, "grad_norm": 3.281550168991089, "learning_rate": 1.9852455623687774e-06, "loss": 0.9539, "step": 8780 }, { "epoch": 0.08312113667988755, "grad_norm": 203.04812622070312, "learning_rate": 1.985240314829854e-06, "loss": 21.9531, "step": 8781 }, { "epoch": 0.0831306027016026, "grad_norm": 915.4973754882812, "learning_rate": 1.9852350663648683e-06, "loss": 76.125, "step": 8782 }, { "epoch": 0.08314006872331765, "grad_norm": 305.6681823730469, "learning_rate": 1.985229816973826e-06, "loss": 21.0352, "step": 8783 }, { "epoch": 0.0831495347450327, "grad_norm": 316.6472473144531, "learning_rate": 1.9852245666567318e-06, "loss": 52.8281, "step": 8784 }, { "epoch": 0.08315900076674776, "grad_norm": 795.4052734375, "learning_rate": 1.9852193154135905e-06, "loss": 49.6719, "step": 8785 }, { "epoch": 0.08316846678846282, "grad_norm": 384.4526672363281, "learning_rate": 1.985214063244407e-06, "loss": 37.9219, "step": 8786 }, { "epoch": 0.08317793281017787, "grad_norm": 414.32354736328125, "learning_rate": 1.9852088101491867e-06, "loss": 33.4844, "step": 8787 }, { "epoch": 0.08318739883189293, "grad_norm": 259.5562438964844, "learning_rate": 1.985203556127934e-06, "loss": 23.625, "step": 8788 }, { "epoch": 0.08319686485360797, "grad_norm": 299.328369140625, "learning_rate": 1.985198301180654e-06, "loss": 30.8594, "step": 8789 }, { "epoch": 0.08320633087532303, "grad_norm": 531.0048828125, "learning_rate": 1.9851930453073516e-06, "loss": 14.3906, "step": 8790 }, { "epoch": 0.08321579689703808, "grad_norm": 419.4172058105469, "learning_rate": 1.9851877885080324e-06, "loss": 14.5703, "step": 8791 }, { "epoch": 0.08322526291875314, "grad_norm": 520.8797607421875, "learning_rate": 1.9851825307827004e-06, "loss": 29.4844, "step": 8792 }, { "epoch": 0.08323472894046818, "grad_norm": 324.93402099609375, "learning_rate": 1.985177272131361e-06, "loss": 12.7227, "step": 8793 }, { "epoch": 0.08324419496218324, "grad_norm": 689.3154296875, "learning_rate": 1.985172012554019e-06, "loss": 75.6016, "step": 8794 }, { "epoch": 0.0832536609838983, "grad_norm": 3.1198084354400635, "learning_rate": 1.985166752050679e-06, "loss": 0.9307, "step": 8795 }, { "epoch": 0.08326312700561335, "grad_norm": 355.2467346191406, "learning_rate": 1.985161490621347e-06, "loss": 21.6719, "step": 8796 }, { "epoch": 0.08327259302732841, "grad_norm": 242.8581085205078, "learning_rate": 1.985156228266027e-06, "loss": 28.1875, "step": 8797 }, { "epoch": 0.08328205904904346, "grad_norm": 197.9665985107422, "learning_rate": 1.985150964984724e-06, "loss": 19.6797, "step": 8798 }, { "epoch": 0.08329152507075852, "grad_norm": 229.36068725585938, "learning_rate": 1.9851457007774437e-06, "loss": 18.9609, "step": 8799 }, { "epoch": 0.08330099109247356, "grad_norm": 329.86358642578125, "learning_rate": 1.9851404356441903e-06, "loss": 51.7656, "step": 8800 }, { "epoch": 0.08331045711418862, "grad_norm": 923.4099731445312, "learning_rate": 1.985135169584969e-06, "loss": 32.7656, "step": 8801 }, { "epoch": 0.08331992313590367, "grad_norm": 244.1951904296875, "learning_rate": 1.985129902599784e-06, "loss": 22.5625, "step": 8802 }, { "epoch": 0.08332938915761873, "grad_norm": 299.083251953125, "learning_rate": 1.985124634688642e-06, "loss": 21.0312, "step": 8803 }, { "epoch": 0.08333885517933379, "grad_norm": 723.6370849609375, "learning_rate": 1.9851193658515465e-06, "loss": 39.5312, "step": 8804 }, { "epoch": 0.08334832120104883, "grad_norm": 515.4642333984375, "learning_rate": 1.9851140960885032e-06, "loss": 19.6641, "step": 8805 }, { "epoch": 0.0833577872227639, "grad_norm": 548.3618774414062, "learning_rate": 1.985108825399516e-06, "loss": 57.4531, "step": 8806 }, { "epoch": 0.08336725324447894, "grad_norm": 454.2354431152344, "learning_rate": 1.9851035537845913e-06, "loss": 29.1719, "step": 8807 }, { "epoch": 0.083376719266194, "grad_norm": 161.4385223388672, "learning_rate": 1.985098281243733e-06, "loss": 21.2344, "step": 8808 }, { "epoch": 0.08338618528790905, "grad_norm": 741.9959716796875, "learning_rate": 1.9850930077769465e-06, "loss": 36.4922, "step": 8809 }, { "epoch": 0.0833956513096241, "grad_norm": 277.9853210449219, "learning_rate": 1.9850877333842367e-06, "loss": 23.0781, "step": 8810 }, { "epoch": 0.08340511733133915, "grad_norm": 212.28634643554688, "learning_rate": 1.9850824580656086e-06, "loss": 26.6797, "step": 8811 }, { "epoch": 0.08341458335305421, "grad_norm": 493.63433837890625, "learning_rate": 1.985077181821067e-06, "loss": 47.8125, "step": 8812 }, { "epoch": 0.08342404937476927, "grad_norm": 320.9934997558594, "learning_rate": 1.9850719046506166e-06, "loss": 23.5234, "step": 8813 }, { "epoch": 0.08343351539648432, "grad_norm": 483.96630859375, "learning_rate": 1.9850666265542633e-06, "loss": 23.9453, "step": 8814 }, { "epoch": 0.08344298141819938, "grad_norm": 244.08682250976562, "learning_rate": 1.985061347532011e-06, "loss": 24.3047, "step": 8815 }, { "epoch": 0.08345244743991442, "grad_norm": 534.9404907226562, "learning_rate": 1.985056067583865e-06, "loss": 26.5078, "step": 8816 }, { "epoch": 0.08346191346162948, "grad_norm": 266.70867919921875, "learning_rate": 1.985050786709831e-06, "loss": 27.9844, "step": 8817 }, { "epoch": 0.08347137948334453, "grad_norm": 381.6511535644531, "learning_rate": 1.985045504909913e-06, "loss": 51.0469, "step": 8818 }, { "epoch": 0.08348084550505959, "grad_norm": 635.8932495117188, "learning_rate": 1.9850402221841162e-06, "loss": 48.3906, "step": 8819 }, { "epoch": 0.08349031152677465, "grad_norm": 3.3161020278930664, "learning_rate": 1.9850349385324457e-06, "loss": 0.876, "step": 8820 }, { "epoch": 0.0834997775484897, "grad_norm": 856.2240600585938, "learning_rate": 1.9850296539549063e-06, "loss": 51.9531, "step": 8821 }, { "epoch": 0.08350924357020476, "grad_norm": 444.64862060546875, "learning_rate": 1.9850243684515036e-06, "loss": 64.6641, "step": 8822 }, { "epoch": 0.0835187095919198, "grad_norm": 289.142578125, "learning_rate": 1.985019082022242e-06, "loss": 24.7422, "step": 8823 }, { "epoch": 0.08352817561363486, "grad_norm": 531.5360717773438, "learning_rate": 1.985013794667126e-06, "loss": 24.3438, "step": 8824 }, { "epoch": 0.08353764163534991, "grad_norm": 422.6285400390625, "learning_rate": 1.9850085063861613e-06, "loss": 35.6875, "step": 8825 }, { "epoch": 0.08354710765706497, "grad_norm": 612.0631103515625, "learning_rate": 1.985003217179353e-06, "loss": 26.1719, "step": 8826 }, { "epoch": 0.08355657367878001, "grad_norm": 473.46197509765625, "learning_rate": 1.9849979270467052e-06, "loss": 37.8047, "step": 8827 }, { "epoch": 0.08356603970049507, "grad_norm": 471.254638671875, "learning_rate": 1.9849926359882237e-06, "loss": 30.7109, "step": 8828 }, { "epoch": 0.08357550572221013, "grad_norm": 417.5416564941406, "learning_rate": 1.9849873440039136e-06, "loss": 57.0, "step": 8829 }, { "epoch": 0.08358497174392518, "grad_norm": 164.27415466308594, "learning_rate": 1.984982051093779e-06, "loss": 22.0156, "step": 8830 }, { "epoch": 0.08359443776564024, "grad_norm": 295.36785888671875, "learning_rate": 1.9849767572578257e-06, "loss": 20.1172, "step": 8831 }, { "epoch": 0.08360390378735529, "grad_norm": 2.831653118133545, "learning_rate": 1.984971462496058e-06, "loss": 0.9204, "step": 8832 }, { "epoch": 0.08361336980907035, "grad_norm": 1456.96044921875, "learning_rate": 1.9849661668084814e-06, "loss": 67.1562, "step": 8833 }, { "epoch": 0.08362283583078539, "grad_norm": 248.31365966796875, "learning_rate": 1.9849608701951007e-06, "loss": 19.8125, "step": 8834 }, { "epoch": 0.08363230185250045, "grad_norm": 745.7921752929688, "learning_rate": 1.9849555726559205e-06, "loss": 47.2812, "step": 8835 }, { "epoch": 0.0836417678742155, "grad_norm": 397.4355773925781, "learning_rate": 1.984950274190947e-06, "loss": 33.1719, "step": 8836 }, { "epoch": 0.08365123389593056, "grad_norm": 3.3277812004089355, "learning_rate": 1.9849449748001833e-06, "loss": 0.9858, "step": 8837 }, { "epoch": 0.08366069991764562, "grad_norm": 758.599853515625, "learning_rate": 1.984939674483636e-06, "loss": 26.2031, "step": 8838 }, { "epoch": 0.08367016593936066, "grad_norm": 546.0413208007812, "learning_rate": 1.9849343732413094e-06, "loss": 46.6875, "step": 8839 }, { "epoch": 0.08367963196107572, "grad_norm": 813.3239135742188, "learning_rate": 1.984929071073208e-06, "loss": 32.6562, "step": 8840 }, { "epoch": 0.08368909798279077, "grad_norm": 176.57369995117188, "learning_rate": 1.984923767979338e-06, "loss": 18.8555, "step": 8841 }, { "epoch": 0.08369856400450583, "grad_norm": 245.02989196777344, "learning_rate": 1.9849184639597036e-06, "loss": 23.1953, "step": 8842 }, { "epoch": 0.08370803002622088, "grad_norm": 585.6976928710938, "learning_rate": 1.9849131590143096e-06, "loss": 52.5781, "step": 8843 }, { "epoch": 0.08371749604793594, "grad_norm": 676.7100219726562, "learning_rate": 1.984907853143162e-06, "loss": 33.3047, "step": 8844 }, { "epoch": 0.08372696206965098, "grad_norm": 159.07142639160156, "learning_rate": 1.9849025463462645e-06, "loss": 22.1406, "step": 8845 }, { "epoch": 0.08373642809136604, "grad_norm": 282.001220703125, "learning_rate": 1.984897238623623e-06, "loss": 23.6562, "step": 8846 }, { "epoch": 0.0837458941130811, "grad_norm": 388.9539794921875, "learning_rate": 1.9848919299752416e-06, "loss": 48.1719, "step": 8847 }, { "epoch": 0.08375536013479615, "grad_norm": 393.9342346191406, "learning_rate": 1.9848866204011264e-06, "loss": 48.75, "step": 8848 }, { "epoch": 0.08376482615651121, "grad_norm": 210.8812255859375, "learning_rate": 1.984881309901282e-06, "loss": 18.5859, "step": 8849 }, { "epoch": 0.08377429217822625, "grad_norm": 3.436210870742798, "learning_rate": 1.984875998475713e-06, "loss": 1.0728, "step": 8850 }, { "epoch": 0.08378375819994131, "grad_norm": 3.0027875900268555, "learning_rate": 1.9848706861244245e-06, "loss": 0.9043, "step": 8851 }, { "epoch": 0.08379322422165636, "grad_norm": 230.69186401367188, "learning_rate": 1.9848653728474217e-06, "loss": 25.0234, "step": 8852 }, { "epoch": 0.08380269024337142, "grad_norm": 654.931396484375, "learning_rate": 1.9848600586447094e-06, "loss": 55.5078, "step": 8853 }, { "epoch": 0.08381215626508647, "grad_norm": 330.3341369628906, "learning_rate": 1.984854743516293e-06, "loss": 25.0312, "step": 8854 }, { "epoch": 0.08382162228680153, "grad_norm": 250.2721405029297, "learning_rate": 1.9848494274621766e-06, "loss": 26.1719, "step": 8855 }, { "epoch": 0.08383108830851659, "grad_norm": 173.36387634277344, "learning_rate": 1.9848441104823664e-06, "loss": 15.7969, "step": 8856 }, { "epoch": 0.08384055433023163, "grad_norm": 556.9160766601562, "learning_rate": 1.984838792576867e-06, "loss": 59.4219, "step": 8857 }, { "epoch": 0.08385002035194669, "grad_norm": 522.1920166015625, "learning_rate": 1.9848334737456827e-06, "loss": 30.1406, "step": 8858 }, { "epoch": 0.08385948637366174, "grad_norm": 484.8470764160156, "learning_rate": 1.984828153988819e-06, "loss": 19.9844, "step": 8859 }, { "epoch": 0.0838689523953768, "grad_norm": 1095.81982421875, "learning_rate": 1.984822833306281e-06, "loss": 44.1406, "step": 8860 }, { "epoch": 0.08387841841709184, "grad_norm": 468.5309753417969, "learning_rate": 1.984817511698074e-06, "loss": 24.7734, "step": 8861 }, { "epoch": 0.0838878844388069, "grad_norm": 218.4012908935547, "learning_rate": 1.984812189164202e-06, "loss": 19.0547, "step": 8862 }, { "epoch": 0.08389735046052196, "grad_norm": 488.3345947265625, "learning_rate": 1.984806865704671e-06, "loss": 36.6172, "step": 8863 }, { "epoch": 0.08390681648223701, "grad_norm": 300.9268493652344, "learning_rate": 1.9848015413194856e-06, "loss": 23.6016, "step": 8864 }, { "epoch": 0.08391628250395207, "grad_norm": 202.99546813964844, "learning_rate": 1.9847962160086506e-06, "loss": 17.1094, "step": 8865 }, { "epoch": 0.08392574852566712, "grad_norm": 295.3038024902344, "learning_rate": 1.9847908897721717e-06, "loss": 34.5, "step": 8866 }, { "epoch": 0.08393521454738218, "grad_norm": 222.79852294921875, "learning_rate": 1.9847855626100526e-06, "loss": 15.0781, "step": 8867 }, { "epoch": 0.08394468056909722, "grad_norm": 388.4269104003906, "learning_rate": 1.9847802345223e-06, "loss": 19.6094, "step": 8868 }, { "epoch": 0.08395414659081228, "grad_norm": 180.2424774169922, "learning_rate": 1.9847749055089173e-06, "loss": 17.1172, "step": 8869 }, { "epoch": 0.08396361261252733, "grad_norm": 330.44854736328125, "learning_rate": 1.9847695755699108e-06, "loss": 33.0391, "step": 8870 }, { "epoch": 0.08397307863424239, "grad_norm": 340.24462890625, "learning_rate": 1.9847642447052845e-06, "loss": 36.9531, "step": 8871 }, { "epoch": 0.08398254465595745, "grad_norm": 2.9250519275665283, "learning_rate": 1.9847589129150443e-06, "loss": 0.8091, "step": 8872 }, { "epoch": 0.0839920106776725, "grad_norm": 284.3958740234375, "learning_rate": 1.984753580199195e-06, "loss": 16.7734, "step": 8873 }, { "epoch": 0.08400147669938755, "grad_norm": 238.1319122314453, "learning_rate": 1.984748246557741e-06, "loss": 23.3047, "step": 8874 }, { "epoch": 0.0840109427211026, "grad_norm": 192.00372314453125, "learning_rate": 1.9847429119906873e-06, "loss": 21.9219, "step": 8875 }, { "epoch": 0.08402040874281766, "grad_norm": 444.34039306640625, "learning_rate": 1.98473757649804e-06, "loss": 27.4688, "step": 8876 }, { "epoch": 0.0840298747645327, "grad_norm": 363.1620788574219, "learning_rate": 1.984732240079803e-06, "loss": 23.7031, "step": 8877 }, { "epoch": 0.08403934078624777, "grad_norm": 238.79112243652344, "learning_rate": 1.984726902735982e-06, "loss": 31.8125, "step": 8878 }, { "epoch": 0.08404880680796281, "grad_norm": 318.61712646484375, "learning_rate": 1.984721564466582e-06, "loss": 18.3281, "step": 8879 }, { "epoch": 0.08405827282967787, "grad_norm": 699.9796142578125, "learning_rate": 1.984716225271607e-06, "loss": 19.3203, "step": 8880 }, { "epoch": 0.08406773885139293, "grad_norm": 351.974365234375, "learning_rate": 1.9847108851510635e-06, "loss": 29.7656, "step": 8881 }, { "epoch": 0.08407720487310798, "grad_norm": 230.63966369628906, "learning_rate": 1.9847055441049555e-06, "loss": 21.9766, "step": 8882 }, { "epoch": 0.08408667089482304, "grad_norm": 449.7486877441406, "learning_rate": 1.984700202133289e-06, "loss": 26.1719, "step": 8883 }, { "epoch": 0.08409613691653808, "grad_norm": 382.6612243652344, "learning_rate": 1.9846948592360676e-06, "loss": 21.293, "step": 8884 }, { "epoch": 0.08410560293825314, "grad_norm": 458.1452941894531, "learning_rate": 1.984689515413297e-06, "loss": 22.5469, "step": 8885 }, { "epoch": 0.08411506895996819, "grad_norm": 3.5151407718658447, "learning_rate": 1.984684170664983e-06, "loss": 1.064, "step": 8886 }, { "epoch": 0.08412453498168325, "grad_norm": 470.7999572753906, "learning_rate": 1.98467882499113e-06, "loss": 27.1172, "step": 8887 }, { "epoch": 0.0841340010033983, "grad_norm": 612.2626953125, "learning_rate": 1.984673478391742e-06, "loss": 47.5156, "step": 8888 }, { "epoch": 0.08414346702511336, "grad_norm": 650.4542846679688, "learning_rate": 1.984668130866826e-06, "loss": 19.7656, "step": 8889 }, { "epoch": 0.08415293304682842, "grad_norm": 3.203543186187744, "learning_rate": 1.9846627824163854e-06, "loss": 1.0146, "step": 8890 }, { "epoch": 0.08416239906854346, "grad_norm": 301.44696044921875, "learning_rate": 1.984657433040426e-06, "loss": 24.6016, "step": 8891 }, { "epoch": 0.08417186509025852, "grad_norm": 3.6490440368652344, "learning_rate": 1.9846520827389527e-06, "loss": 0.8613, "step": 8892 }, { "epoch": 0.08418133111197357, "grad_norm": 981.7557373046875, "learning_rate": 1.9846467315119702e-06, "loss": 58.0, "step": 8893 }, { "epoch": 0.08419079713368863, "grad_norm": 2.8892717361450195, "learning_rate": 1.9846413793594845e-06, "loss": 0.7439, "step": 8894 }, { "epoch": 0.08420026315540367, "grad_norm": 372.880615234375, "learning_rate": 1.9846360262814996e-06, "loss": 33.4766, "step": 8895 }, { "epoch": 0.08420972917711873, "grad_norm": 215.8834686279297, "learning_rate": 1.9846306722780207e-06, "loss": 26.125, "step": 8896 }, { "epoch": 0.08421919519883378, "grad_norm": 3.05358624458313, "learning_rate": 1.9846253173490532e-06, "loss": 0.873, "step": 8897 }, { "epoch": 0.08422866122054884, "grad_norm": 685.8557739257812, "learning_rate": 1.984619961494602e-06, "loss": 29.5234, "step": 8898 }, { "epoch": 0.0842381272422639, "grad_norm": 270.11041259765625, "learning_rate": 1.984614604714672e-06, "loss": 20.7031, "step": 8899 }, { "epoch": 0.08424759326397895, "grad_norm": 212.61422729492188, "learning_rate": 1.9846092470092684e-06, "loss": 14.5234, "step": 8900 }, { "epoch": 0.084257059285694, "grad_norm": 424.1892395019531, "learning_rate": 1.984603888378396e-06, "loss": 35.4141, "step": 8901 }, { "epoch": 0.08426652530740905, "grad_norm": 490.9993591308594, "learning_rate": 1.98459852882206e-06, "loss": 12.1758, "step": 8902 }, { "epoch": 0.08427599132912411, "grad_norm": 445.6024475097656, "learning_rate": 1.9845931683402656e-06, "loss": 26.4297, "step": 8903 }, { "epoch": 0.08428545735083916, "grad_norm": 289.1441955566406, "learning_rate": 1.9845878069330174e-06, "loss": 24.7656, "step": 8904 }, { "epoch": 0.08429492337255422, "grad_norm": 462.58441162109375, "learning_rate": 1.984582444600321e-06, "loss": 31.0625, "step": 8905 }, { "epoch": 0.08430438939426928, "grad_norm": 458.9753112792969, "learning_rate": 1.984577081342181e-06, "loss": 13.668, "step": 8906 }, { "epoch": 0.08431385541598432, "grad_norm": 264.06951904296875, "learning_rate": 1.984571717158603e-06, "loss": 22.8516, "step": 8907 }, { "epoch": 0.08432332143769938, "grad_norm": 220.81752014160156, "learning_rate": 1.984566352049591e-06, "loss": 23.2969, "step": 8908 }, { "epoch": 0.08433278745941443, "grad_norm": 710.1553344726562, "learning_rate": 1.9845609860151515e-06, "loss": 29.7266, "step": 8909 }, { "epoch": 0.08434225348112949, "grad_norm": 613.0198364257812, "learning_rate": 1.984555619055288e-06, "loss": 29.375, "step": 8910 }, { "epoch": 0.08435171950284454, "grad_norm": 448.0309143066406, "learning_rate": 1.9845502511700064e-06, "loss": 26.6602, "step": 8911 }, { "epoch": 0.0843611855245596, "grad_norm": 950.5946044921875, "learning_rate": 1.9845448823593116e-06, "loss": 44.3281, "step": 8912 }, { "epoch": 0.08437065154627464, "grad_norm": 707.7650756835938, "learning_rate": 1.984539512623209e-06, "loss": 32.5312, "step": 8913 }, { "epoch": 0.0843801175679897, "grad_norm": 277.1792907714844, "learning_rate": 1.984534141961703e-06, "loss": 22.6719, "step": 8914 }, { "epoch": 0.08438958358970476, "grad_norm": 401.34832763671875, "learning_rate": 1.9845287703747987e-06, "loss": 51.7188, "step": 8915 }, { "epoch": 0.08439904961141981, "grad_norm": 358.3507995605469, "learning_rate": 1.9845233978625018e-06, "loss": 49.875, "step": 8916 }, { "epoch": 0.08440851563313487, "grad_norm": 374.16357421875, "learning_rate": 1.984518024424817e-06, "loss": 24.0547, "step": 8917 }, { "epoch": 0.08441798165484991, "grad_norm": 740.1757202148438, "learning_rate": 1.9845126500617493e-06, "loss": 20.7891, "step": 8918 }, { "epoch": 0.08442744767656497, "grad_norm": 319.4224548339844, "learning_rate": 1.9845072747733035e-06, "loss": 21.3477, "step": 8919 }, { "epoch": 0.08443691369828002, "grad_norm": 641.6824340820312, "learning_rate": 1.984501898559485e-06, "loss": 38.3594, "step": 8920 }, { "epoch": 0.08444637971999508, "grad_norm": 327.9729919433594, "learning_rate": 1.984496521420299e-06, "loss": 17.1016, "step": 8921 }, { "epoch": 0.08445584574171013, "grad_norm": 3.042351722717285, "learning_rate": 1.98449114335575e-06, "loss": 0.7656, "step": 8922 }, { "epoch": 0.08446531176342519, "grad_norm": 259.6384582519531, "learning_rate": 1.9844857643658435e-06, "loss": 28.3125, "step": 8923 }, { "epoch": 0.08447477778514025, "grad_norm": 439.8496398925781, "learning_rate": 1.9844803844505845e-06, "loss": 21.5625, "step": 8924 }, { "epoch": 0.08448424380685529, "grad_norm": 784.4655151367188, "learning_rate": 1.984475003609978e-06, "loss": 39.6016, "step": 8925 }, { "epoch": 0.08449370982857035, "grad_norm": 356.92474365234375, "learning_rate": 1.9844696218440286e-06, "loss": 22.3906, "step": 8926 }, { "epoch": 0.0845031758502854, "grad_norm": 427.600341796875, "learning_rate": 1.9844642391527423e-06, "loss": 51.7031, "step": 8927 }, { "epoch": 0.08451264187200046, "grad_norm": 202.8555145263672, "learning_rate": 1.9844588555361233e-06, "loss": 21.0781, "step": 8928 }, { "epoch": 0.0845221078937155, "grad_norm": 325.9989929199219, "learning_rate": 1.984453470994177e-06, "loss": 28.1172, "step": 8929 }, { "epoch": 0.08453157391543056, "grad_norm": 444.90142822265625, "learning_rate": 1.984448085526909e-06, "loss": 34.9531, "step": 8930 }, { "epoch": 0.08454103993714561, "grad_norm": 470.87457275390625, "learning_rate": 1.9844426991343238e-06, "loss": 31.6953, "step": 8931 }, { "epoch": 0.08455050595886067, "grad_norm": 385.0086975097656, "learning_rate": 1.984437311816426e-06, "loss": 25.5664, "step": 8932 }, { "epoch": 0.08455997198057573, "grad_norm": 266.005615234375, "learning_rate": 1.9844319235732214e-06, "loss": 20.8906, "step": 8933 }, { "epoch": 0.08456943800229078, "grad_norm": 643.6508178710938, "learning_rate": 1.984426534404715e-06, "loss": 54.3828, "step": 8934 }, { "epoch": 0.08457890402400584, "grad_norm": 562.8931884765625, "learning_rate": 1.9844211443109115e-06, "loss": 29.5, "step": 8935 }, { "epoch": 0.08458837004572088, "grad_norm": 2.835160970687866, "learning_rate": 1.9844157532918163e-06, "loss": 0.8364, "step": 8936 }, { "epoch": 0.08459783606743594, "grad_norm": 242.38140869140625, "learning_rate": 1.984410361347434e-06, "loss": 28.2344, "step": 8937 }, { "epoch": 0.08460730208915099, "grad_norm": 497.8084411621094, "learning_rate": 1.9844049684777704e-06, "loss": 60.375, "step": 8938 }, { "epoch": 0.08461676811086605, "grad_norm": 341.8622131347656, "learning_rate": 1.98439957468283e-06, "loss": 16.8828, "step": 8939 }, { "epoch": 0.0846262341325811, "grad_norm": 612.1652221679688, "learning_rate": 1.984394179962618e-06, "loss": 58.4375, "step": 8940 }, { "epoch": 0.08463570015429615, "grad_norm": 384.6694641113281, "learning_rate": 1.9843887843171397e-06, "loss": 26.8125, "step": 8941 }, { "epoch": 0.08464516617601121, "grad_norm": 464.9880065917969, "learning_rate": 1.9843833877463995e-06, "loss": 19.3125, "step": 8942 }, { "epoch": 0.08465463219772626, "grad_norm": 3.190570116043091, "learning_rate": 1.9843779902504037e-06, "loss": 0.8154, "step": 8943 }, { "epoch": 0.08466409821944132, "grad_norm": 397.5469970703125, "learning_rate": 1.984372591829156e-06, "loss": 19.6875, "step": 8944 }, { "epoch": 0.08467356424115637, "grad_norm": 252.61300659179688, "learning_rate": 1.9843671924826622e-06, "loss": 22.8672, "step": 8945 }, { "epoch": 0.08468303026287143, "grad_norm": 347.6247863769531, "learning_rate": 1.9843617922109276e-06, "loss": 25.8516, "step": 8946 }, { "epoch": 0.08469249628458647, "grad_norm": 196.84426879882812, "learning_rate": 1.984356391013957e-06, "loss": 22.3594, "step": 8947 }, { "epoch": 0.08470196230630153, "grad_norm": 169.17178344726562, "learning_rate": 1.9843509888917547e-06, "loss": 20.7266, "step": 8948 }, { "epoch": 0.08471142832801659, "grad_norm": 446.05029296875, "learning_rate": 1.9843455858443274e-06, "loss": 49.7344, "step": 8949 }, { "epoch": 0.08472089434973164, "grad_norm": 483.9273986816406, "learning_rate": 1.984340181871679e-06, "loss": 24.125, "step": 8950 }, { "epoch": 0.0847303603714467, "grad_norm": 426.08087158203125, "learning_rate": 1.9843347769738145e-06, "loss": 30.9844, "step": 8951 }, { "epoch": 0.08473982639316174, "grad_norm": 764.5841674804688, "learning_rate": 1.98432937115074e-06, "loss": 23.0078, "step": 8952 }, { "epoch": 0.0847492924148768, "grad_norm": 250.4618682861328, "learning_rate": 1.984323964402459e-06, "loss": 22.7891, "step": 8953 }, { "epoch": 0.08475875843659185, "grad_norm": 347.85308837890625, "learning_rate": 1.984318556728978e-06, "loss": 19.0625, "step": 8954 }, { "epoch": 0.08476822445830691, "grad_norm": 434.8587341308594, "learning_rate": 1.9843131481303017e-06, "loss": 60.8594, "step": 8955 }, { "epoch": 0.08477769048002196, "grad_norm": 602.5391845703125, "learning_rate": 1.984307738606435e-06, "loss": 51.2109, "step": 8956 }, { "epoch": 0.08478715650173702, "grad_norm": 634.9931030273438, "learning_rate": 1.984302328157383e-06, "loss": 20.5547, "step": 8957 }, { "epoch": 0.08479662252345208, "grad_norm": 868.7211303710938, "learning_rate": 1.984296916783151e-06, "loss": 46.7109, "step": 8958 }, { "epoch": 0.08480608854516712, "grad_norm": 148.088623046875, "learning_rate": 1.984291504483744e-06, "loss": 22.4766, "step": 8959 }, { "epoch": 0.08481555456688218, "grad_norm": 321.1512756347656, "learning_rate": 1.9842860912591665e-06, "loss": 12.6953, "step": 8960 }, { "epoch": 0.08482502058859723, "grad_norm": 430.67913818359375, "learning_rate": 1.9842806771094247e-06, "loss": 27.3125, "step": 8961 }, { "epoch": 0.08483448661031229, "grad_norm": 173.428466796875, "learning_rate": 1.9842752620345226e-06, "loss": 18.8516, "step": 8962 }, { "epoch": 0.08484395263202733, "grad_norm": 470.8623352050781, "learning_rate": 1.984269846034466e-06, "loss": 66.125, "step": 8963 }, { "epoch": 0.0848534186537424, "grad_norm": 336.7995300292969, "learning_rate": 1.98426442910926e-06, "loss": 31.8125, "step": 8964 }, { "epoch": 0.08486288467545744, "grad_norm": 439.6339111328125, "learning_rate": 1.984259011258909e-06, "loss": 43.1328, "step": 8965 }, { "epoch": 0.0848723506971725, "grad_norm": 3.148829221725464, "learning_rate": 1.984253592483419e-06, "loss": 0.9243, "step": 8966 }, { "epoch": 0.08488181671888756, "grad_norm": 260.5761413574219, "learning_rate": 1.9842481727827946e-06, "loss": 18.5703, "step": 8967 }, { "epoch": 0.0848912827406026, "grad_norm": 895.5426025390625, "learning_rate": 1.984242752157041e-06, "loss": 54.0, "step": 8968 }, { "epoch": 0.08490074876231767, "grad_norm": 541.4494018554688, "learning_rate": 1.9842373306061627e-06, "loss": 25.1562, "step": 8969 }, { "epoch": 0.08491021478403271, "grad_norm": 3.313176393508911, "learning_rate": 1.984231908130166e-06, "loss": 0.9561, "step": 8970 }, { "epoch": 0.08491968080574777, "grad_norm": 715.0614624023438, "learning_rate": 1.9842264847290547e-06, "loss": 28.3359, "step": 8971 }, { "epoch": 0.08492914682746282, "grad_norm": 316.01031494140625, "learning_rate": 1.984221060402835e-06, "loss": 21.0625, "step": 8972 }, { "epoch": 0.08493861284917788, "grad_norm": 303.5615539550781, "learning_rate": 1.9842156351515114e-06, "loss": 30.8516, "step": 8973 }, { "epoch": 0.08494807887089292, "grad_norm": 515.254638671875, "learning_rate": 1.9842102089750893e-06, "loss": 46.6562, "step": 8974 }, { "epoch": 0.08495754489260798, "grad_norm": 336.7696533203125, "learning_rate": 1.9842047818735733e-06, "loss": 24.7891, "step": 8975 }, { "epoch": 0.08496701091432304, "grad_norm": 225.42103576660156, "learning_rate": 1.984199353846969e-06, "loss": 23.5234, "step": 8976 }, { "epoch": 0.08497647693603809, "grad_norm": 438.22174072265625, "learning_rate": 1.984193924895281e-06, "loss": 20.3359, "step": 8977 }, { "epoch": 0.08498594295775315, "grad_norm": 314.52557373046875, "learning_rate": 1.9841884950185156e-06, "loss": 19.6719, "step": 8978 }, { "epoch": 0.0849954089794682, "grad_norm": 451.1203308105469, "learning_rate": 1.984183064216676e-06, "loss": 24.5, "step": 8979 }, { "epoch": 0.08500487500118326, "grad_norm": 284.1127624511719, "learning_rate": 1.984177632489769e-06, "loss": 15.6562, "step": 8980 }, { "epoch": 0.0850143410228983, "grad_norm": 152.5570526123047, "learning_rate": 1.984172199837799e-06, "loss": 21.2812, "step": 8981 }, { "epoch": 0.08502380704461336, "grad_norm": 233.97743225097656, "learning_rate": 1.984166766260771e-06, "loss": 12.2266, "step": 8982 }, { "epoch": 0.08503327306632841, "grad_norm": 450.0826416015625, "learning_rate": 1.9841613317586904e-06, "loss": 41.2344, "step": 8983 }, { "epoch": 0.08504273908804347, "grad_norm": 507.8577575683594, "learning_rate": 1.9841558963315622e-06, "loss": 34.5469, "step": 8984 }, { "epoch": 0.08505220510975853, "grad_norm": 677.7987670898438, "learning_rate": 1.9841504599793915e-06, "loss": 55.875, "step": 8985 }, { "epoch": 0.08506167113147357, "grad_norm": 211.3102264404297, "learning_rate": 1.984145022702183e-06, "loss": 26.5469, "step": 8986 }, { "epoch": 0.08507113715318863, "grad_norm": 147.4817657470703, "learning_rate": 1.9841395844999424e-06, "loss": 15.3281, "step": 8987 }, { "epoch": 0.08508060317490368, "grad_norm": 254.62643432617188, "learning_rate": 1.984134145372675e-06, "loss": 24.4922, "step": 8988 }, { "epoch": 0.08509006919661874, "grad_norm": 466.29986572265625, "learning_rate": 1.984128705320385e-06, "loss": 9.2852, "step": 8989 }, { "epoch": 0.08509953521833379, "grad_norm": 1727.5743408203125, "learning_rate": 1.9841232643430783e-06, "loss": 49.0859, "step": 8990 }, { "epoch": 0.08510900124004885, "grad_norm": 236.56930541992188, "learning_rate": 1.9841178224407598e-06, "loss": 27.8906, "step": 8991 }, { "epoch": 0.08511846726176389, "grad_norm": 738.1075439453125, "learning_rate": 1.9841123796134343e-06, "loss": 41.6875, "step": 8992 }, { "epoch": 0.08512793328347895, "grad_norm": 252.18765258789062, "learning_rate": 1.9841069358611073e-06, "loss": 27.2656, "step": 8993 }, { "epoch": 0.08513739930519401, "grad_norm": 240.696533203125, "learning_rate": 1.984101491183784e-06, "loss": 15.7266, "step": 8994 }, { "epoch": 0.08514686532690906, "grad_norm": 444.804443359375, "learning_rate": 1.984096045581469e-06, "loss": 41.5156, "step": 8995 }, { "epoch": 0.08515633134862412, "grad_norm": 193.24325561523438, "learning_rate": 1.9840905990541683e-06, "loss": 21.7812, "step": 8996 }, { "epoch": 0.08516579737033916, "grad_norm": 462.8013610839844, "learning_rate": 1.9840851516018857e-06, "loss": 40.3594, "step": 8997 }, { "epoch": 0.08517526339205422, "grad_norm": 752.9466552734375, "learning_rate": 1.9840797032246276e-06, "loss": 23.5547, "step": 8998 }, { "epoch": 0.08518472941376927, "grad_norm": 488.2502136230469, "learning_rate": 1.9840742539223984e-06, "loss": 23.2188, "step": 8999 }, { "epoch": 0.08519419543548433, "grad_norm": 900.9025268554688, "learning_rate": 1.9840688036952034e-06, "loss": 39.1953, "step": 9000 }, { "epoch": 0.08520366145719939, "grad_norm": 568.57177734375, "learning_rate": 1.984063352543048e-06, "loss": 25.25, "step": 9001 }, { "epoch": 0.08521312747891444, "grad_norm": 626.4536743164062, "learning_rate": 1.9840579004659367e-06, "loss": 47.0469, "step": 9002 }, { "epoch": 0.0852225935006295, "grad_norm": 932.9011840820312, "learning_rate": 1.9840524474638752e-06, "loss": 80.6094, "step": 9003 }, { "epoch": 0.08523205952234454, "grad_norm": 2.3400521278381348, "learning_rate": 1.9840469935368686e-06, "loss": 0.7202, "step": 9004 }, { "epoch": 0.0852415255440596, "grad_norm": 248.3699493408203, "learning_rate": 1.9840415386849215e-06, "loss": 19.2578, "step": 9005 }, { "epoch": 0.08525099156577465, "grad_norm": 700.3169555664062, "learning_rate": 1.9840360829080394e-06, "loss": 22.2812, "step": 9006 }, { "epoch": 0.08526045758748971, "grad_norm": 356.7626037597656, "learning_rate": 1.9840306262062274e-06, "loss": 35.625, "step": 9007 }, { "epoch": 0.08526992360920475, "grad_norm": 469.1622619628906, "learning_rate": 1.9840251685794906e-06, "loss": 22.9844, "step": 9008 }, { "epoch": 0.08527938963091981, "grad_norm": 301.9053955078125, "learning_rate": 1.9840197100278345e-06, "loss": 41.7812, "step": 9009 }, { "epoch": 0.08528885565263487, "grad_norm": 360.13427734375, "learning_rate": 1.9840142505512637e-06, "loss": 23.7031, "step": 9010 }, { "epoch": 0.08529832167434992, "grad_norm": 363.123291015625, "learning_rate": 1.9840087901497837e-06, "loss": 24.4141, "step": 9011 }, { "epoch": 0.08530778769606498, "grad_norm": 426.5337829589844, "learning_rate": 1.9840033288233992e-06, "loss": 41.7031, "step": 9012 }, { "epoch": 0.08531725371778003, "grad_norm": 726.8045043945312, "learning_rate": 1.9839978665721153e-06, "loss": 22.625, "step": 9013 }, { "epoch": 0.08532671973949509, "grad_norm": 485.836669921875, "learning_rate": 1.9839924033959376e-06, "loss": 50.7266, "step": 9014 }, { "epoch": 0.08533618576121013, "grad_norm": 222.4981231689453, "learning_rate": 1.9839869392948714e-06, "loss": 30.0312, "step": 9015 }, { "epoch": 0.08534565178292519, "grad_norm": 255.98147583007812, "learning_rate": 1.9839814742689214e-06, "loss": 16.3828, "step": 9016 }, { "epoch": 0.08535511780464024, "grad_norm": 2.5820746421813965, "learning_rate": 1.9839760083180924e-06, "loss": 0.8901, "step": 9017 }, { "epoch": 0.0853645838263553, "grad_norm": 638.98095703125, "learning_rate": 1.9839705414423902e-06, "loss": 54.6719, "step": 9018 }, { "epoch": 0.08537404984807036, "grad_norm": 221.1435546875, "learning_rate": 1.98396507364182e-06, "loss": 24.9922, "step": 9019 }, { "epoch": 0.0853835158697854, "grad_norm": 428.1808776855469, "learning_rate": 1.9839596049163863e-06, "loss": 15.9453, "step": 9020 }, { "epoch": 0.08539298189150046, "grad_norm": 663.3126831054688, "learning_rate": 1.9839541352660947e-06, "loss": 42.4844, "step": 9021 }, { "epoch": 0.08540244791321551, "grad_norm": 460.8239440917969, "learning_rate": 1.9839486646909503e-06, "loss": 24.9609, "step": 9022 }, { "epoch": 0.08541191393493057, "grad_norm": 612.2810668945312, "learning_rate": 1.983943193190958e-06, "loss": 51.2734, "step": 9023 }, { "epoch": 0.08542137995664562, "grad_norm": 418.028076171875, "learning_rate": 1.9839377207661233e-06, "loss": 30.5859, "step": 9024 }, { "epoch": 0.08543084597836068, "grad_norm": 227.06149291992188, "learning_rate": 1.983932247416451e-06, "loss": 22.8125, "step": 9025 }, { "epoch": 0.08544031200007572, "grad_norm": 300.8431396484375, "learning_rate": 1.9839267731419464e-06, "loss": 22.3047, "step": 9026 }, { "epoch": 0.08544977802179078, "grad_norm": 891.8139038085938, "learning_rate": 1.983921297942615e-06, "loss": 33.6562, "step": 9027 }, { "epoch": 0.08545924404350584, "grad_norm": 415.286865234375, "learning_rate": 1.9839158218184607e-06, "loss": 52.1562, "step": 9028 }, { "epoch": 0.08546871006522089, "grad_norm": 463.89208984375, "learning_rate": 1.9839103447694905e-06, "loss": 44.2812, "step": 9029 }, { "epoch": 0.08547817608693595, "grad_norm": 404.8325500488281, "learning_rate": 1.983904866795708e-06, "loss": 33.8438, "step": 9030 }, { "epoch": 0.085487642108651, "grad_norm": 615.2754516601562, "learning_rate": 1.9838993878971195e-06, "loss": 38.2734, "step": 9031 }, { "epoch": 0.08549710813036605, "grad_norm": 203.61416625976562, "learning_rate": 1.983893908073729e-06, "loss": 26.5469, "step": 9032 }, { "epoch": 0.0855065741520811, "grad_norm": 3.249326229095459, "learning_rate": 1.983888427325542e-06, "loss": 0.9663, "step": 9033 }, { "epoch": 0.08551604017379616, "grad_norm": 402.7342529296875, "learning_rate": 1.9838829456525644e-06, "loss": 27.7344, "step": 9034 }, { "epoch": 0.0855255061955112, "grad_norm": 417.86590576171875, "learning_rate": 1.9838774630548008e-06, "loss": 51.0, "step": 9035 }, { "epoch": 0.08553497221722627, "grad_norm": 186.13804626464844, "learning_rate": 1.9838719795322567e-06, "loss": 22.9531, "step": 9036 }, { "epoch": 0.08554443823894133, "grad_norm": 563.5912475585938, "learning_rate": 1.9838664950849365e-06, "loss": 41.4062, "step": 9037 }, { "epoch": 0.08555390426065637, "grad_norm": 317.5250244140625, "learning_rate": 1.983861009712846e-06, "loss": 22.6406, "step": 9038 }, { "epoch": 0.08556337028237143, "grad_norm": 262.7423400878906, "learning_rate": 1.9838555234159896e-06, "loss": 35.5781, "step": 9039 }, { "epoch": 0.08557283630408648, "grad_norm": 178.1310272216797, "learning_rate": 1.983850036194374e-06, "loss": 18.5312, "step": 9040 }, { "epoch": 0.08558230232580154, "grad_norm": 2.8643033504486084, "learning_rate": 1.9838445480480024e-06, "loss": 0.9084, "step": 9041 }, { "epoch": 0.08559176834751658, "grad_norm": 227.80746459960938, "learning_rate": 1.983839058976882e-06, "loss": 23.9688, "step": 9042 }, { "epoch": 0.08560123436923164, "grad_norm": 271.30908203125, "learning_rate": 1.9838335689810155e-06, "loss": 22.5, "step": 9043 }, { "epoch": 0.0856107003909467, "grad_norm": 279.7651672363281, "learning_rate": 1.9838280780604103e-06, "loss": 29.7656, "step": 9044 }, { "epoch": 0.08562016641266175, "grad_norm": 228.44247436523438, "learning_rate": 1.9838225862150705e-06, "loss": 22.6719, "step": 9045 }, { "epoch": 0.08562963243437681, "grad_norm": 226.6572265625, "learning_rate": 1.983817093445002e-06, "loss": 22.8047, "step": 9046 }, { "epoch": 0.08563909845609186, "grad_norm": 659.06298828125, "learning_rate": 1.983811599750209e-06, "loss": 53.6641, "step": 9047 }, { "epoch": 0.08564856447780692, "grad_norm": 410.4881591796875, "learning_rate": 1.9838061051306967e-06, "loss": 18.8398, "step": 9048 }, { "epoch": 0.08565803049952196, "grad_norm": 211.89747619628906, "learning_rate": 1.983800609586471e-06, "loss": 21.0, "step": 9049 }, { "epoch": 0.08566749652123702, "grad_norm": 390.17803955078125, "learning_rate": 1.983795113117537e-06, "loss": 27.2188, "step": 9050 }, { "epoch": 0.08567696254295207, "grad_norm": 719.3101196289062, "learning_rate": 1.9837896157238994e-06, "loss": 45.7734, "step": 9051 }, { "epoch": 0.08568642856466713, "grad_norm": 193.02049255371094, "learning_rate": 1.9837841174055635e-06, "loss": 21.8359, "step": 9052 }, { "epoch": 0.08569589458638219, "grad_norm": 231.52651977539062, "learning_rate": 1.9837786181625347e-06, "loss": 27.1797, "step": 9053 }, { "epoch": 0.08570536060809723, "grad_norm": 767.3444213867188, "learning_rate": 1.983773117994818e-06, "loss": 40.3906, "step": 9054 }, { "epoch": 0.0857148266298123, "grad_norm": 645.8029174804688, "learning_rate": 1.983767616902418e-06, "loss": 21.1641, "step": 9055 }, { "epoch": 0.08572429265152734, "grad_norm": 267.6380310058594, "learning_rate": 1.9837621148853412e-06, "loss": 21.1641, "step": 9056 }, { "epoch": 0.0857337586732424, "grad_norm": 872.2036743164062, "learning_rate": 1.983756611943592e-06, "loss": 25.7109, "step": 9057 }, { "epoch": 0.08574322469495745, "grad_norm": 363.88604736328125, "learning_rate": 1.983751108077175e-06, "loss": 16.8242, "step": 9058 }, { "epoch": 0.0857526907166725, "grad_norm": 212.43003845214844, "learning_rate": 1.983745603286096e-06, "loss": 20.9609, "step": 9059 }, { "epoch": 0.08576215673838755, "grad_norm": 502.1639709472656, "learning_rate": 1.9837400975703605e-06, "loss": 50.0156, "step": 9060 }, { "epoch": 0.08577162276010261, "grad_norm": 182.04483032226562, "learning_rate": 1.9837345909299732e-06, "loss": 17.4062, "step": 9061 }, { "epoch": 0.08578108878181767, "grad_norm": 317.3255310058594, "learning_rate": 1.983729083364939e-06, "loss": 29.3594, "step": 9062 }, { "epoch": 0.08579055480353272, "grad_norm": 450.9927673339844, "learning_rate": 1.983723574875264e-06, "loss": 22.5977, "step": 9063 }, { "epoch": 0.08580002082524778, "grad_norm": 772.4249877929688, "learning_rate": 1.9837180654609527e-06, "loss": 63.1562, "step": 9064 }, { "epoch": 0.08580948684696282, "grad_norm": 389.6232604980469, "learning_rate": 1.9837125551220105e-06, "loss": 15.7969, "step": 9065 }, { "epoch": 0.08581895286867788, "grad_norm": 756.192626953125, "learning_rate": 1.983707043858442e-06, "loss": 58.4375, "step": 9066 }, { "epoch": 0.08582841889039293, "grad_norm": 281.69207763671875, "learning_rate": 1.9837015316702533e-06, "loss": 40.9375, "step": 9067 }, { "epoch": 0.08583788491210799, "grad_norm": 242.1265106201172, "learning_rate": 1.9836960185574492e-06, "loss": 21.1719, "step": 9068 }, { "epoch": 0.08584735093382304, "grad_norm": 274.39007568359375, "learning_rate": 1.9836905045200346e-06, "loss": 20.1016, "step": 9069 }, { "epoch": 0.0858568169555381, "grad_norm": 313.30047607421875, "learning_rate": 1.9836849895580154e-06, "loss": 25.4922, "step": 9070 }, { "epoch": 0.08586628297725316, "grad_norm": 422.71759033203125, "learning_rate": 1.9836794736713957e-06, "loss": 27.7812, "step": 9071 }, { "epoch": 0.0858757489989682, "grad_norm": 723.7015991210938, "learning_rate": 1.9836739568601816e-06, "loss": 47.2188, "step": 9072 }, { "epoch": 0.08588521502068326, "grad_norm": 409.81719970703125, "learning_rate": 1.983668439124378e-06, "loss": 37.7188, "step": 9073 }, { "epoch": 0.08589468104239831, "grad_norm": 676.652099609375, "learning_rate": 1.9836629204639897e-06, "loss": 50.2656, "step": 9074 }, { "epoch": 0.08590414706411337, "grad_norm": 339.5746154785156, "learning_rate": 1.9836574008790222e-06, "loss": 21.6328, "step": 9075 }, { "epoch": 0.08591361308582841, "grad_norm": 483.7355041503906, "learning_rate": 1.9836518803694814e-06, "loss": 46.3438, "step": 9076 }, { "epoch": 0.08592307910754347, "grad_norm": 195.63955688476562, "learning_rate": 1.9836463589353715e-06, "loss": 16.1484, "step": 9077 }, { "epoch": 0.08593254512925852, "grad_norm": 3.366938829421997, "learning_rate": 1.983640836576698e-06, "loss": 0.8806, "step": 9078 }, { "epoch": 0.08594201115097358, "grad_norm": 536.8746337890625, "learning_rate": 1.9836353132934656e-06, "loss": 23.5, "step": 9079 }, { "epoch": 0.08595147717268864, "grad_norm": 666.0939331054688, "learning_rate": 1.9836297890856807e-06, "loss": 51.7031, "step": 9080 }, { "epoch": 0.08596094319440369, "grad_norm": 214.5516357421875, "learning_rate": 1.9836242639533474e-06, "loss": 19.4062, "step": 9081 }, { "epoch": 0.08597040921611875, "grad_norm": 247.93124389648438, "learning_rate": 1.9836187378964718e-06, "loss": 29.7422, "step": 9082 }, { "epoch": 0.08597987523783379, "grad_norm": 860.36767578125, "learning_rate": 1.983613210915058e-06, "loss": 57.0078, "step": 9083 }, { "epoch": 0.08598934125954885, "grad_norm": 372.3347473144531, "learning_rate": 1.983607683009112e-06, "loss": 32.8125, "step": 9084 }, { "epoch": 0.0859988072812639, "grad_norm": 1307.647705078125, "learning_rate": 1.9836021541786388e-06, "loss": 20.5312, "step": 9085 }, { "epoch": 0.08600827330297896, "grad_norm": 388.0370788574219, "learning_rate": 1.983596624423643e-06, "loss": 13.6328, "step": 9086 }, { "epoch": 0.08601773932469402, "grad_norm": 198.6277313232422, "learning_rate": 1.9835910937441313e-06, "loss": 21.8516, "step": 9087 }, { "epoch": 0.08602720534640906, "grad_norm": 443.94525146484375, "learning_rate": 1.9835855621401076e-06, "loss": 49.5391, "step": 9088 }, { "epoch": 0.08603667136812412, "grad_norm": 2.86672043800354, "learning_rate": 1.9835800296115775e-06, "loss": 0.8228, "step": 9089 }, { "epoch": 0.08604613738983917, "grad_norm": 961.7407836914062, "learning_rate": 1.9835744961585457e-06, "loss": 77.1562, "step": 9090 }, { "epoch": 0.08605560341155423, "grad_norm": 2.6305527687072754, "learning_rate": 1.9835689617810185e-06, "loss": 0.7747, "step": 9091 }, { "epoch": 0.08606506943326928, "grad_norm": 302.6685791015625, "learning_rate": 1.983563426479e-06, "loss": 18.4844, "step": 9092 }, { "epoch": 0.08607453545498434, "grad_norm": 253.1156005859375, "learning_rate": 1.9835578902524957e-06, "loss": 22.5625, "step": 9093 }, { "epoch": 0.08608400147669938, "grad_norm": 384.2945556640625, "learning_rate": 1.9835523531015116e-06, "loss": 31.6094, "step": 9094 }, { "epoch": 0.08609346749841444, "grad_norm": 244.89149475097656, "learning_rate": 1.983546815026052e-06, "loss": 19.5703, "step": 9095 }, { "epoch": 0.0861029335201295, "grad_norm": 571.0215454101562, "learning_rate": 1.9835412760261225e-06, "loss": 30.7969, "step": 9096 }, { "epoch": 0.08611239954184455, "grad_norm": 400.8146667480469, "learning_rate": 1.9835357361017276e-06, "loss": 34.1406, "step": 9097 }, { "epoch": 0.08612186556355961, "grad_norm": 739.0099487304688, "learning_rate": 1.9835301952528737e-06, "loss": 52.1328, "step": 9098 }, { "epoch": 0.08613133158527465, "grad_norm": 2.4297990798950195, "learning_rate": 1.983524653479565e-06, "loss": 0.7888, "step": 9099 }, { "epoch": 0.08614079760698971, "grad_norm": 391.1317443847656, "learning_rate": 1.9835191107818074e-06, "loss": 25.4375, "step": 9100 }, { "epoch": 0.08615026362870476, "grad_norm": 194.17178344726562, "learning_rate": 1.983513567159606e-06, "loss": 25.5781, "step": 9101 }, { "epoch": 0.08615972965041982, "grad_norm": 423.1429748535156, "learning_rate": 1.983508022612966e-06, "loss": 41.4688, "step": 9102 }, { "epoch": 0.08616919567213487, "grad_norm": 317.99163818359375, "learning_rate": 1.9835024771418917e-06, "loss": 21.9609, "step": 9103 }, { "epoch": 0.08617866169384993, "grad_norm": 813.2364501953125, "learning_rate": 1.983496930746389e-06, "loss": 21.2969, "step": 9104 }, { "epoch": 0.08618812771556499, "grad_norm": 318.7381286621094, "learning_rate": 1.9834913834264636e-06, "loss": 22.25, "step": 9105 }, { "epoch": 0.08619759373728003, "grad_norm": 305.7060852050781, "learning_rate": 1.9834858351821203e-06, "loss": 22.7578, "step": 9106 }, { "epoch": 0.08620705975899509, "grad_norm": 200.40927124023438, "learning_rate": 1.983480286013364e-06, "loss": 20.6172, "step": 9107 }, { "epoch": 0.08621652578071014, "grad_norm": 490.8055114746094, "learning_rate": 1.9834747359202006e-06, "loss": 43.0, "step": 9108 }, { "epoch": 0.0862259918024252, "grad_norm": 325.3564147949219, "learning_rate": 1.983469184902635e-06, "loss": 31.1719, "step": 9109 }, { "epoch": 0.08623545782414024, "grad_norm": 183.74655151367188, "learning_rate": 1.9834636329606717e-06, "loss": 21.8516, "step": 9110 }, { "epoch": 0.0862449238458553, "grad_norm": 885.63134765625, "learning_rate": 1.983458080094317e-06, "loss": 78.5703, "step": 9111 }, { "epoch": 0.08625438986757035, "grad_norm": 639.6661376953125, "learning_rate": 1.983452526303576e-06, "loss": 28.9062, "step": 9112 }, { "epoch": 0.08626385588928541, "grad_norm": 701.4891357421875, "learning_rate": 1.983446971588453e-06, "loss": 46.9375, "step": 9113 }, { "epoch": 0.08627332191100047, "grad_norm": 474.6315002441406, "learning_rate": 1.9834414159489542e-06, "loss": 15.1719, "step": 9114 }, { "epoch": 0.08628278793271552, "grad_norm": 538.543212890625, "learning_rate": 1.983435859385084e-06, "loss": 61.5469, "step": 9115 }, { "epoch": 0.08629225395443058, "grad_norm": 880.2410278320312, "learning_rate": 1.9834303018968487e-06, "loss": 55.5234, "step": 9116 }, { "epoch": 0.08630171997614562, "grad_norm": 3.1427080631256104, "learning_rate": 1.9834247434842526e-06, "loss": 0.9038, "step": 9117 }, { "epoch": 0.08631118599786068, "grad_norm": 297.1977233886719, "learning_rate": 1.983419184147301e-06, "loss": 25.0, "step": 9118 }, { "epoch": 0.08632065201957573, "grad_norm": 519.7793579101562, "learning_rate": 1.9834136238859993e-06, "loss": 37.7031, "step": 9119 }, { "epoch": 0.08633011804129079, "grad_norm": 207.9677734375, "learning_rate": 1.9834080627003533e-06, "loss": 21.4219, "step": 9120 }, { "epoch": 0.08633958406300583, "grad_norm": 377.840576171875, "learning_rate": 1.9834025005903672e-06, "loss": 34.8438, "step": 9121 }, { "epoch": 0.0863490500847209, "grad_norm": 233.80841064453125, "learning_rate": 1.983396937556047e-06, "loss": 22.5859, "step": 9122 }, { "epoch": 0.08635851610643595, "grad_norm": 308.7026672363281, "learning_rate": 1.9833913735973974e-06, "loss": 24.7266, "step": 9123 }, { "epoch": 0.086367982128151, "grad_norm": 297.0133972167969, "learning_rate": 1.9833858087144246e-06, "loss": 23.2812, "step": 9124 }, { "epoch": 0.08637744814986606, "grad_norm": 179.03147888183594, "learning_rate": 1.983380242907132e-06, "loss": 21.1562, "step": 9125 }, { "epoch": 0.0863869141715811, "grad_norm": 307.73358154296875, "learning_rate": 1.983374676175527e-06, "loss": 25.375, "step": 9126 }, { "epoch": 0.08639638019329617, "grad_norm": 555.0011596679688, "learning_rate": 1.983369108519613e-06, "loss": 21.2344, "step": 9127 }, { "epoch": 0.08640584621501121, "grad_norm": 200.1666717529297, "learning_rate": 1.9833635399393963e-06, "loss": 17.5547, "step": 9128 }, { "epoch": 0.08641531223672627, "grad_norm": 236.20730590820312, "learning_rate": 1.983357970434882e-06, "loss": 16.1133, "step": 9129 }, { "epoch": 0.08642477825844133, "grad_norm": 184.51126098632812, "learning_rate": 1.983352400006075e-06, "loss": 19.25, "step": 9130 }, { "epoch": 0.08643424428015638, "grad_norm": 466.5748291015625, "learning_rate": 1.983346828652981e-06, "loss": 22.25, "step": 9131 }, { "epoch": 0.08644371030187144, "grad_norm": 534.889404296875, "learning_rate": 1.9833412563756043e-06, "loss": 38.0469, "step": 9132 }, { "epoch": 0.08645317632358648, "grad_norm": 280.76654052734375, "learning_rate": 1.983335683173951e-06, "loss": 23.2734, "step": 9133 }, { "epoch": 0.08646264234530154, "grad_norm": 639.2067260742188, "learning_rate": 1.9833301090480268e-06, "loss": 50.875, "step": 9134 }, { "epoch": 0.08647210836701659, "grad_norm": 435.1859436035156, "learning_rate": 1.9833245339978354e-06, "loss": 36.4531, "step": 9135 }, { "epoch": 0.08648157438873165, "grad_norm": 286.775634765625, "learning_rate": 1.9833189580233833e-06, "loss": 38.625, "step": 9136 }, { "epoch": 0.0864910404104467, "grad_norm": 495.6844177246094, "learning_rate": 1.9833133811246755e-06, "loss": 29.125, "step": 9137 }, { "epoch": 0.08650050643216176, "grad_norm": 806.4127197265625, "learning_rate": 1.983307803301717e-06, "loss": 43.6719, "step": 9138 }, { "epoch": 0.08650997245387682, "grad_norm": 292.70965576171875, "learning_rate": 1.983302224554513e-06, "loss": 12.4219, "step": 9139 }, { "epoch": 0.08651943847559186, "grad_norm": 971.04150390625, "learning_rate": 1.983296644883069e-06, "loss": 31.5391, "step": 9140 }, { "epoch": 0.08652890449730692, "grad_norm": 337.11627197265625, "learning_rate": 1.9832910642873897e-06, "loss": 37.1875, "step": 9141 }, { "epoch": 0.08653837051902197, "grad_norm": 204.2744140625, "learning_rate": 1.9832854827674815e-06, "loss": 24.875, "step": 9142 }, { "epoch": 0.08654783654073703, "grad_norm": 668.0250854492188, "learning_rate": 1.983279900323348e-06, "loss": 44.8906, "step": 9143 }, { "epoch": 0.08655730256245207, "grad_norm": 300.5992431640625, "learning_rate": 1.9832743169549963e-06, "loss": 30.2656, "step": 9144 }, { "epoch": 0.08656676858416713, "grad_norm": 272.24212646484375, "learning_rate": 1.9832687326624304e-06, "loss": 22.8125, "step": 9145 }, { "epoch": 0.08657623460588218, "grad_norm": 532.3703002929688, "learning_rate": 1.9832631474456558e-06, "loss": 45.9297, "step": 9146 }, { "epoch": 0.08658570062759724, "grad_norm": 1517.7958984375, "learning_rate": 1.9832575613046776e-06, "loss": 30.1406, "step": 9147 }, { "epoch": 0.0865951666493123, "grad_norm": 142.1226348876953, "learning_rate": 1.9832519742395017e-06, "loss": 22.1016, "step": 9148 }, { "epoch": 0.08660463267102735, "grad_norm": 440.59674072265625, "learning_rate": 1.9832463862501325e-06, "loss": 48.7656, "step": 9149 }, { "epoch": 0.0866140986927424, "grad_norm": 182.52345275878906, "learning_rate": 1.9832407973365757e-06, "loss": 22.2031, "step": 9150 }, { "epoch": 0.08662356471445745, "grad_norm": 409.4872131347656, "learning_rate": 1.9832352074988366e-06, "loss": 44.0781, "step": 9151 }, { "epoch": 0.08663303073617251, "grad_norm": 440.7464294433594, "learning_rate": 1.98322961673692e-06, "loss": 38.625, "step": 9152 }, { "epoch": 0.08664249675788756, "grad_norm": 378.56170654296875, "learning_rate": 1.9832240250508323e-06, "loss": 22.6172, "step": 9153 }, { "epoch": 0.08665196277960262, "grad_norm": 233.40965270996094, "learning_rate": 1.9832184324405773e-06, "loss": 21.1172, "step": 9154 }, { "epoch": 0.08666142880131766, "grad_norm": 1039.8565673828125, "learning_rate": 1.9832128389061615e-06, "loss": 36.8594, "step": 9155 }, { "epoch": 0.08667089482303272, "grad_norm": 333.0272521972656, "learning_rate": 1.9832072444475895e-06, "loss": 33.4453, "step": 9156 }, { "epoch": 0.08668036084474778, "grad_norm": 752.9513549804688, "learning_rate": 1.983201649064866e-06, "loss": 49.8828, "step": 9157 }, { "epoch": 0.08668982686646283, "grad_norm": 400.7919616699219, "learning_rate": 1.9831960527579975e-06, "loss": 18.4531, "step": 9158 }, { "epoch": 0.08669929288817789, "grad_norm": 562.2817993164062, "learning_rate": 1.9831904555269885e-06, "loss": 12.5117, "step": 9159 }, { "epoch": 0.08670875890989294, "grad_norm": 271.43817138671875, "learning_rate": 1.9831848573718444e-06, "loss": 25.7422, "step": 9160 }, { "epoch": 0.086718224931608, "grad_norm": 485.77447509765625, "learning_rate": 1.9831792582925705e-06, "loss": 57.1562, "step": 9161 }, { "epoch": 0.08672769095332304, "grad_norm": 3.3752806186676025, "learning_rate": 1.9831736582891723e-06, "loss": 0.9204, "step": 9162 }, { "epoch": 0.0867371569750381, "grad_norm": 408.979736328125, "learning_rate": 1.9831680573616547e-06, "loss": 33.4688, "step": 9163 }, { "epoch": 0.08674662299675315, "grad_norm": 436.4665222167969, "learning_rate": 1.983162455510023e-06, "loss": 34.4844, "step": 9164 }, { "epoch": 0.08675608901846821, "grad_norm": 3.4913454055786133, "learning_rate": 1.983156852734283e-06, "loss": 0.9785, "step": 9165 }, { "epoch": 0.08676555504018327, "grad_norm": 298.89923095703125, "learning_rate": 1.9831512490344387e-06, "loss": 19.7969, "step": 9166 }, { "epoch": 0.08677502106189831, "grad_norm": 211.33091735839844, "learning_rate": 1.9831456444104964e-06, "loss": 26.125, "step": 9167 }, { "epoch": 0.08678448708361337, "grad_norm": 212.5766143798828, "learning_rate": 1.9831400388624615e-06, "loss": 28.4531, "step": 9168 }, { "epoch": 0.08679395310532842, "grad_norm": 251.88780212402344, "learning_rate": 1.9831344323903387e-06, "loss": 23.4297, "step": 9169 }, { "epoch": 0.08680341912704348, "grad_norm": 288.4961242675781, "learning_rate": 1.9831288249941334e-06, "loss": 14.6289, "step": 9170 }, { "epoch": 0.08681288514875853, "grad_norm": 284.4944152832031, "learning_rate": 1.9831232166738516e-06, "loss": 23.8203, "step": 9171 }, { "epoch": 0.08682235117047359, "grad_norm": 312.96270751953125, "learning_rate": 1.983117607429497e-06, "loss": 23.4453, "step": 9172 }, { "epoch": 0.08683181719218865, "grad_norm": 454.9992980957031, "learning_rate": 1.9831119972610762e-06, "loss": 26.2031, "step": 9173 }, { "epoch": 0.08684128321390369, "grad_norm": 581.040771484375, "learning_rate": 1.9831063861685945e-06, "loss": 15.8438, "step": 9174 }, { "epoch": 0.08685074923561875, "grad_norm": 253.28958129882812, "learning_rate": 1.9831007741520566e-06, "loss": 31.5625, "step": 9175 }, { "epoch": 0.0868602152573338, "grad_norm": 321.7626037597656, "learning_rate": 1.983095161211467e-06, "loss": 17.375, "step": 9176 }, { "epoch": 0.08686968127904886, "grad_norm": 302.66595458984375, "learning_rate": 1.983089547346833e-06, "loss": 22.5703, "step": 9177 }, { "epoch": 0.0868791473007639, "grad_norm": 313.091064453125, "learning_rate": 1.983083932558158e-06, "loss": 39.2969, "step": 9178 }, { "epoch": 0.08688861332247896, "grad_norm": 186.38754272460938, "learning_rate": 1.983078316845449e-06, "loss": 19.4844, "step": 9179 }, { "epoch": 0.08689807934419401, "grad_norm": 331.19866943359375, "learning_rate": 1.9830727002087095e-06, "loss": 41.1094, "step": 9180 }, { "epoch": 0.08690754536590907, "grad_norm": 273.9647521972656, "learning_rate": 1.9830670826479463e-06, "loss": 19.6172, "step": 9181 }, { "epoch": 0.08691701138762413, "grad_norm": 253.29685974121094, "learning_rate": 1.983061464163163e-06, "loss": 24.6797, "step": 9182 }, { "epoch": 0.08692647740933918, "grad_norm": 166.2800750732422, "learning_rate": 1.9830558447543667e-06, "loss": 8.0469, "step": 9183 }, { "epoch": 0.08693594343105424, "grad_norm": 488.0263977050781, "learning_rate": 1.9830502244215615e-06, "loss": 58.625, "step": 9184 }, { "epoch": 0.08694540945276928, "grad_norm": 422.2090148925781, "learning_rate": 1.9830446031647534e-06, "loss": 15.2578, "step": 9185 }, { "epoch": 0.08695487547448434, "grad_norm": 586.7027587890625, "learning_rate": 1.9830389809839468e-06, "loss": 52.6719, "step": 9186 }, { "epoch": 0.08696434149619939, "grad_norm": 226.85586547851562, "learning_rate": 1.983033357879148e-06, "loss": 19.0234, "step": 9187 }, { "epoch": 0.08697380751791445, "grad_norm": 370.0135192871094, "learning_rate": 1.9830277338503614e-06, "loss": 26.8984, "step": 9188 }, { "epoch": 0.0869832735396295, "grad_norm": 326.68426513671875, "learning_rate": 1.9830221088975932e-06, "loss": 23.0977, "step": 9189 }, { "epoch": 0.08699273956134455, "grad_norm": 721.1836547851562, "learning_rate": 1.9830164830208477e-06, "loss": 23.8281, "step": 9190 }, { "epoch": 0.08700220558305961, "grad_norm": 259.8715515136719, "learning_rate": 1.983010856220131e-06, "loss": 19.8672, "step": 9191 }, { "epoch": 0.08701167160477466, "grad_norm": 525.2345581054688, "learning_rate": 1.9830052284954476e-06, "loss": 40.4062, "step": 9192 }, { "epoch": 0.08702113762648972, "grad_norm": 325.6142883300781, "learning_rate": 1.9829995998468036e-06, "loss": 27.1562, "step": 9193 }, { "epoch": 0.08703060364820477, "grad_norm": 398.7509460449219, "learning_rate": 1.982993970274204e-06, "loss": 31.0938, "step": 9194 }, { "epoch": 0.08704006966991983, "grad_norm": 477.3588562011719, "learning_rate": 1.982988339777654e-06, "loss": 49.9062, "step": 9195 }, { "epoch": 0.08704953569163487, "grad_norm": 395.7779846191406, "learning_rate": 1.9829827083571585e-06, "loss": 49.0156, "step": 9196 }, { "epoch": 0.08705900171334993, "grad_norm": 284.2933044433594, "learning_rate": 1.9829770760127235e-06, "loss": 32.5781, "step": 9197 }, { "epoch": 0.08706846773506498, "grad_norm": 359.8265686035156, "learning_rate": 1.9829714427443538e-06, "loss": 24.3125, "step": 9198 }, { "epoch": 0.08707793375678004, "grad_norm": 398.5655822753906, "learning_rate": 1.982965808552055e-06, "loss": 38.9375, "step": 9199 }, { "epoch": 0.0870873997784951, "grad_norm": 290.1617736816406, "learning_rate": 1.982960173435832e-06, "loss": 24.5, "step": 9200 }, { "epoch": 0.08709686580021014, "grad_norm": 422.3478698730469, "learning_rate": 1.9829545373956906e-06, "loss": 21.7695, "step": 9201 }, { "epoch": 0.0871063318219252, "grad_norm": 231.11801147460938, "learning_rate": 1.982948900431636e-06, "loss": 23.3984, "step": 9202 }, { "epoch": 0.08711579784364025, "grad_norm": 397.3639221191406, "learning_rate": 1.9829432625436733e-06, "loss": 19.1367, "step": 9203 }, { "epoch": 0.08712526386535531, "grad_norm": 194.36087036132812, "learning_rate": 1.9829376237318084e-06, "loss": 25.9688, "step": 9204 }, { "epoch": 0.08713472988707036, "grad_norm": 286.4955139160156, "learning_rate": 1.9829319839960453e-06, "loss": 34.8828, "step": 9205 }, { "epoch": 0.08714419590878542, "grad_norm": 276.7548828125, "learning_rate": 1.9829263433363907e-06, "loss": 24.1328, "step": 9206 }, { "epoch": 0.08715366193050046, "grad_norm": 451.14215087890625, "learning_rate": 1.9829207017528485e-06, "loss": 21.875, "step": 9207 }, { "epoch": 0.08716312795221552, "grad_norm": 319.40692138671875, "learning_rate": 1.9829150592454255e-06, "loss": 40.2969, "step": 9208 }, { "epoch": 0.08717259397393058, "grad_norm": 557.6298828125, "learning_rate": 1.9829094158141258e-06, "loss": 48.7891, "step": 9209 }, { "epoch": 0.08718205999564563, "grad_norm": 363.0107116699219, "learning_rate": 1.9829037714589554e-06, "loss": 20.6641, "step": 9210 }, { "epoch": 0.08719152601736069, "grad_norm": 669.59716796875, "learning_rate": 1.98289812617992e-06, "loss": 55.2969, "step": 9211 }, { "epoch": 0.08720099203907573, "grad_norm": 231.87112426757812, "learning_rate": 1.9828924799770233e-06, "loss": 20.2188, "step": 9212 }, { "epoch": 0.0872104580607908, "grad_norm": 422.0999450683594, "learning_rate": 1.982886832850272e-06, "loss": 53.7031, "step": 9213 }, { "epoch": 0.08721992408250584, "grad_norm": 371.87664794921875, "learning_rate": 1.982881184799671e-06, "loss": 14.7383, "step": 9214 }, { "epoch": 0.0872293901042209, "grad_norm": 456.50335693359375, "learning_rate": 1.9828755358252256e-06, "loss": 37.8438, "step": 9215 }, { "epoch": 0.08723885612593596, "grad_norm": 504.8052978515625, "learning_rate": 1.9828698859269416e-06, "loss": 10.3633, "step": 9216 }, { "epoch": 0.087248322147651, "grad_norm": 650.5972290039062, "learning_rate": 1.9828642351048234e-06, "loss": 37.8594, "step": 9217 }, { "epoch": 0.08725778816936607, "grad_norm": 1000.7470703125, "learning_rate": 1.9828585833588767e-06, "loss": 37.1172, "step": 9218 }, { "epoch": 0.08726725419108111, "grad_norm": 166.939208984375, "learning_rate": 1.9828529306891073e-06, "loss": 23.3594, "step": 9219 }, { "epoch": 0.08727672021279617, "grad_norm": 245.42166137695312, "learning_rate": 1.9828472770955192e-06, "loss": 14.4297, "step": 9220 }, { "epoch": 0.08728618623451122, "grad_norm": 290.3560791015625, "learning_rate": 1.9828416225781194e-06, "loss": 14.8359, "step": 9221 }, { "epoch": 0.08729565225622628, "grad_norm": 295.231201171875, "learning_rate": 1.9828359671369123e-06, "loss": 9.7695, "step": 9222 }, { "epoch": 0.08730511827794132, "grad_norm": 679.3731689453125, "learning_rate": 1.982830310771903e-06, "loss": 52.8438, "step": 9223 }, { "epoch": 0.08731458429965638, "grad_norm": 581.2213134765625, "learning_rate": 1.982824653483098e-06, "loss": 24.4766, "step": 9224 }, { "epoch": 0.08732405032137144, "grad_norm": 462.62103271484375, "learning_rate": 1.9828189952705007e-06, "loss": 24.9766, "step": 9225 }, { "epoch": 0.08733351634308649, "grad_norm": 726.2408447265625, "learning_rate": 1.982813336134118e-06, "loss": 58.0156, "step": 9226 }, { "epoch": 0.08734298236480155, "grad_norm": 824.8374633789062, "learning_rate": 1.9828076760739547e-06, "loss": 53.6641, "step": 9227 }, { "epoch": 0.0873524483865166, "grad_norm": 272.56878662109375, "learning_rate": 1.9828020150900156e-06, "loss": 26.4062, "step": 9228 }, { "epoch": 0.08736191440823166, "grad_norm": 633.4901733398438, "learning_rate": 1.9827963531823074e-06, "loss": 22.0625, "step": 9229 }, { "epoch": 0.0873713804299467, "grad_norm": 466.4870300292969, "learning_rate": 1.9827906903508336e-06, "loss": 28.375, "step": 9230 }, { "epoch": 0.08738084645166176, "grad_norm": 354.4967956542969, "learning_rate": 1.9827850265956014e-06, "loss": 30.207, "step": 9231 }, { "epoch": 0.08739031247337681, "grad_norm": 222.26382446289062, "learning_rate": 1.9827793619166145e-06, "loss": 24.6016, "step": 9232 }, { "epoch": 0.08739977849509187, "grad_norm": 638.90185546875, "learning_rate": 1.9827736963138797e-06, "loss": 40.1953, "step": 9233 }, { "epoch": 0.08740924451680693, "grad_norm": 208.5233612060547, "learning_rate": 1.982768029787401e-06, "loss": 21.1172, "step": 9234 }, { "epoch": 0.08741871053852197, "grad_norm": 198.8594207763672, "learning_rate": 1.9827623623371843e-06, "loss": 14.4102, "step": 9235 }, { "epoch": 0.08742817656023703, "grad_norm": 872.0149536132812, "learning_rate": 1.982756693963235e-06, "loss": 55.6406, "step": 9236 }, { "epoch": 0.08743764258195208, "grad_norm": 204.7392578125, "learning_rate": 1.9827510246655584e-06, "loss": 27.1406, "step": 9237 }, { "epoch": 0.08744710860366714, "grad_norm": 411.25634765625, "learning_rate": 1.9827453544441596e-06, "loss": 26.4219, "step": 9238 }, { "epoch": 0.08745657462538219, "grad_norm": 199.57626342773438, "learning_rate": 1.9827396832990443e-06, "loss": 18.2891, "step": 9239 }, { "epoch": 0.08746604064709725, "grad_norm": 301.3692626953125, "learning_rate": 1.9827340112302175e-06, "loss": 21.6328, "step": 9240 }, { "epoch": 0.08747550666881229, "grad_norm": 225.6087188720703, "learning_rate": 1.982728338237685e-06, "loss": 22.0547, "step": 9241 }, { "epoch": 0.08748497269052735, "grad_norm": 167.18060302734375, "learning_rate": 1.9827226643214513e-06, "loss": 16.625, "step": 9242 }, { "epoch": 0.08749443871224241, "grad_norm": 176.27557373046875, "learning_rate": 1.9827169894815223e-06, "loss": 15.3477, "step": 9243 }, { "epoch": 0.08750390473395746, "grad_norm": 435.64154052734375, "learning_rate": 1.9827113137179036e-06, "loss": 38.5312, "step": 9244 }, { "epoch": 0.08751337075567252, "grad_norm": 259.5101318359375, "learning_rate": 1.9827056370306e-06, "loss": 25.2852, "step": 9245 }, { "epoch": 0.08752283677738756, "grad_norm": 462.1337585449219, "learning_rate": 1.9826999594196168e-06, "loss": 50.7656, "step": 9246 }, { "epoch": 0.08753230279910262, "grad_norm": 365.2279968261719, "learning_rate": 1.98269428088496e-06, "loss": 20.9609, "step": 9247 }, { "epoch": 0.08754176882081767, "grad_norm": 680.7598266601562, "learning_rate": 1.982688601426634e-06, "loss": 33.3047, "step": 9248 }, { "epoch": 0.08755123484253273, "grad_norm": 465.4015808105469, "learning_rate": 1.982682921044645e-06, "loss": 31.5625, "step": 9249 }, { "epoch": 0.08756070086424778, "grad_norm": 332.328369140625, "learning_rate": 1.982677239738998e-06, "loss": 23.6328, "step": 9250 }, { "epoch": 0.08757016688596284, "grad_norm": 345.8831787109375, "learning_rate": 1.982671557509698e-06, "loss": 27.0703, "step": 9251 }, { "epoch": 0.0875796329076779, "grad_norm": 263.89886474609375, "learning_rate": 1.982665874356751e-06, "loss": 29.7656, "step": 9252 }, { "epoch": 0.08758909892939294, "grad_norm": 209.02186584472656, "learning_rate": 1.982660190280162e-06, "loss": 25.8203, "step": 9253 }, { "epoch": 0.087598564951108, "grad_norm": 264.72015380859375, "learning_rate": 1.982654505279936e-06, "loss": 18.375, "step": 9254 }, { "epoch": 0.08760803097282305, "grad_norm": 355.1997375488281, "learning_rate": 1.982648819356079e-06, "loss": 19.5625, "step": 9255 }, { "epoch": 0.08761749699453811, "grad_norm": 261.6010437011719, "learning_rate": 1.9826431325085963e-06, "loss": 21.5859, "step": 9256 }, { "epoch": 0.08762696301625315, "grad_norm": 604.2498779296875, "learning_rate": 1.9826374447374927e-06, "loss": 49.5469, "step": 9257 }, { "epoch": 0.08763642903796821, "grad_norm": 417.19268798828125, "learning_rate": 1.9826317560427738e-06, "loss": 27.5156, "step": 9258 }, { "epoch": 0.08764589505968327, "grad_norm": 615.6971435546875, "learning_rate": 1.9826260664244446e-06, "loss": 27.6797, "step": 9259 }, { "epoch": 0.08765536108139832, "grad_norm": 776.2155151367188, "learning_rate": 1.982620375882511e-06, "loss": 19.2188, "step": 9260 }, { "epoch": 0.08766482710311338, "grad_norm": 626.379150390625, "learning_rate": 1.982614684416979e-06, "loss": 32.1523, "step": 9261 }, { "epoch": 0.08767429312482843, "grad_norm": 306.3669738769531, "learning_rate": 1.982608992027852e-06, "loss": 22.8203, "step": 9262 }, { "epoch": 0.08768375914654349, "grad_norm": 430.14178466796875, "learning_rate": 1.982603298715137e-06, "loss": 41.3906, "step": 9263 }, { "epoch": 0.08769322516825853, "grad_norm": 358.7335510253906, "learning_rate": 1.982597604478839e-06, "loss": 30.6641, "step": 9264 }, { "epoch": 0.08770269118997359, "grad_norm": 679.46630859375, "learning_rate": 1.9825919093189626e-06, "loss": 66.2969, "step": 9265 }, { "epoch": 0.08771215721168864, "grad_norm": 789.9998168945312, "learning_rate": 1.9825862132355143e-06, "loss": 82.0, "step": 9266 }, { "epoch": 0.0877216232334037, "grad_norm": 177.16490173339844, "learning_rate": 1.9825805162284985e-06, "loss": 19.0078, "step": 9267 }, { "epoch": 0.08773108925511876, "grad_norm": 228.80386352539062, "learning_rate": 1.982574818297921e-06, "loss": 22.75, "step": 9268 }, { "epoch": 0.0877405552768338, "grad_norm": 622.7556762695312, "learning_rate": 1.9825691194437874e-06, "loss": 37.4531, "step": 9269 }, { "epoch": 0.08775002129854886, "grad_norm": 463.32025146484375, "learning_rate": 1.9825634196661023e-06, "loss": 19.0898, "step": 9270 }, { "epoch": 0.08775948732026391, "grad_norm": 2.8457183837890625, "learning_rate": 1.9825577189648718e-06, "loss": 0.958, "step": 9271 }, { "epoch": 0.08776895334197897, "grad_norm": 399.63580322265625, "learning_rate": 1.982552017340101e-06, "loss": 25.3047, "step": 9272 }, { "epoch": 0.08777841936369402, "grad_norm": 2.5856130123138428, "learning_rate": 1.9825463147917947e-06, "loss": 0.8413, "step": 9273 }, { "epoch": 0.08778788538540908, "grad_norm": 280.9276123046875, "learning_rate": 1.9825406113199596e-06, "loss": 28.3125, "step": 9274 }, { "epoch": 0.08779735140712412, "grad_norm": 209.05191040039062, "learning_rate": 1.9825349069246e-06, "loss": 19.1953, "step": 9275 }, { "epoch": 0.08780681742883918, "grad_norm": 604.0938110351562, "learning_rate": 1.9825292016057213e-06, "loss": 48.5781, "step": 9276 }, { "epoch": 0.08781628345055424, "grad_norm": 308.3072509765625, "learning_rate": 1.982523495363329e-06, "loss": 26.4258, "step": 9277 }, { "epoch": 0.08782574947226929, "grad_norm": 573.1517333984375, "learning_rate": 1.9825177881974288e-06, "loss": 41.8672, "step": 9278 }, { "epoch": 0.08783521549398435, "grad_norm": 479.74761962890625, "learning_rate": 1.9825120801080254e-06, "loss": 25.2383, "step": 9279 }, { "epoch": 0.0878446815156994, "grad_norm": 854.904541015625, "learning_rate": 1.9825063710951248e-06, "loss": 47.5312, "step": 9280 }, { "epoch": 0.08785414753741445, "grad_norm": 388.1696472167969, "learning_rate": 1.982500661158732e-06, "loss": 44.7031, "step": 9281 }, { "epoch": 0.0878636135591295, "grad_norm": 473.7981262207031, "learning_rate": 1.982494950298853e-06, "loss": 59.2109, "step": 9282 }, { "epoch": 0.08787307958084456, "grad_norm": 2.8932385444641113, "learning_rate": 1.9824892385154918e-06, "loss": 1.0166, "step": 9283 }, { "epoch": 0.0878825456025596, "grad_norm": 216.54718017578125, "learning_rate": 1.9824835258086555e-06, "loss": 23.5859, "step": 9284 }, { "epoch": 0.08789201162427467, "grad_norm": 263.5341491699219, "learning_rate": 1.982477812178348e-06, "loss": 10.0898, "step": 9285 }, { "epoch": 0.08790147764598973, "grad_norm": 517.8690185546875, "learning_rate": 1.9824720976245754e-06, "loss": 59.5781, "step": 9286 }, { "epoch": 0.08791094366770477, "grad_norm": 496.2189636230469, "learning_rate": 1.982466382147343e-06, "loss": 33.7656, "step": 9287 }, { "epoch": 0.08792040968941983, "grad_norm": 283.5580749511719, "learning_rate": 1.982460665746656e-06, "loss": 20.3516, "step": 9288 }, { "epoch": 0.08792987571113488, "grad_norm": 935.7689208984375, "learning_rate": 1.98245494842252e-06, "loss": 50.5781, "step": 9289 }, { "epoch": 0.08793934173284994, "grad_norm": 875.5374145507812, "learning_rate": 1.9824492301749406e-06, "loss": 57.6406, "step": 9290 }, { "epoch": 0.08794880775456498, "grad_norm": 498.2384338378906, "learning_rate": 1.9824435110039224e-06, "loss": 36.8438, "step": 9291 }, { "epoch": 0.08795827377628004, "grad_norm": 200.67340087890625, "learning_rate": 1.9824377909094715e-06, "loss": 14.8164, "step": 9292 }, { "epoch": 0.08796773979799509, "grad_norm": 393.6011657714844, "learning_rate": 1.982432069891593e-06, "loss": 39.5781, "step": 9293 }, { "epoch": 0.08797720581971015, "grad_norm": 3.336850643157959, "learning_rate": 1.982426347950292e-06, "loss": 0.6831, "step": 9294 }, { "epoch": 0.08798667184142521, "grad_norm": 638.999267578125, "learning_rate": 1.982420625085574e-06, "loss": 36.5859, "step": 9295 }, { "epoch": 0.08799613786314026, "grad_norm": 288.67596435546875, "learning_rate": 1.982414901297445e-06, "loss": 23.4688, "step": 9296 }, { "epoch": 0.08800560388485532, "grad_norm": 305.5658874511719, "learning_rate": 1.9824091765859093e-06, "loss": 27.3125, "step": 9297 }, { "epoch": 0.08801506990657036, "grad_norm": 190.75408935546875, "learning_rate": 1.9824034509509735e-06, "loss": 23.7891, "step": 9298 }, { "epoch": 0.08802453592828542, "grad_norm": 1700.4879150390625, "learning_rate": 1.9823977243926418e-06, "loss": 38.3828, "step": 9299 }, { "epoch": 0.08803400195000047, "grad_norm": 468.86761474609375, "learning_rate": 1.9823919969109205e-06, "loss": 46.5391, "step": 9300 }, { "epoch": 0.08804346797171553, "grad_norm": 173.03955078125, "learning_rate": 1.9823862685058146e-06, "loss": 25.625, "step": 9301 }, { "epoch": 0.08805293399343059, "grad_norm": 245.9062042236328, "learning_rate": 1.98238053917733e-06, "loss": 16.9609, "step": 9302 }, { "epoch": 0.08806240001514563, "grad_norm": 572.93310546875, "learning_rate": 1.9823748089254707e-06, "loss": 40.1172, "step": 9303 }, { "epoch": 0.0880718660368607, "grad_norm": 533.8823852539062, "learning_rate": 1.9823690777502436e-06, "loss": 10.7656, "step": 9304 }, { "epoch": 0.08808133205857574, "grad_norm": 2.535731315612793, "learning_rate": 1.982363345651653e-06, "loss": 0.8491, "step": 9305 }, { "epoch": 0.0880907980802908, "grad_norm": 356.3128356933594, "learning_rate": 1.9823576126297053e-06, "loss": 41.1875, "step": 9306 }, { "epoch": 0.08810026410200585, "grad_norm": 602.2106323242188, "learning_rate": 1.982351878684405e-06, "loss": 39.2656, "step": 9307 }, { "epoch": 0.0881097301237209, "grad_norm": 152.7186737060547, "learning_rate": 1.9823461438157582e-06, "loss": 24.0938, "step": 9308 }, { "epoch": 0.08811919614543595, "grad_norm": 170.76805114746094, "learning_rate": 1.9823404080237695e-06, "loss": 18.8672, "step": 9309 }, { "epoch": 0.08812866216715101, "grad_norm": 278.7886047363281, "learning_rate": 1.9823346713084447e-06, "loss": 9.5273, "step": 9310 }, { "epoch": 0.08813812818886607, "grad_norm": 183.64280700683594, "learning_rate": 1.9823289336697895e-06, "loss": 18.6406, "step": 9311 }, { "epoch": 0.08814759421058112, "grad_norm": 274.5676574707031, "learning_rate": 1.982323195107809e-06, "loss": 12.9883, "step": 9312 }, { "epoch": 0.08815706023229618, "grad_norm": 2.7212166786193848, "learning_rate": 1.9823174556225086e-06, "loss": 0.7988, "step": 9313 }, { "epoch": 0.08816652625401122, "grad_norm": 232.03269958496094, "learning_rate": 1.9823117152138935e-06, "loss": 10.9453, "step": 9314 }, { "epoch": 0.08817599227572628, "grad_norm": 427.5469055175781, "learning_rate": 1.9823059738819693e-06, "loss": 41.5938, "step": 9315 }, { "epoch": 0.08818545829744133, "grad_norm": 328.6736145019531, "learning_rate": 1.9823002316267414e-06, "loss": 29.1562, "step": 9316 }, { "epoch": 0.08819492431915639, "grad_norm": 353.6867980957031, "learning_rate": 1.9822944884482155e-06, "loss": 30.3203, "step": 9317 }, { "epoch": 0.08820439034087144, "grad_norm": 1618.1947021484375, "learning_rate": 1.982288744346396e-06, "loss": 48.2188, "step": 9318 }, { "epoch": 0.0882138563625865, "grad_norm": 641.8336181640625, "learning_rate": 1.9822829993212897e-06, "loss": 39.8672, "step": 9319 }, { "epoch": 0.08822332238430156, "grad_norm": 442.0866394042969, "learning_rate": 1.982277253372901e-06, "loss": 41.75, "step": 9320 }, { "epoch": 0.0882327884060166, "grad_norm": 220.56227111816406, "learning_rate": 1.9822715065012354e-06, "loss": 19.9844, "step": 9321 }, { "epoch": 0.08824225442773166, "grad_norm": 324.9588623046875, "learning_rate": 1.982265758706299e-06, "loss": 34.1406, "step": 9322 }, { "epoch": 0.08825172044944671, "grad_norm": 278.526611328125, "learning_rate": 1.9822600099880963e-06, "loss": 20.9844, "step": 9323 }, { "epoch": 0.08826118647116177, "grad_norm": 696.2850341796875, "learning_rate": 1.982254260346633e-06, "loss": 24.4219, "step": 9324 }, { "epoch": 0.08827065249287681, "grad_norm": 254.92588806152344, "learning_rate": 1.9822485097819147e-06, "loss": 17.6602, "step": 9325 }, { "epoch": 0.08828011851459187, "grad_norm": 400.280029296875, "learning_rate": 1.982242758293947e-06, "loss": 18.1328, "step": 9326 }, { "epoch": 0.08828958453630692, "grad_norm": 544.7694702148438, "learning_rate": 1.9822370058827347e-06, "loss": 59.3438, "step": 9327 }, { "epoch": 0.08829905055802198, "grad_norm": 326.2803039550781, "learning_rate": 1.9822312525482837e-06, "loss": 26.875, "step": 9328 }, { "epoch": 0.08830851657973704, "grad_norm": 368.6722717285156, "learning_rate": 1.982225498290599e-06, "loss": 27.2109, "step": 9329 }, { "epoch": 0.08831798260145209, "grad_norm": 413.07440185546875, "learning_rate": 1.9822197431096865e-06, "loss": 41.9219, "step": 9330 }, { "epoch": 0.08832744862316715, "grad_norm": 305.7964172363281, "learning_rate": 1.982213987005551e-06, "loss": 23.1562, "step": 9331 }, { "epoch": 0.08833691464488219, "grad_norm": 327.628662109375, "learning_rate": 1.982208229978198e-06, "loss": 41.2188, "step": 9332 }, { "epoch": 0.08834638066659725, "grad_norm": 2.959294080734253, "learning_rate": 1.982202472027634e-06, "loss": 1.0142, "step": 9333 }, { "epoch": 0.0883558466883123, "grad_norm": 505.7881774902344, "learning_rate": 1.982196713153863e-06, "loss": 35.375, "step": 9334 }, { "epoch": 0.08836531271002736, "grad_norm": 355.1046142578125, "learning_rate": 1.9821909533568914e-06, "loss": 23.1719, "step": 9335 }, { "epoch": 0.0883747787317424, "grad_norm": 311.53863525390625, "learning_rate": 1.9821851926367235e-06, "loss": 45.4531, "step": 9336 }, { "epoch": 0.08838424475345746, "grad_norm": 343.7705078125, "learning_rate": 1.982179430993366e-06, "loss": 17.0586, "step": 9337 }, { "epoch": 0.08839371077517252, "grad_norm": 495.4217529296875, "learning_rate": 1.9821736684268238e-06, "loss": 33.7812, "step": 9338 }, { "epoch": 0.08840317679688757, "grad_norm": 202.22962951660156, "learning_rate": 1.9821679049371017e-06, "loss": 10.1719, "step": 9339 }, { "epoch": 0.08841264281860263, "grad_norm": 300.59844970703125, "learning_rate": 1.982162140524206e-06, "loss": 28.5391, "step": 9340 }, { "epoch": 0.08842210884031768, "grad_norm": 206.52919006347656, "learning_rate": 1.9821563751881418e-06, "loss": 23.5703, "step": 9341 }, { "epoch": 0.08843157486203274, "grad_norm": 657.4034423828125, "learning_rate": 1.9821506089289146e-06, "loss": 34.0625, "step": 9342 }, { "epoch": 0.08844104088374778, "grad_norm": 3.3939132690429688, "learning_rate": 1.9821448417465295e-06, "loss": 1.0078, "step": 9343 }, { "epoch": 0.08845050690546284, "grad_norm": 566.9185180664062, "learning_rate": 1.9821390736409924e-06, "loss": 36.9375, "step": 9344 }, { "epoch": 0.0884599729271779, "grad_norm": 288.721923828125, "learning_rate": 1.982133304612308e-06, "loss": 16.8594, "step": 9345 }, { "epoch": 0.08846943894889295, "grad_norm": 544.5038452148438, "learning_rate": 1.9821275346604826e-06, "loss": 38.8594, "step": 9346 }, { "epoch": 0.08847890497060801, "grad_norm": 3.6464970111846924, "learning_rate": 1.982121763785521e-06, "loss": 0.8872, "step": 9347 }, { "epoch": 0.08848837099232305, "grad_norm": 465.5851135253906, "learning_rate": 1.982115991987429e-06, "loss": 29.0, "step": 9348 }, { "epoch": 0.08849783701403811, "grad_norm": 572.0011596679688, "learning_rate": 1.982110219266212e-06, "loss": 35.5391, "step": 9349 }, { "epoch": 0.08850730303575316, "grad_norm": 692.63671875, "learning_rate": 1.982104445621875e-06, "loss": 68.6133, "step": 9350 }, { "epoch": 0.08851676905746822, "grad_norm": 372.8194580078125, "learning_rate": 1.9820986710544236e-06, "loss": 31.9688, "step": 9351 }, { "epoch": 0.08852623507918327, "grad_norm": 260.6785888671875, "learning_rate": 1.9820928955638637e-06, "loss": 22.0156, "step": 9352 }, { "epoch": 0.08853570110089833, "grad_norm": 192.28460693359375, "learning_rate": 1.9820871191502e-06, "loss": 19.9688, "step": 9353 }, { "epoch": 0.08854516712261339, "grad_norm": 698.9056396484375, "learning_rate": 1.9820813418134384e-06, "loss": 40.4375, "step": 9354 }, { "epoch": 0.08855463314432843, "grad_norm": 269.8890075683594, "learning_rate": 1.9820755635535843e-06, "loss": 13.7695, "step": 9355 }, { "epoch": 0.08856409916604349, "grad_norm": 240.4335479736328, "learning_rate": 1.982069784370643e-06, "loss": 16.8984, "step": 9356 }, { "epoch": 0.08857356518775854, "grad_norm": 505.01861572265625, "learning_rate": 1.98206400426462e-06, "loss": 44.0938, "step": 9357 }, { "epoch": 0.0885830312094736, "grad_norm": 214.09149169921875, "learning_rate": 1.982058223235521e-06, "loss": 20.2734, "step": 9358 }, { "epoch": 0.08859249723118864, "grad_norm": 3.2525599002838135, "learning_rate": 1.982052441283351e-06, "loss": 0.9893, "step": 9359 }, { "epoch": 0.0886019632529037, "grad_norm": 303.97930908203125, "learning_rate": 1.9820466584081156e-06, "loss": 24.2031, "step": 9360 }, { "epoch": 0.08861142927461875, "grad_norm": 694.4467163085938, "learning_rate": 1.98204087460982e-06, "loss": 65.75, "step": 9361 }, { "epoch": 0.08862089529633381, "grad_norm": 488.81329345703125, "learning_rate": 1.9820350898884698e-06, "loss": 26.6484, "step": 9362 }, { "epoch": 0.08863036131804887, "grad_norm": 175.79751586914062, "learning_rate": 1.982029304244071e-06, "loss": 17.8945, "step": 9363 }, { "epoch": 0.08863982733976392, "grad_norm": 288.3592834472656, "learning_rate": 1.982023517676628e-06, "loss": 32.2578, "step": 9364 }, { "epoch": 0.08864929336147898, "grad_norm": 542.0755004882812, "learning_rate": 1.9820177301861473e-06, "loss": 18.1719, "step": 9365 }, { "epoch": 0.08865875938319402, "grad_norm": 230.34359741210938, "learning_rate": 1.9820119417726336e-06, "loss": 18.5469, "step": 9366 }, { "epoch": 0.08866822540490908, "grad_norm": 477.7102355957031, "learning_rate": 1.982006152436092e-06, "loss": 50.4688, "step": 9367 }, { "epoch": 0.08867769142662413, "grad_norm": 855.0917358398438, "learning_rate": 1.9820003621765294e-06, "loss": 65.0469, "step": 9368 }, { "epoch": 0.08868715744833919, "grad_norm": 362.2286376953125, "learning_rate": 1.98199457099395e-06, "loss": 23.7812, "step": 9369 }, { "epoch": 0.08869662347005423, "grad_norm": 523.5137939453125, "learning_rate": 1.9819887788883593e-06, "loss": 23.0156, "step": 9370 }, { "epoch": 0.0887060894917693, "grad_norm": 256.0611877441406, "learning_rate": 1.9819829858597633e-06, "loss": 19.9141, "step": 9371 }, { "epoch": 0.08871555551348435, "grad_norm": 424.1212158203125, "learning_rate": 1.9819771919081673e-06, "loss": 23.1797, "step": 9372 }, { "epoch": 0.0887250215351994, "grad_norm": 500.6457824707031, "learning_rate": 1.981971397033576e-06, "loss": 47.5469, "step": 9373 }, { "epoch": 0.08873448755691446, "grad_norm": 261.1625061035156, "learning_rate": 1.9819656012359964e-06, "loss": 10.4844, "step": 9374 }, { "epoch": 0.0887439535786295, "grad_norm": 194.53831481933594, "learning_rate": 1.9819598045154324e-06, "loss": 17.8359, "step": 9375 }, { "epoch": 0.08875341960034457, "grad_norm": 485.48126220703125, "learning_rate": 1.9819540068718905e-06, "loss": 26.3906, "step": 9376 }, { "epoch": 0.08876288562205961, "grad_norm": 526.7757568359375, "learning_rate": 1.9819482083053753e-06, "loss": 36.8594, "step": 9377 }, { "epoch": 0.08877235164377467, "grad_norm": 271.79937744140625, "learning_rate": 1.9819424088158928e-06, "loss": 34.1094, "step": 9378 }, { "epoch": 0.08878181766548972, "grad_norm": 3.0203287601470947, "learning_rate": 1.981936608403448e-06, "loss": 0.8042, "step": 9379 }, { "epoch": 0.08879128368720478, "grad_norm": 1130.525146484375, "learning_rate": 1.9819308070680473e-06, "loss": 57.5391, "step": 9380 }, { "epoch": 0.08880074970891984, "grad_norm": 358.7857360839844, "learning_rate": 1.981925004809695e-06, "loss": 30.6641, "step": 9381 }, { "epoch": 0.08881021573063488, "grad_norm": 337.1576843261719, "learning_rate": 1.9819192016283973e-06, "loss": 24.7188, "step": 9382 }, { "epoch": 0.08881968175234994, "grad_norm": 192.81781005859375, "learning_rate": 1.9819133975241595e-06, "loss": 19.7344, "step": 9383 }, { "epoch": 0.08882914777406499, "grad_norm": 2.704434633255005, "learning_rate": 1.981907592496987e-06, "loss": 0.894, "step": 9384 }, { "epoch": 0.08883861379578005, "grad_norm": 440.28973388671875, "learning_rate": 1.981901786546885e-06, "loss": 31.4219, "step": 9385 }, { "epoch": 0.0888480798174951, "grad_norm": 316.3153076171875, "learning_rate": 1.9818959796738596e-06, "loss": 20.9844, "step": 9386 }, { "epoch": 0.08885754583921016, "grad_norm": 207.21246337890625, "learning_rate": 1.9818901718779154e-06, "loss": 23.8125, "step": 9387 }, { "epoch": 0.08886701186092522, "grad_norm": 303.5054626464844, "learning_rate": 1.981884363159058e-06, "loss": 27.3242, "step": 9388 }, { "epoch": 0.08887647788264026, "grad_norm": 318.9969787597656, "learning_rate": 1.981878553517294e-06, "loss": 19.7656, "step": 9389 }, { "epoch": 0.08888594390435532, "grad_norm": 388.9466552734375, "learning_rate": 1.981872742952628e-06, "loss": 39.4453, "step": 9390 }, { "epoch": 0.08889540992607037, "grad_norm": 351.9349365234375, "learning_rate": 1.9818669314650647e-06, "loss": 30.625, "step": 9391 }, { "epoch": 0.08890487594778543, "grad_norm": 561.6893920898438, "learning_rate": 1.981861119054611e-06, "loss": 33.8516, "step": 9392 }, { "epoch": 0.08891434196950047, "grad_norm": 668.3001708984375, "learning_rate": 1.981855305721272e-06, "loss": 50.25, "step": 9393 }, { "epoch": 0.08892380799121553, "grad_norm": 312.4421081542969, "learning_rate": 1.9818494914650523e-06, "loss": 22.4453, "step": 9394 }, { "epoch": 0.08893327401293058, "grad_norm": 3.088897943496704, "learning_rate": 1.9818436762859578e-06, "loss": 0.9824, "step": 9395 }, { "epoch": 0.08894274003464564, "grad_norm": 494.02606201171875, "learning_rate": 1.9818378601839945e-06, "loss": 40.7969, "step": 9396 }, { "epoch": 0.0889522060563607, "grad_norm": 491.4949645996094, "learning_rate": 1.9818320431591673e-06, "loss": 36.5391, "step": 9397 }, { "epoch": 0.08896167207807575, "grad_norm": 3.1077561378479004, "learning_rate": 1.981826225211482e-06, "loss": 0.9062, "step": 9398 }, { "epoch": 0.0889711380997908, "grad_norm": 920.8452758789062, "learning_rate": 1.9818204063409442e-06, "loss": 78.9453, "step": 9399 }, { "epoch": 0.08898060412150585, "grad_norm": 361.4859924316406, "learning_rate": 1.9818145865475585e-06, "loss": 9.3516, "step": 9400 }, { "epoch": 0.08899007014322091, "grad_norm": 2.9828546047210693, "learning_rate": 1.9818087658313312e-06, "loss": 0.8882, "step": 9401 }, { "epoch": 0.08899953616493596, "grad_norm": 523.2352294921875, "learning_rate": 1.981802944192268e-06, "loss": 50.0469, "step": 9402 }, { "epoch": 0.08900900218665102, "grad_norm": 223.9906768798828, "learning_rate": 1.981797121630373e-06, "loss": 19.0156, "step": 9403 }, { "epoch": 0.08901846820836606, "grad_norm": 1066.9813232421875, "learning_rate": 1.9817912981456535e-06, "loss": 59.1133, "step": 9404 }, { "epoch": 0.08902793423008112, "grad_norm": 389.8690185546875, "learning_rate": 1.9817854737381134e-06, "loss": 21.6484, "step": 9405 }, { "epoch": 0.08903740025179618, "grad_norm": 387.2532958984375, "learning_rate": 1.981779648407759e-06, "loss": 51.5625, "step": 9406 }, { "epoch": 0.08904686627351123, "grad_norm": 480.5852355957031, "learning_rate": 1.9817738221545957e-06, "loss": 41.0625, "step": 9407 }, { "epoch": 0.08905633229522629, "grad_norm": 645.7107543945312, "learning_rate": 1.981767994978629e-06, "loss": 46.6094, "step": 9408 }, { "epoch": 0.08906579831694134, "grad_norm": 490.3713684082031, "learning_rate": 1.981762166879864e-06, "loss": 33.7109, "step": 9409 }, { "epoch": 0.0890752643386564, "grad_norm": 294.6695861816406, "learning_rate": 1.9817563378583062e-06, "loss": 20.2344, "step": 9410 }, { "epoch": 0.08908473036037144, "grad_norm": 617.3006591796875, "learning_rate": 1.9817505079139616e-06, "loss": 47.875, "step": 9411 }, { "epoch": 0.0890941963820865, "grad_norm": 316.7355651855469, "learning_rate": 1.981744677046835e-06, "loss": 30.9062, "step": 9412 }, { "epoch": 0.08910366240380155, "grad_norm": 643.7457275390625, "learning_rate": 1.9817388452569333e-06, "loss": 10.2539, "step": 9413 }, { "epoch": 0.08911312842551661, "grad_norm": 549.659423828125, "learning_rate": 1.98173301254426e-06, "loss": 60.25, "step": 9414 }, { "epoch": 0.08912259444723167, "grad_norm": 517.6708374023438, "learning_rate": 1.981727178908822e-06, "loss": 42.4062, "step": 9415 }, { "epoch": 0.08913206046894671, "grad_norm": 501.07904052734375, "learning_rate": 1.981721344350624e-06, "loss": 42.8594, "step": 9416 }, { "epoch": 0.08914152649066177, "grad_norm": 291.6895751953125, "learning_rate": 1.981715508869672e-06, "loss": 31.3047, "step": 9417 }, { "epoch": 0.08915099251237682, "grad_norm": 414.514404296875, "learning_rate": 1.9817096724659714e-06, "loss": 29.8203, "step": 9418 }, { "epoch": 0.08916045853409188, "grad_norm": 333.0033264160156, "learning_rate": 1.981703835139527e-06, "loss": 61.9531, "step": 9419 }, { "epoch": 0.08916992455580693, "grad_norm": 499.8777160644531, "learning_rate": 1.981697996890346e-06, "loss": 36.2891, "step": 9420 }, { "epoch": 0.08917939057752199, "grad_norm": 839.9603271484375, "learning_rate": 1.9816921577184316e-06, "loss": 9.4805, "step": 9421 }, { "epoch": 0.08918885659923703, "grad_norm": 189.38743591308594, "learning_rate": 1.981686317623791e-06, "loss": 19.6953, "step": 9422 }, { "epoch": 0.08919832262095209, "grad_norm": 348.5921325683594, "learning_rate": 1.981680476606429e-06, "loss": 22.3203, "step": 9423 }, { "epoch": 0.08920778864266715, "grad_norm": 461.4508972167969, "learning_rate": 1.981674634666351e-06, "loss": 21.8945, "step": 9424 }, { "epoch": 0.0892172546643822, "grad_norm": 152.05003356933594, "learning_rate": 1.981668791803563e-06, "loss": 19.9922, "step": 9425 }, { "epoch": 0.08922672068609726, "grad_norm": 542.9703979492188, "learning_rate": 1.98166294801807e-06, "loss": 54.9844, "step": 9426 }, { "epoch": 0.0892361867078123, "grad_norm": 416.2648620605469, "learning_rate": 1.981657103309878e-06, "loss": 23.2773, "step": 9427 }, { "epoch": 0.08924565272952736, "grad_norm": 2.7456905841827393, "learning_rate": 1.981651257678992e-06, "loss": 0.7986, "step": 9428 }, { "epoch": 0.08925511875124241, "grad_norm": 3.066311836242676, "learning_rate": 1.9816454111254175e-06, "loss": 0.8369, "step": 9429 }, { "epoch": 0.08926458477295747, "grad_norm": 288.93792724609375, "learning_rate": 1.9816395636491604e-06, "loss": 28.5938, "step": 9430 }, { "epoch": 0.08927405079467253, "grad_norm": 400.378173828125, "learning_rate": 1.9816337152502256e-06, "loss": 21.2109, "step": 9431 }, { "epoch": 0.08928351681638758, "grad_norm": 3.756993055343628, "learning_rate": 1.9816278659286196e-06, "loss": 1.0552, "step": 9432 }, { "epoch": 0.08929298283810264, "grad_norm": 382.3153076171875, "learning_rate": 1.981622015684347e-06, "loss": 25.6484, "step": 9433 }, { "epoch": 0.08930244885981768, "grad_norm": 320.0689392089844, "learning_rate": 1.981616164517413e-06, "loss": 11.3242, "step": 9434 }, { "epoch": 0.08931191488153274, "grad_norm": 216.01150512695312, "learning_rate": 1.9816103124278244e-06, "loss": 23.4297, "step": 9435 }, { "epoch": 0.08932138090324779, "grad_norm": 148.4545440673828, "learning_rate": 1.9816044594155857e-06, "loss": 20.6094, "step": 9436 }, { "epoch": 0.08933084692496285, "grad_norm": 526.0708618164062, "learning_rate": 1.9815986054807028e-06, "loss": 78.2656, "step": 9437 }, { "epoch": 0.0893403129466779, "grad_norm": 584.0662841796875, "learning_rate": 1.9815927506231807e-06, "loss": 32.0391, "step": 9438 }, { "epoch": 0.08934977896839295, "grad_norm": 284.74981689453125, "learning_rate": 1.981586894843026e-06, "loss": 40.5781, "step": 9439 }, { "epoch": 0.08935924499010801, "grad_norm": 283.4690246582031, "learning_rate": 1.9815810381402427e-06, "loss": 22.5234, "step": 9440 }, { "epoch": 0.08936871101182306, "grad_norm": 3.7398362159729004, "learning_rate": 1.9815751805148376e-06, "loss": 0.8989, "step": 9441 }, { "epoch": 0.08937817703353812, "grad_norm": 309.2441711425781, "learning_rate": 1.981569321966815e-06, "loss": 22.0391, "step": 9442 }, { "epoch": 0.08938764305525317, "grad_norm": 333.94097900390625, "learning_rate": 1.9815634624961818e-06, "loss": 22.125, "step": 9443 }, { "epoch": 0.08939710907696823, "grad_norm": 521.0274658203125, "learning_rate": 1.9815576021029424e-06, "loss": 27.0, "step": 9444 }, { "epoch": 0.08940657509868327, "grad_norm": 205.16038513183594, "learning_rate": 1.9815517407871033e-06, "loss": 19.1406, "step": 9445 }, { "epoch": 0.08941604112039833, "grad_norm": 724.651123046875, "learning_rate": 1.9815458785486688e-06, "loss": 64.9375, "step": 9446 }, { "epoch": 0.08942550714211338, "grad_norm": 418.0701904296875, "learning_rate": 1.981540015387645e-06, "loss": 50.5312, "step": 9447 }, { "epoch": 0.08943497316382844, "grad_norm": 3.225876569747925, "learning_rate": 1.981534151304038e-06, "loss": 0.8169, "step": 9448 }, { "epoch": 0.0894444391855435, "grad_norm": 167.68124389648438, "learning_rate": 1.9815282862978523e-06, "loss": 19.6328, "step": 9449 }, { "epoch": 0.08945390520725854, "grad_norm": 336.68408203125, "learning_rate": 1.9815224203690938e-06, "loss": 12.5117, "step": 9450 }, { "epoch": 0.0894633712289736, "grad_norm": 388.9901123046875, "learning_rate": 1.9815165535177686e-06, "loss": 16.3828, "step": 9451 }, { "epoch": 0.08947283725068865, "grad_norm": 363.2774353027344, "learning_rate": 1.9815106857438814e-06, "loss": 53.3828, "step": 9452 }, { "epoch": 0.08948230327240371, "grad_norm": 285.150390625, "learning_rate": 1.9815048170474377e-06, "loss": 28.6562, "step": 9453 }, { "epoch": 0.08949176929411876, "grad_norm": 398.74359130859375, "learning_rate": 1.9814989474284438e-06, "loss": 18.6953, "step": 9454 }, { "epoch": 0.08950123531583382, "grad_norm": 246.92210388183594, "learning_rate": 1.9814930768869044e-06, "loss": 17.8672, "step": 9455 }, { "epoch": 0.08951070133754886, "grad_norm": 229.83639526367188, "learning_rate": 1.9814872054228254e-06, "loss": 18.5, "step": 9456 }, { "epoch": 0.08952016735926392, "grad_norm": 373.35125732421875, "learning_rate": 1.9814813330362123e-06, "loss": 19.2656, "step": 9457 }, { "epoch": 0.08952963338097898, "grad_norm": 420.3739318847656, "learning_rate": 1.9814754597270707e-06, "loss": 36.3672, "step": 9458 }, { "epoch": 0.08953909940269403, "grad_norm": 228.38804626464844, "learning_rate": 1.981469585495406e-06, "loss": 13.7422, "step": 9459 }, { "epoch": 0.08954856542440909, "grad_norm": 170.71864318847656, "learning_rate": 1.9814637103412237e-06, "loss": 12.25, "step": 9460 }, { "epoch": 0.08955803144612413, "grad_norm": 436.7677001953125, "learning_rate": 1.9814578342645294e-06, "loss": 29.8594, "step": 9461 }, { "epoch": 0.0895674974678392, "grad_norm": 197.26287841796875, "learning_rate": 1.9814519572653285e-06, "loss": 23.375, "step": 9462 }, { "epoch": 0.08957696348955424, "grad_norm": 816.8046875, "learning_rate": 1.9814460793436266e-06, "loss": 23.3594, "step": 9463 }, { "epoch": 0.0895864295112693, "grad_norm": 398.2343444824219, "learning_rate": 1.981440200499429e-06, "loss": 30.1562, "step": 9464 }, { "epoch": 0.08959589553298435, "grad_norm": 1319.264404296875, "learning_rate": 1.9814343207327417e-06, "loss": 44.2578, "step": 9465 }, { "epoch": 0.0896053615546994, "grad_norm": 654.756103515625, "learning_rate": 1.98142844004357e-06, "loss": 42.4531, "step": 9466 }, { "epoch": 0.08961482757641447, "grad_norm": 218.04486083984375, "learning_rate": 1.9814225584319195e-06, "loss": 15.7109, "step": 9467 }, { "epoch": 0.08962429359812951, "grad_norm": 579.1248779296875, "learning_rate": 1.981416675897796e-06, "loss": 62.625, "step": 9468 }, { "epoch": 0.08963375961984457, "grad_norm": 525.848876953125, "learning_rate": 1.981410792441204e-06, "loss": 49.8906, "step": 9469 }, { "epoch": 0.08964322564155962, "grad_norm": 420.0704040527344, "learning_rate": 1.98140490806215e-06, "loss": 31.4297, "step": 9470 }, { "epoch": 0.08965269166327468, "grad_norm": 292.6224365234375, "learning_rate": 1.9813990227606392e-06, "loss": 17.3516, "step": 9471 }, { "epoch": 0.08966215768498972, "grad_norm": 3.1609318256378174, "learning_rate": 1.981393136536677e-06, "loss": 0.9102, "step": 9472 }, { "epoch": 0.08967162370670478, "grad_norm": 516.6405639648438, "learning_rate": 1.981387249390269e-06, "loss": 55.8438, "step": 9473 }, { "epoch": 0.08968108972841984, "grad_norm": 3.2265400886535645, "learning_rate": 1.9813813613214213e-06, "loss": 0.947, "step": 9474 }, { "epoch": 0.08969055575013489, "grad_norm": 206.45135498046875, "learning_rate": 1.981375472330139e-06, "loss": 18.3125, "step": 9475 }, { "epoch": 0.08970002177184995, "grad_norm": 175.5547637939453, "learning_rate": 1.981369582416427e-06, "loss": 24.4375, "step": 9476 }, { "epoch": 0.089709487793565, "grad_norm": 238.46664428710938, "learning_rate": 1.981363691580292e-06, "loss": 24.6797, "step": 9477 }, { "epoch": 0.08971895381528006, "grad_norm": 2.987643003463745, "learning_rate": 1.9813577998217385e-06, "loss": 0.8547, "step": 9478 }, { "epoch": 0.0897284198369951, "grad_norm": 306.7988586425781, "learning_rate": 1.9813519071407726e-06, "loss": 24.1016, "step": 9479 }, { "epoch": 0.08973788585871016, "grad_norm": 612.2770385742188, "learning_rate": 1.9813460135374004e-06, "loss": 44.9609, "step": 9480 }, { "epoch": 0.08974735188042521, "grad_norm": 394.6059265136719, "learning_rate": 1.9813401190116263e-06, "loss": 25.1875, "step": 9481 }, { "epoch": 0.08975681790214027, "grad_norm": 390.64093017578125, "learning_rate": 1.981334223563456e-06, "loss": 23.4062, "step": 9482 }, { "epoch": 0.08976628392385533, "grad_norm": 492.9010314941406, "learning_rate": 1.9813283271928956e-06, "loss": 57.2969, "step": 9483 }, { "epoch": 0.08977574994557037, "grad_norm": 456.67120361328125, "learning_rate": 1.981322429899951e-06, "loss": 29.9844, "step": 9484 }, { "epoch": 0.08978521596728543, "grad_norm": 719.1285400390625, "learning_rate": 1.981316531684626e-06, "loss": 43.6875, "step": 9485 }, { "epoch": 0.08979468198900048, "grad_norm": 206.8585205078125, "learning_rate": 1.9813106325469283e-06, "loss": 17.75, "step": 9486 }, { "epoch": 0.08980414801071554, "grad_norm": 1360.60009765625, "learning_rate": 1.981304732486862e-06, "loss": 56.0469, "step": 9487 }, { "epoch": 0.08981361403243059, "grad_norm": 376.3085632324219, "learning_rate": 1.981298831504433e-06, "loss": 25.0312, "step": 9488 }, { "epoch": 0.08982308005414565, "grad_norm": 1118.7540283203125, "learning_rate": 1.9812929295996473e-06, "loss": 51.1328, "step": 9489 }, { "epoch": 0.08983254607586069, "grad_norm": 481.20477294921875, "learning_rate": 1.98128702677251e-06, "loss": 18.5547, "step": 9490 }, { "epoch": 0.08984201209757575, "grad_norm": 659.1195678710938, "learning_rate": 1.9812811230230264e-06, "loss": 31.1367, "step": 9491 }, { "epoch": 0.08985147811929081, "grad_norm": 599.1149291992188, "learning_rate": 1.981275218351203e-06, "loss": 31.2344, "step": 9492 }, { "epoch": 0.08986094414100586, "grad_norm": 656.2215576171875, "learning_rate": 1.981269312757044e-06, "loss": 40.5703, "step": 9493 }, { "epoch": 0.08987041016272092, "grad_norm": 372.67327880859375, "learning_rate": 1.981263406240556e-06, "loss": 34.75, "step": 9494 }, { "epoch": 0.08987987618443596, "grad_norm": 400.66156005859375, "learning_rate": 1.9812574988017444e-06, "loss": 33.9062, "step": 9495 }, { "epoch": 0.08988934220615102, "grad_norm": 190.00209045410156, "learning_rate": 1.9812515904406146e-06, "loss": 23.3672, "step": 9496 }, { "epoch": 0.08989880822786607, "grad_norm": 920.4474487304688, "learning_rate": 1.981245681157172e-06, "loss": 58.25, "step": 9497 }, { "epoch": 0.08990827424958113, "grad_norm": 401.5257873535156, "learning_rate": 1.981239770951422e-06, "loss": 37.0156, "step": 9498 }, { "epoch": 0.08991774027129618, "grad_norm": 1113.6953125, "learning_rate": 1.9812338598233705e-06, "loss": 59.2031, "step": 9499 }, { "epoch": 0.08992720629301124, "grad_norm": 301.6733093261719, "learning_rate": 1.9812279477730235e-06, "loss": 35.0781, "step": 9500 }, { "epoch": 0.0899366723147263, "grad_norm": 141.7421417236328, "learning_rate": 1.981222034800386e-06, "loss": 13.6562, "step": 9501 }, { "epoch": 0.08994613833644134, "grad_norm": 517.6517333984375, "learning_rate": 1.981216120905463e-06, "loss": 35.7969, "step": 9502 }, { "epoch": 0.0899556043581564, "grad_norm": 523.3798217773438, "learning_rate": 1.9812102060882614e-06, "loss": 24.8125, "step": 9503 }, { "epoch": 0.08996507037987145, "grad_norm": 373.04168701171875, "learning_rate": 1.9812042903487856e-06, "loss": 35.2969, "step": 9504 }, { "epoch": 0.08997453640158651, "grad_norm": 226.73255920410156, "learning_rate": 1.9811983736870417e-06, "loss": 26.9766, "step": 9505 }, { "epoch": 0.08998400242330155, "grad_norm": 169.91038513183594, "learning_rate": 1.981192456103035e-06, "loss": 14.6797, "step": 9506 }, { "epoch": 0.08999346844501661, "grad_norm": 281.8894958496094, "learning_rate": 1.9811865375967715e-06, "loss": 24.8281, "step": 9507 }, { "epoch": 0.09000293446673166, "grad_norm": 166.61549377441406, "learning_rate": 1.9811806181682567e-06, "loss": 18.25, "step": 9508 }, { "epoch": 0.09001240048844672, "grad_norm": 343.2150573730469, "learning_rate": 1.9811746978174954e-06, "loss": 22.1797, "step": 9509 }, { "epoch": 0.09002186651016178, "grad_norm": 411.0964660644531, "learning_rate": 1.9811687765444944e-06, "loss": 21.5625, "step": 9510 }, { "epoch": 0.09003133253187683, "grad_norm": 287.27874755859375, "learning_rate": 1.981162854349258e-06, "loss": 8.0664, "step": 9511 }, { "epoch": 0.09004079855359189, "grad_norm": 438.58990478515625, "learning_rate": 1.9811569312317926e-06, "loss": 51.1406, "step": 9512 }, { "epoch": 0.09005026457530693, "grad_norm": 3.2245101928710938, "learning_rate": 1.981151007192103e-06, "loss": 0.9058, "step": 9513 }, { "epoch": 0.09005973059702199, "grad_norm": 517.1991577148438, "learning_rate": 1.9811450822301954e-06, "loss": 39.3438, "step": 9514 }, { "epoch": 0.09006919661873704, "grad_norm": 662.112060546875, "learning_rate": 1.9811391563460758e-06, "loss": 36.9805, "step": 9515 }, { "epoch": 0.0900786626404521, "grad_norm": 477.8849182128906, "learning_rate": 1.981133229539749e-06, "loss": 37.2812, "step": 9516 }, { "epoch": 0.09008812866216716, "grad_norm": 284.6169128417969, "learning_rate": 1.9811273018112206e-06, "loss": 20.8906, "step": 9517 }, { "epoch": 0.0900975946838822, "grad_norm": 611.748291015625, "learning_rate": 1.9811213731604965e-06, "loss": 8.3281, "step": 9518 }, { "epoch": 0.09010706070559726, "grad_norm": 381.3407287597656, "learning_rate": 1.9811154435875823e-06, "loss": 28.6797, "step": 9519 }, { "epoch": 0.09011652672731231, "grad_norm": 511.77374267578125, "learning_rate": 1.9811095130924834e-06, "loss": 34.1094, "step": 9520 }, { "epoch": 0.09012599274902737, "grad_norm": 264.94122314453125, "learning_rate": 1.9811035816752053e-06, "loss": 32.5547, "step": 9521 }, { "epoch": 0.09013545877074242, "grad_norm": 222.18309020996094, "learning_rate": 1.9810976493357535e-06, "loss": 23.75, "step": 9522 }, { "epoch": 0.09014492479245748, "grad_norm": 444.715087890625, "learning_rate": 1.981091716074134e-06, "loss": 22.4766, "step": 9523 }, { "epoch": 0.09015439081417252, "grad_norm": 3.580080032348633, "learning_rate": 1.981085781890352e-06, "loss": 0.9854, "step": 9524 }, { "epoch": 0.09016385683588758, "grad_norm": 168.17236328125, "learning_rate": 1.9810798467844126e-06, "loss": 20.5703, "step": 9525 }, { "epoch": 0.09017332285760264, "grad_norm": 160.619140625, "learning_rate": 1.9810739107563225e-06, "loss": 23.3203, "step": 9526 }, { "epoch": 0.09018278887931769, "grad_norm": 3.090773105621338, "learning_rate": 1.981067973806087e-06, "loss": 0.9648, "step": 9527 }, { "epoch": 0.09019225490103275, "grad_norm": 567.5475463867188, "learning_rate": 1.981062035933711e-06, "loss": 36.2734, "step": 9528 }, { "epoch": 0.0902017209227478, "grad_norm": 188.54696655273438, "learning_rate": 1.981056097139201e-06, "loss": 20.5898, "step": 9529 }, { "epoch": 0.09021118694446285, "grad_norm": 259.3170166015625, "learning_rate": 1.9810501574225613e-06, "loss": 34.2188, "step": 9530 }, { "epoch": 0.0902206529661779, "grad_norm": 515.0632934570312, "learning_rate": 1.981044216783799e-06, "loss": 31.9609, "step": 9531 }, { "epoch": 0.09023011898789296, "grad_norm": 605.9942626953125, "learning_rate": 1.9810382752229185e-06, "loss": 24.7344, "step": 9532 }, { "epoch": 0.090239585009608, "grad_norm": 430.25482177734375, "learning_rate": 1.981032332739926e-06, "loss": 18.6328, "step": 9533 }, { "epoch": 0.09024905103132307, "grad_norm": 399.2234191894531, "learning_rate": 1.981026389334827e-06, "loss": 24.2188, "step": 9534 }, { "epoch": 0.09025851705303813, "grad_norm": 182.85650634765625, "learning_rate": 1.9810204450076272e-06, "loss": 19.8906, "step": 9535 }, { "epoch": 0.09026798307475317, "grad_norm": 514.7698364257812, "learning_rate": 1.9810144997583316e-06, "loss": 47.7344, "step": 9536 }, { "epoch": 0.09027744909646823, "grad_norm": 594.4711303710938, "learning_rate": 1.9810085535869466e-06, "loss": 38.0859, "step": 9537 }, { "epoch": 0.09028691511818328, "grad_norm": 358.90704345703125, "learning_rate": 1.9810026064934773e-06, "loss": 29.2812, "step": 9538 }, { "epoch": 0.09029638113989834, "grad_norm": 263.97552490234375, "learning_rate": 1.980996658477929e-06, "loss": 21.2891, "step": 9539 }, { "epoch": 0.09030584716161338, "grad_norm": 266.4091491699219, "learning_rate": 1.9809907095403077e-06, "loss": 14.8633, "step": 9540 }, { "epoch": 0.09031531318332844, "grad_norm": 176.7696990966797, "learning_rate": 1.980984759680619e-06, "loss": 20.4609, "step": 9541 }, { "epoch": 0.09032477920504349, "grad_norm": 285.677490234375, "learning_rate": 1.9809788088988688e-06, "loss": 24.6406, "step": 9542 }, { "epoch": 0.09033424522675855, "grad_norm": 971.498779296875, "learning_rate": 1.9809728571950622e-06, "loss": 55.4219, "step": 9543 }, { "epoch": 0.09034371124847361, "grad_norm": 592.8601684570312, "learning_rate": 1.980966904569205e-06, "loss": 42.2344, "step": 9544 }, { "epoch": 0.09035317727018866, "grad_norm": 3.393580913543701, "learning_rate": 1.9809609510213025e-06, "loss": 0.9897, "step": 9545 }, { "epoch": 0.09036264329190372, "grad_norm": 298.6016540527344, "learning_rate": 1.9809549965513604e-06, "loss": 8.7578, "step": 9546 }, { "epoch": 0.09037210931361876, "grad_norm": 610.9742431640625, "learning_rate": 1.9809490411593844e-06, "loss": 42.5234, "step": 9547 }, { "epoch": 0.09038157533533382, "grad_norm": 3.244966506958008, "learning_rate": 1.9809430848453804e-06, "loss": 0.9741, "step": 9548 }, { "epoch": 0.09039104135704887, "grad_norm": 517.9107055664062, "learning_rate": 1.9809371276093534e-06, "loss": 37.3281, "step": 9549 }, { "epoch": 0.09040050737876393, "grad_norm": 331.4424133300781, "learning_rate": 1.9809311694513096e-06, "loss": 34.6875, "step": 9550 }, { "epoch": 0.09040997340047897, "grad_norm": 941.544189453125, "learning_rate": 1.9809252103712547e-06, "loss": 29.3047, "step": 9551 }, { "epoch": 0.09041943942219403, "grad_norm": 377.02398681640625, "learning_rate": 1.9809192503691934e-06, "loss": 40.6016, "step": 9552 }, { "epoch": 0.0904289054439091, "grad_norm": 207.14637756347656, "learning_rate": 1.980913289445132e-06, "loss": 19.8828, "step": 9553 }, { "epoch": 0.09043837146562414, "grad_norm": 408.3224182128906, "learning_rate": 1.9809073275990754e-06, "loss": 24.5312, "step": 9554 }, { "epoch": 0.0904478374873392, "grad_norm": 648.103759765625, "learning_rate": 1.9809013648310306e-06, "loss": 50.2969, "step": 9555 }, { "epoch": 0.09045730350905425, "grad_norm": 464.6663818359375, "learning_rate": 1.9808954011410014e-06, "loss": 53.5469, "step": 9556 }, { "epoch": 0.0904667695307693, "grad_norm": 442.8744812011719, "learning_rate": 1.980889436528995e-06, "loss": 42.5781, "step": 9557 }, { "epoch": 0.09047623555248435, "grad_norm": 236.3868865966797, "learning_rate": 1.9808834709950163e-06, "loss": 21.0391, "step": 9558 }, { "epoch": 0.09048570157419941, "grad_norm": 555.3199462890625, "learning_rate": 1.9808775045390705e-06, "loss": 45.7812, "step": 9559 }, { "epoch": 0.09049516759591447, "grad_norm": 228.523193359375, "learning_rate": 1.980871537161164e-06, "loss": 19.4688, "step": 9560 }, { "epoch": 0.09050463361762952, "grad_norm": 453.7644348144531, "learning_rate": 1.980865568861302e-06, "loss": 17.2422, "step": 9561 }, { "epoch": 0.09051409963934458, "grad_norm": 719.947021484375, "learning_rate": 1.9808595996394904e-06, "loss": 60.5312, "step": 9562 }, { "epoch": 0.09052356566105962, "grad_norm": 497.8179931640625, "learning_rate": 1.9808536294957343e-06, "loss": 20.3984, "step": 9563 }, { "epoch": 0.09053303168277468, "grad_norm": 4.035803318023682, "learning_rate": 1.98084765843004e-06, "loss": 0.9849, "step": 9564 }, { "epoch": 0.09054249770448973, "grad_norm": 328.01708984375, "learning_rate": 1.980841686442412e-06, "loss": 20.8203, "step": 9565 }, { "epoch": 0.09055196372620479, "grad_norm": 389.71795654296875, "learning_rate": 1.980835713532857e-06, "loss": 29.3594, "step": 9566 }, { "epoch": 0.09056142974791984, "grad_norm": 288.1833801269531, "learning_rate": 1.9808297397013803e-06, "loss": 25.7578, "step": 9567 }, { "epoch": 0.0905708957696349, "grad_norm": 320.91424560546875, "learning_rate": 1.9808237649479874e-06, "loss": 26.2422, "step": 9568 }, { "epoch": 0.09058036179134996, "grad_norm": 298.38177490234375, "learning_rate": 1.9808177892726843e-06, "loss": 29.4297, "step": 9569 }, { "epoch": 0.090589827813065, "grad_norm": 711.2406616210938, "learning_rate": 1.9808118126754756e-06, "loss": 40.75, "step": 9570 }, { "epoch": 0.09059929383478006, "grad_norm": 386.9328918457031, "learning_rate": 1.980805835156368e-06, "loss": 22.5469, "step": 9571 }, { "epoch": 0.09060875985649511, "grad_norm": 365.0610656738281, "learning_rate": 1.980799856715367e-06, "loss": 22.2188, "step": 9572 }, { "epoch": 0.09061822587821017, "grad_norm": 269.36285400390625, "learning_rate": 1.9807938773524774e-06, "loss": 32.5781, "step": 9573 }, { "epoch": 0.09062769189992521, "grad_norm": 408.8167419433594, "learning_rate": 1.9807878970677052e-06, "loss": 31.8438, "step": 9574 }, { "epoch": 0.09063715792164027, "grad_norm": 366.47808837890625, "learning_rate": 1.9807819158610567e-06, "loss": 11.6484, "step": 9575 }, { "epoch": 0.09064662394335532, "grad_norm": 307.1010437011719, "learning_rate": 1.980775933732537e-06, "loss": 21.3984, "step": 9576 }, { "epoch": 0.09065608996507038, "grad_norm": 396.2312927246094, "learning_rate": 1.9807699506821514e-06, "loss": 41.125, "step": 9577 }, { "epoch": 0.09066555598678544, "grad_norm": 303.9492492675781, "learning_rate": 1.980763966709906e-06, "loss": 22.4961, "step": 9578 }, { "epoch": 0.09067502200850049, "grad_norm": 750.0142822265625, "learning_rate": 1.9807579818158063e-06, "loss": 28.4766, "step": 9579 }, { "epoch": 0.09068448803021555, "grad_norm": 650.0199584960938, "learning_rate": 1.980751995999858e-06, "loss": 56.0312, "step": 9580 }, { "epoch": 0.09069395405193059, "grad_norm": 190.72264099121094, "learning_rate": 1.9807460092620664e-06, "loss": 24.2188, "step": 9581 }, { "epoch": 0.09070342007364565, "grad_norm": 462.9262390136719, "learning_rate": 1.9807400216024375e-06, "loss": 26.5312, "step": 9582 }, { "epoch": 0.0907128860953607, "grad_norm": 534.6539306640625, "learning_rate": 1.980734033020977e-06, "loss": 28.3125, "step": 9583 }, { "epoch": 0.09072235211707576, "grad_norm": 507.11138916015625, "learning_rate": 1.9807280435176897e-06, "loss": 33.0312, "step": 9584 }, { "epoch": 0.0907318181387908, "grad_norm": 3.51324462890625, "learning_rate": 1.980722053092582e-06, "loss": 0.9424, "step": 9585 }, { "epoch": 0.09074128416050586, "grad_norm": 3.152210235595703, "learning_rate": 1.98071606174566e-06, "loss": 0.8901, "step": 9586 }, { "epoch": 0.09075075018222092, "grad_norm": 615.6299438476562, "learning_rate": 1.980710069476928e-06, "loss": 44.7891, "step": 9587 }, { "epoch": 0.09076021620393597, "grad_norm": 330.4158935546875, "learning_rate": 1.9807040762863928e-06, "loss": 19.7031, "step": 9588 }, { "epoch": 0.09076968222565103, "grad_norm": 2.685065507888794, "learning_rate": 1.9806980821740597e-06, "loss": 0.9844, "step": 9589 }, { "epoch": 0.09077914824736608, "grad_norm": 295.3648376464844, "learning_rate": 1.980692087139934e-06, "loss": 33.3125, "step": 9590 }, { "epoch": 0.09078861426908114, "grad_norm": 253.79444885253906, "learning_rate": 1.9806860911840213e-06, "loss": 19.6328, "step": 9591 }, { "epoch": 0.09079808029079618, "grad_norm": 268.2671203613281, "learning_rate": 1.9806800943063277e-06, "loss": 22.4844, "step": 9592 }, { "epoch": 0.09080754631251124, "grad_norm": 258.3752746582031, "learning_rate": 1.980674096506858e-06, "loss": 21.2734, "step": 9593 }, { "epoch": 0.09081701233422629, "grad_norm": 408.5722961425781, "learning_rate": 1.980668097785619e-06, "loss": 23.3906, "step": 9594 }, { "epoch": 0.09082647835594135, "grad_norm": 575.11572265625, "learning_rate": 1.9806620981426157e-06, "loss": 49.8594, "step": 9595 }, { "epoch": 0.09083594437765641, "grad_norm": 554.806640625, "learning_rate": 1.980656097577854e-06, "loss": 44.4844, "step": 9596 }, { "epoch": 0.09084541039937145, "grad_norm": 227.0635528564453, "learning_rate": 1.9806500960913396e-06, "loss": 18.4297, "step": 9597 }, { "epoch": 0.09085487642108651, "grad_norm": 723.376708984375, "learning_rate": 1.9806440936830773e-06, "loss": 32.3281, "step": 9598 }, { "epoch": 0.09086434244280156, "grad_norm": 193.87933349609375, "learning_rate": 1.9806380903530734e-06, "loss": 21.0391, "step": 9599 }, { "epoch": 0.09087380846451662, "grad_norm": 831.2883911132812, "learning_rate": 1.980632086101334e-06, "loss": 28.625, "step": 9600 }, { "epoch": 0.09088327448623167, "grad_norm": 570.9581909179688, "learning_rate": 1.9806260809278635e-06, "loss": 26.5391, "step": 9601 }, { "epoch": 0.09089274050794673, "grad_norm": 3.1142196655273438, "learning_rate": 1.980620074832669e-06, "loss": 1.0186, "step": 9602 }, { "epoch": 0.09090220652966179, "grad_norm": 425.13909912109375, "learning_rate": 1.9806140678157546e-06, "loss": 37.1094, "step": 9603 }, { "epoch": 0.09091167255137683, "grad_norm": 525.424072265625, "learning_rate": 1.980608059877128e-06, "loss": 43.0312, "step": 9604 }, { "epoch": 0.09092113857309189, "grad_norm": 488.20806884765625, "learning_rate": 1.9806020510167925e-06, "loss": 49.4844, "step": 9605 }, { "epoch": 0.09093060459480694, "grad_norm": 171.5394744873047, "learning_rate": 1.9805960412347553e-06, "loss": 16.6367, "step": 9606 }, { "epoch": 0.090940070616522, "grad_norm": 355.5564880371094, "learning_rate": 1.9805900305310217e-06, "loss": 43.5938, "step": 9607 }, { "epoch": 0.09094953663823704, "grad_norm": 399.0274658203125, "learning_rate": 1.9805840189055968e-06, "loss": 25.5859, "step": 9608 }, { "epoch": 0.0909590026599521, "grad_norm": 424.30426025390625, "learning_rate": 1.980578006358487e-06, "loss": 32.5859, "step": 9609 }, { "epoch": 0.09096846868166715, "grad_norm": 368.096923828125, "learning_rate": 1.9805719928896978e-06, "loss": 17.8359, "step": 9610 }, { "epoch": 0.09097793470338221, "grad_norm": 361.4423522949219, "learning_rate": 1.9805659784992346e-06, "loss": 48.5, "step": 9611 }, { "epoch": 0.09098740072509727, "grad_norm": 466.4801940917969, "learning_rate": 1.980559963187103e-06, "loss": 40.2656, "step": 9612 }, { "epoch": 0.09099686674681232, "grad_norm": 762.5079345703125, "learning_rate": 1.9805539469533095e-06, "loss": 60.8867, "step": 9613 }, { "epoch": 0.09100633276852738, "grad_norm": 3.375452756881714, "learning_rate": 1.9805479297978585e-06, "loss": 0.8892, "step": 9614 }, { "epoch": 0.09101579879024242, "grad_norm": 184.2909698486328, "learning_rate": 1.9805419117207565e-06, "loss": 18.9453, "step": 9615 }, { "epoch": 0.09102526481195748, "grad_norm": 409.1004333496094, "learning_rate": 1.9805358927220084e-06, "loss": 17.3984, "step": 9616 }, { "epoch": 0.09103473083367253, "grad_norm": 454.9047546386719, "learning_rate": 1.980529872801621e-06, "loss": 32.4219, "step": 9617 }, { "epoch": 0.09104419685538759, "grad_norm": 362.0184326171875, "learning_rate": 1.9805238519595987e-06, "loss": 33.4766, "step": 9618 }, { "epoch": 0.09105366287710263, "grad_norm": 370.3946228027344, "learning_rate": 1.9805178301959484e-06, "loss": 27.4453, "step": 9619 }, { "epoch": 0.0910631288988177, "grad_norm": 303.4541931152344, "learning_rate": 1.9805118075106746e-06, "loss": 38.7422, "step": 9620 }, { "epoch": 0.09107259492053275, "grad_norm": 162.36831665039062, "learning_rate": 1.9805057839037836e-06, "loss": 23.0703, "step": 9621 }, { "epoch": 0.0910820609422478, "grad_norm": 287.47918701171875, "learning_rate": 1.980499759375281e-06, "loss": 24.3438, "step": 9622 }, { "epoch": 0.09109152696396286, "grad_norm": 2.8562445640563965, "learning_rate": 1.980493733925172e-06, "loss": 0.853, "step": 9623 }, { "epoch": 0.0911009929856779, "grad_norm": 516.0043334960938, "learning_rate": 1.9804877075534635e-06, "loss": 18.0508, "step": 9624 }, { "epoch": 0.09111045900739297, "grad_norm": 285.7124328613281, "learning_rate": 1.9804816802601598e-06, "loss": 37.9102, "step": 9625 }, { "epoch": 0.09111992502910801, "grad_norm": 361.998779296875, "learning_rate": 1.9804756520452673e-06, "loss": 40.6719, "step": 9626 }, { "epoch": 0.09112939105082307, "grad_norm": 659.02392578125, "learning_rate": 1.9804696229087914e-06, "loss": 13.4453, "step": 9627 }, { "epoch": 0.09113885707253812, "grad_norm": 370.3154296875, "learning_rate": 1.980463592850738e-06, "loss": 21.9688, "step": 9628 }, { "epoch": 0.09114832309425318, "grad_norm": 438.93408203125, "learning_rate": 1.9804575618711124e-06, "loss": 37.5547, "step": 9629 }, { "epoch": 0.09115778911596824, "grad_norm": 468.5584411621094, "learning_rate": 1.9804515299699207e-06, "loss": 36.5781, "step": 9630 }, { "epoch": 0.09116725513768328, "grad_norm": 270.81787109375, "learning_rate": 1.9804454971471676e-06, "loss": 20.5039, "step": 9631 }, { "epoch": 0.09117672115939834, "grad_norm": 485.3475646972656, "learning_rate": 1.9804394634028605e-06, "loss": 21.8984, "step": 9632 }, { "epoch": 0.09118618718111339, "grad_norm": 736.6694946289062, "learning_rate": 1.9804334287370036e-06, "loss": 69.5, "step": 9633 }, { "epoch": 0.09119565320282845, "grad_norm": 233.01043701171875, "learning_rate": 1.9804273931496032e-06, "loss": 20.9297, "step": 9634 }, { "epoch": 0.0912051192245435, "grad_norm": 531.791015625, "learning_rate": 1.9804213566406645e-06, "loss": 22.7109, "step": 9635 }, { "epoch": 0.09121458524625856, "grad_norm": 326.66162109375, "learning_rate": 1.980415319210194e-06, "loss": 18.1016, "step": 9636 }, { "epoch": 0.0912240512679736, "grad_norm": 637.7732543945312, "learning_rate": 1.9804092808581963e-06, "loss": 21.668, "step": 9637 }, { "epoch": 0.09123351728968866, "grad_norm": 737.5419921875, "learning_rate": 1.9804032415846784e-06, "loss": 59.1094, "step": 9638 }, { "epoch": 0.09124298331140372, "grad_norm": 436.31658935546875, "learning_rate": 1.980397201389645e-06, "loss": 23.0234, "step": 9639 }, { "epoch": 0.09125244933311877, "grad_norm": 3.3757376670837402, "learning_rate": 1.9803911602731015e-06, "loss": 1.0073, "step": 9640 }, { "epoch": 0.09126191535483383, "grad_norm": 335.6438903808594, "learning_rate": 1.9803851182350548e-06, "loss": 22.4141, "step": 9641 }, { "epoch": 0.09127138137654887, "grad_norm": 925.059814453125, "learning_rate": 1.9803790752755095e-06, "loss": 51.1328, "step": 9642 }, { "epoch": 0.09128084739826393, "grad_norm": 471.06585693359375, "learning_rate": 1.9803730313944715e-06, "loss": 18.1562, "step": 9643 }, { "epoch": 0.09129031341997898, "grad_norm": 283.82000732421875, "learning_rate": 1.980366986591947e-06, "loss": 20.5938, "step": 9644 }, { "epoch": 0.09129977944169404, "grad_norm": 264.7506103515625, "learning_rate": 1.9803609408679406e-06, "loss": 27.9531, "step": 9645 }, { "epoch": 0.0913092454634091, "grad_norm": 1463.376953125, "learning_rate": 1.9803548942224594e-06, "loss": 61.6562, "step": 9646 }, { "epoch": 0.09131871148512415, "grad_norm": 575.117431640625, "learning_rate": 1.980348846655508e-06, "loss": 23.2188, "step": 9647 }, { "epoch": 0.0913281775068392, "grad_norm": 256.2195129394531, "learning_rate": 1.9803427981670924e-06, "loss": 32.4531, "step": 9648 }, { "epoch": 0.09133764352855425, "grad_norm": 206.14244079589844, "learning_rate": 1.980336748757219e-06, "loss": 18.5156, "step": 9649 }, { "epoch": 0.09134710955026931, "grad_norm": 3.2712817192077637, "learning_rate": 1.9803306984258925e-06, "loss": 0.9463, "step": 9650 }, { "epoch": 0.09135657557198436, "grad_norm": 333.7142333984375, "learning_rate": 1.9803246471731184e-06, "loss": 22.2969, "step": 9651 }, { "epoch": 0.09136604159369942, "grad_norm": 375.9570617675781, "learning_rate": 1.9803185949989032e-06, "loss": 34.1328, "step": 9652 }, { "epoch": 0.09137550761541446, "grad_norm": 236.3732452392578, "learning_rate": 1.9803125419032526e-06, "loss": 18.7031, "step": 9653 }, { "epoch": 0.09138497363712952, "grad_norm": 443.5666809082031, "learning_rate": 1.980306487886172e-06, "loss": 34.2969, "step": 9654 }, { "epoch": 0.09139443965884458, "grad_norm": 228.01783752441406, "learning_rate": 1.9803004329476666e-06, "loss": 19.6562, "step": 9655 }, { "epoch": 0.09140390568055963, "grad_norm": 439.1605224609375, "learning_rate": 1.980294377087743e-06, "loss": 27.9062, "step": 9656 }, { "epoch": 0.09141337170227469, "grad_norm": 622.3037109375, "learning_rate": 1.9802883203064064e-06, "loss": 42.6562, "step": 9657 }, { "epoch": 0.09142283772398974, "grad_norm": 714.6419067382812, "learning_rate": 1.9802822626036623e-06, "loss": 25.6406, "step": 9658 }, { "epoch": 0.0914323037457048, "grad_norm": 646.4647216796875, "learning_rate": 1.980276203979517e-06, "loss": 52.3906, "step": 9659 }, { "epoch": 0.09144176976741984, "grad_norm": 481.072509765625, "learning_rate": 1.9802701444339752e-06, "loss": 32.8281, "step": 9660 }, { "epoch": 0.0914512357891349, "grad_norm": 302.57757568359375, "learning_rate": 1.9802640839670437e-06, "loss": 12.7578, "step": 9661 }, { "epoch": 0.09146070181084995, "grad_norm": 232.9759063720703, "learning_rate": 1.980258022578728e-06, "loss": 21.9844, "step": 9662 }, { "epoch": 0.09147016783256501, "grad_norm": 273.5087585449219, "learning_rate": 1.980251960269033e-06, "loss": 14.0391, "step": 9663 }, { "epoch": 0.09147963385428007, "grad_norm": 2.9000864028930664, "learning_rate": 1.9802458970379653e-06, "loss": 0.8613, "step": 9664 }, { "epoch": 0.09148909987599511, "grad_norm": 173.2218475341797, "learning_rate": 1.98023983288553e-06, "loss": 15.8828, "step": 9665 }, { "epoch": 0.09149856589771017, "grad_norm": 451.5128479003906, "learning_rate": 1.980233767811733e-06, "loss": 23.1484, "step": 9666 }, { "epoch": 0.09150803191942522, "grad_norm": 330.98675537109375, "learning_rate": 1.9802277018165805e-06, "loss": 22.3828, "step": 9667 }, { "epoch": 0.09151749794114028, "grad_norm": 749.626953125, "learning_rate": 1.980221634900077e-06, "loss": 34.0781, "step": 9668 }, { "epoch": 0.09152696396285533, "grad_norm": 225.1514434814453, "learning_rate": 1.9802155670622293e-06, "loss": 15.0508, "step": 9669 }, { "epoch": 0.09153642998457039, "grad_norm": 378.3317565917969, "learning_rate": 1.980209498303043e-06, "loss": 18.043, "step": 9670 }, { "epoch": 0.09154589600628543, "grad_norm": 320.65655517578125, "learning_rate": 1.980203428622523e-06, "loss": 25.5938, "step": 9671 }, { "epoch": 0.09155536202800049, "grad_norm": 422.22314453125, "learning_rate": 1.980197358020676e-06, "loss": 45.8906, "step": 9672 }, { "epoch": 0.09156482804971555, "grad_norm": 212.3396453857422, "learning_rate": 1.980191286497507e-06, "loss": 21.0, "step": 9673 }, { "epoch": 0.0915742940714306, "grad_norm": 329.428955078125, "learning_rate": 1.9801852140530218e-06, "loss": 33.5312, "step": 9674 }, { "epoch": 0.09158376009314566, "grad_norm": 202.3539581298828, "learning_rate": 1.980179140687227e-06, "loss": 15.7812, "step": 9675 }, { "epoch": 0.0915932261148607, "grad_norm": 472.1769714355469, "learning_rate": 1.980173066400127e-06, "loss": 26.1406, "step": 9676 }, { "epoch": 0.09160269213657576, "grad_norm": 3.1342339515686035, "learning_rate": 1.980166991191728e-06, "loss": 0.8774, "step": 9677 }, { "epoch": 0.09161215815829081, "grad_norm": 533.1991577148438, "learning_rate": 1.980160915062036e-06, "loss": 42.2734, "step": 9678 }, { "epoch": 0.09162162418000587, "grad_norm": 358.7475891113281, "learning_rate": 1.9801548380110563e-06, "loss": 27.8516, "step": 9679 }, { "epoch": 0.09163109020172092, "grad_norm": 437.4605407714844, "learning_rate": 1.980148760038795e-06, "loss": 22.1719, "step": 9680 }, { "epoch": 0.09164055622343598, "grad_norm": 3.3634350299835205, "learning_rate": 1.9801426811452577e-06, "loss": 0.9243, "step": 9681 }, { "epoch": 0.09165002224515104, "grad_norm": 610.99853515625, "learning_rate": 1.9801366013304496e-06, "loss": 36.3281, "step": 9682 }, { "epoch": 0.09165948826686608, "grad_norm": 753.858642578125, "learning_rate": 1.980130520594377e-06, "loss": 23.9297, "step": 9683 }, { "epoch": 0.09166895428858114, "grad_norm": 257.5164794921875, "learning_rate": 1.980124438937046e-06, "loss": 17.25, "step": 9684 }, { "epoch": 0.09167842031029619, "grad_norm": 185.4252471923828, "learning_rate": 1.9801183563584614e-06, "loss": 21.8594, "step": 9685 }, { "epoch": 0.09168788633201125, "grad_norm": 368.43896484375, "learning_rate": 1.980112272858629e-06, "loss": 24.6641, "step": 9686 }, { "epoch": 0.0916973523537263, "grad_norm": 385.1996154785156, "learning_rate": 1.9801061884375555e-06, "loss": 41.6562, "step": 9687 }, { "epoch": 0.09170681837544135, "grad_norm": 250.1849822998047, "learning_rate": 1.9801001030952454e-06, "loss": 23.125, "step": 9688 }, { "epoch": 0.09171628439715641, "grad_norm": 527.6453857421875, "learning_rate": 1.9800940168317053e-06, "loss": 27.9062, "step": 9689 }, { "epoch": 0.09172575041887146, "grad_norm": 360.0325622558594, "learning_rate": 1.9800879296469405e-06, "loss": 14.5039, "step": 9690 }, { "epoch": 0.09173521644058652, "grad_norm": 270.4943542480469, "learning_rate": 1.9800818415409567e-06, "loss": 18.1484, "step": 9691 }, { "epoch": 0.09174468246230157, "grad_norm": 242.7377166748047, "learning_rate": 1.98007575251376e-06, "loss": 19.2812, "step": 9692 }, { "epoch": 0.09175414848401663, "grad_norm": 245.8706817626953, "learning_rate": 1.9800696625653553e-06, "loss": 9.918, "step": 9693 }, { "epoch": 0.09176361450573167, "grad_norm": 259.55059814453125, "learning_rate": 1.980063571695749e-06, "loss": 20.5703, "step": 9694 }, { "epoch": 0.09177308052744673, "grad_norm": 835.9336547851562, "learning_rate": 1.980057479904947e-06, "loss": 73.0312, "step": 9695 }, { "epoch": 0.09178254654916178, "grad_norm": 2.726426601409912, "learning_rate": 1.980051387192955e-06, "loss": 0.8862, "step": 9696 }, { "epoch": 0.09179201257087684, "grad_norm": 543.9826049804688, "learning_rate": 1.980045293559778e-06, "loss": 61.4531, "step": 9697 }, { "epoch": 0.0918014785925919, "grad_norm": 309.509521484375, "learning_rate": 1.9800391990054224e-06, "loss": 26.3672, "step": 9698 }, { "epoch": 0.09181094461430694, "grad_norm": 165.83055114746094, "learning_rate": 1.9800331035298932e-06, "loss": 23.9766, "step": 9699 }, { "epoch": 0.091820410636022, "grad_norm": 3.6539928913116455, "learning_rate": 1.980027007133197e-06, "loss": 0.8931, "step": 9700 }, { "epoch": 0.09182987665773705, "grad_norm": 975.7266235351562, "learning_rate": 1.9800209098153395e-06, "loss": 45.4062, "step": 9701 }, { "epoch": 0.09183934267945211, "grad_norm": 279.7576599121094, "learning_rate": 1.9800148115763256e-06, "loss": 20.8594, "step": 9702 }, { "epoch": 0.09184880870116716, "grad_norm": 1440.291748046875, "learning_rate": 1.9800087124161616e-06, "loss": 41.7422, "step": 9703 }, { "epoch": 0.09185827472288222, "grad_norm": 3.6639065742492676, "learning_rate": 1.9800026123348536e-06, "loss": 0.8794, "step": 9704 }, { "epoch": 0.09186774074459726, "grad_norm": 466.2031555175781, "learning_rate": 1.9799965113324065e-06, "loss": 42.125, "step": 9705 }, { "epoch": 0.09187720676631232, "grad_norm": 2.749011516571045, "learning_rate": 1.9799904094088263e-06, "loss": 0.792, "step": 9706 }, { "epoch": 0.09188667278802738, "grad_norm": 628.20703125, "learning_rate": 1.9799843065641194e-06, "loss": 56.3438, "step": 9707 }, { "epoch": 0.09189613880974243, "grad_norm": 271.0714416503906, "learning_rate": 1.9799782027982905e-06, "loss": 23.4219, "step": 9708 }, { "epoch": 0.09190560483145749, "grad_norm": 2.986212730407715, "learning_rate": 1.979972098111346e-06, "loss": 0.8164, "step": 9709 }, { "epoch": 0.09191507085317253, "grad_norm": 3.315554618835449, "learning_rate": 1.9799659925032913e-06, "loss": 0.8569, "step": 9710 }, { "epoch": 0.0919245368748876, "grad_norm": 492.279541015625, "learning_rate": 1.9799598859741325e-06, "loss": 46.0469, "step": 9711 }, { "epoch": 0.09193400289660264, "grad_norm": 271.823486328125, "learning_rate": 1.9799537785238754e-06, "loss": 19.2109, "step": 9712 }, { "epoch": 0.0919434689183177, "grad_norm": 625.6539306640625, "learning_rate": 1.9799476701525254e-06, "loss": 50.625, "step": 9713 }, { "epoch": 0.09195293494003275, "grad_norm": 738.1665649414062, "learning_rate": 1.979941560860088e-06, "loss": 36.2617, "step": 9714 }, { "epoch": 0.0919624009617478, "grad_norm": 238.42332458496094, "learning_rate": 1.9799354506465694e-06, "loss": 17.4922, "step": 9715 }, { "epoch": 0.09197186698346287, "grad_norm": 3.135193347930908, "learning_rate": 1.9799293395119756e-06, "loss": 0.9038, "step": 9716 }, { "epoch": 0.09198133300517791, "grad_norm": 2.9406158924102783, "learning_rate": 1.979923227456312e-06, "loss": 0.8701, "step": 9717 }, { "epoch": 0.09199079902689297, "grad_norm": 533.9811401367188, "learning_rate": 1.9799171144795836e-06, "loss": 33.8281, "step": 9718 }, { "epoch": 0.09200026504860802, "grad_norm": 698.6226196289062, "learning_rate": 1.9799110005817973e-06, "loss": 45.6836, "step": 9719 }, { "epoch": 0.09200973107032308, "grad_norm": 274.27496337890625, "learning_rate": 1.9799048857629583e-06, "loss": 26.8594, "step": 9720 }, { "epoch": 0.09201919709203812, "grad_norm": 306.318115234375, "learning_rate": 1.9798987700230726e-06, "loss": 26.3945, "step": 9721 }, { "epoch": 0.09202866311375318, "grad_norm": 815.236572265625, "learning_rate": 1.9798926533621458e-06, "loss": 54.2188, "step": 9722 }, { "epoch": 0.09203812913546823, "grad_norm": 376.8554382324219, "learning_rate": 1.9798865357801837e-06, "loss": 24.8984, "step": 9723 }, { "epoch": 0.09204759515718329, "grad_norm": 202.79550170898438, "learning_rate": 1.979880417277192e-06, "loss": 17.1641, "step": 9724 }, { "epoch": 0.09205706117889835, "grad_norm": 1925.5526123046875, "learning_rate": 1.9798742978531758e-06, "loss": 36.3281, "step": 9725 }, { "epoch": 0.0920665272006134, "grad_norm": 356.54632568359375, "learning_rate": 1.9798681775081422e-06, "loss": 34.5938, "step": 9726 }, { "epoch": 0.09207599322232846, "grad_norm": 666.2531127929688, "learning_rate": 1.9798620562420963e-06, "loss": 57.5469, "step": 9727 }, { "epoch": 0.0920854592440435, "grad_norm": 528.625244140625, "learning_rate": 1.9798559340550435e-06, "loss": 33.7656, "step": 9728 }, { "epoch": 0.09209492526575856, "grad_norm": 986.4994506835938, "learning_rate": 1.97984981094699e-06, "loss": 98.5625, "step": 9729 }, { "epoch": 0.09210439128747361, "grad_norm": 3.0931286811828613, "learning_rate": 1.9798436869179414e-06, "loss": 0.793, "step": 9730 }, { "epoch": 0.09211385730918867, "grad_norm": 400.24285888671875, "learning_rate": 1.9798375619679036e-06, "loss": 26.2031, "step": 9731 }, { "epoch": 0.09212332333090371, "grad_norm": 645.6535034179688, "learning_rate": 1.979831436096882e-06, "loss": 41.125, "step": 9732 }, { "epoch": 0.09213278935261877, "grad_norm": 800.4039916992188, "learning_rate": 1.9798253093048826e-06, "loss": 34.6875, "step": 9733 }, { "epoch": 0.09214225537433383, "grad_norm": 321.9671936035156, "learning_rate": 1.9798191815919114e-06, "loss": 13.9141, "step": 9734 }, { "epoch": 0.09215172139604888, "grad_norm": 476.40679931640625, "learning_rate": 1.9798130529579735e-06, "loss": 20.0508, "step": 9735 }, { "epoch": 0.09216118741776394, "grad_norm": 327.8306579589844, "learning_rate": 1.9798069234030758e-06, "loss": 26.6562, "step": 9736 }, { "epoch": 0.09217065343947899, "grad_norm": 382.8904724121094, "learning_rate": 1.9798007929272224e-06, "loss": 37.8672, "step": 9737 }, { "epoch": 0.09218011946119405, "grad_norm": 564.7841186523438, "learning_rate": 1.9797946615304206e-06, "loss": 40.0, "step": 9738 }, { "epoch": 0.09218958548290909, "grad_norm": 530.4746704101562, "learning_rate": 1.9797885292126754e-06, "loss": 22.0703, "step": 9739 }, { "epoch": 0.09219905150462415, "grad_norm": 1095.3746337890625, "learning_rate": 1.979782395973993e-06, "loss": 59.7656, "step": 9740 }, { "epoch": 0.09220851752633921, "grad_norm": 339.0370178222656, "learning_rate": 1.9797762618143784e-06, "loss": 29.2578, "step": 9741 }, { "epoch": 0.09221798354805426, "grad_norm": 685.6427612304688, "learning_rate": 1.9797701267338384e-06, "loss": 15.6758, "step": 9742 }, { "epoch": 0.09222744956976932, "grad_norm": 407.2082214355469, "learning_rate": 1.9797639907323774e-06, "loss": 9.793, "step": 9743 }, { "epoch": 0.09223691559148436, "grad_norm": 341.4816589355469, "learning_rate": 1.9797578538100028e-06, "loss": 49.1875, "step": 9744 }, { "epoch": 0.09224638161319942, "grad_norm": 420.548095703125, "learning_rate": 1.979751715966719e-06, "loss": 21.0469, "step": 9745 }, { "epoch": 0.09225584763491447, "grad_norm": 969.6497192382812, "learning_rate": 1.9797455772025327e-06, "loss": 25.7656, "step": 9746 }, { "epoch": 0.09226531365662953, "grad_norm": 1156.1646728515625, "learning_rate": 1.979739437517449e-06, "loss": 84.7266, "step": 9747 }, { "epoch": 0.09227477967834458, "grad_norm": 437.7890625, "learning_rate": 1.9797332969114743e-06, "loss": 34.8047, "step": 9748 }, { "epoch": 0.09228424570005964, "grad_norm": 317.7425842285156, "learning_rate": 1.979727155384614e-06, "loss": 21.5938, "step": 9749 }, { "epoch": 0.0922937117217747, "grad_norm": 517.2509155273438, "learning_rate": 1.9797210129368738e-06, "loss": 53.1719, "step": 9750 }, { "epoch": 0.09230317774348974, "grad_norm": 513.4829711914062, "learning_rate": 1.9797148695682595e-06, "loss": 28.875, "step": 9751 }, { "epoch": 0.0923126437652048, "grad_norm": 931.4481811523438, "learning_rate": 1.979708725278777e-06, "loss": 45.4844, "step": 9752 }, { "epoch": 0.09232210978691985, "grad_norm": 782.07177734375, "learning_rate": 1.979702580068432e-06, "loss": 60.3125, "step": 9753 }, { "epoch": 0.09233157580863491, "grad_norm": 442.12860107421875, "learning_rate": 1.97969643393723e-06, "loss": 27.75, "step": 9754 }, { "epoch": 0.09234104183034995, "grad_norm": 177.65625, "learning_rate": 1.979690286885178e-06, "loss": 18.9453, "step": 9755 }, { "epoch": 0.09235050785206501, "grad_norm": 255.34902954101562, "learning_rate": 1.97968413891228e-06, "loss": 12.8555, "step": 9756 }, { "epoch": 0.09235997387378006, "grad_norm": 216.99131774902344, "learning_rate": 1.979677990018543e-06, "loss": 25.7656, "step": 9757 }, { "epoch": 0.09236943989549512, "grad_norm": 3.2548162937164307, "learning_rate": 1.979671840203972e-06, "loss": 0.9922, "step": 9758 }, { "epoch": 0.09237890591721018, "grad_norm": 165.5908203125, "learning_rate": 1.9796656894685738e-06, "loss": 20.5234, "step": 9759 }, { "epoch": 0.09238837193892523, "grad_norm": 622.5933837890625, "learning_rate": 1.9796595378123537e-06, "loss": 38.5469, "step": 9760 }, { "epoch": 0.09239783796064029, "grad_norm": 752.8024291992188, "learning_rate": 1.979653385235317e-06, "loss": 58.7188, "step": 9761 }, { "epoch": 0.09240730398235533, "grad_norm": 465.4765625, "learning_rate": 1.9796472317374696e-06, "loss": 46.0469, "step": 9762 }, { "epoch": 0.09241677000407039, "grad_norm": 483.5605773925781, "learning_rate": 1.979641077318818e-06, "loss": 14.0664, "step": 9763 }, { "epoch": 0.09242623602578544, "grad_norm": 378.791015625, "learning_rate": 1.9796349219793675e-06, "loss": 36.6484, "step": 9764 }, { "epoch": 0.0924357020475005, "grad_norm": 215.74502563476562, "learning_rate": 1.9796287657191235e-06, "loss": 18.2891, "step": 9765 }, { "epoch": 0.09244516806921554, "grad_norm": 1186.552734375, "learning_rate": 1.9796226085380924e-06, "loss": 55.9531, "step": 9766 }, { "epoch": 0.0924546340909306, "grad_norm": 214.33616638183594, "learning_rate": 1.97961645043628e-06, "loss": 14.3281, "step": 9767 }, { "epoch": 0.09246410011264566, "grad_norm": 260.2725830078125, "learning_rate": 1.9796102914136916e-06, "loss": 20.2812, "step": 9768 }, { "epoch": 0.09247356613436071, "grad_norm": 1186.65966796875, "learning_rate": 1.979604131470333e-06, "loss": 29.9688, "step": 9769 }, { "epoch": 0.09248303215607577, "grad_norm": 395.4815368652344, "learning_rate": 1.9795979706062106e-06, "loss": 48.6875, "step": 9770 }, { "epoch": 0.09249249817779082, "grad_norm": 934.4725341796875, "learning_rate": 1.9795918088213297e-06, "loss": 60.2109, "step": 9771 }, { "epoch": 0.09250196419950588, "grad_norm": 319.33758544921875, "learning_rate": 1.9795856461156965e-06, "loss": 30.2031, "step": 9772 }, { "epoch": 0.09251143022122092, "grad_norm": 658.2109985351562, "learning_rate": 1.979579482489316e-06, "loss": 54.2188, "step": 9773 }, { "epoch": 0.09252089624293598, "grad_norm": 458.41168212890625, "learning_rate": 1.979573317942195e-06, "loss": 49.2812, "step": 9774 }, { "epoch": 0.09253036226465103, "grad_norm": 288.0232238769531, "learning_rate": 1.979567152474339e-06, "loss": 40.1094, "step": 9775 }, { "epoch": 0.09253982828636609, "grad_norm": 394.9178771972656, "learning_rate": 1.979560986085753e-06, "loss": 40.4688, "step": 9776 }, { "epoch": 0.09254929430808115, "grad_norm": 415.4179992675781, "learning_rate": 1.9795548187764436e-06, "loss": 30.0312, "step": 9777 }, { "epoch": 0.0925587603297962, "grad_norm": 965.8900756835938, "learning_rate": 1.9795486505464167e-06, "loss": 49.5625, "step": 9778 }, { "epoch": 0.09256822635151125, "grad_norm": 365.87799072265625, "learning_rate": 1.9795424813956776e-06, "loss": 46.875, "step": 9779 }, { "epoch": 0.0925776923732263, "grad_norm": 475.97589111328125, "learning_rate": 1.979536311324232e-06, "loss": 29.5469, "step": 9780 }, { "epoch": 0.09258715839494136, "grad_norm": 584.5625, "learning_rate": 1.979530140332086e-06, "loss": 25.1055, "step": 9781 }, { "epoch": 0.0925966244166564, "grad_norm": 297.8659362792969, "learning_rate": 1.9795239684192457e-06, "loss": 20.2969, "step": 9782 }, { "epoch": 0.09260609043837147, "grad_norm": 882.7657470703125, "learning_rate": 1.9795177955857166e-06, "loss": 45.3281, "step": 9783 }, { "epoch": 0.09261555646008653, "grad_norm": 390.9586486816406, "learning_rate": 1.9795116218315043e-06, "loss": 22.7969, "step": 9784 }, { "epoch": 0.09262502248180157, "grad_norm": 3.84670352935791, "learning_rate": 1.979505447156615e-06, "loss": 0.7783, "step": 9785 }, { "epoch": 0.09263448850351663, "grad_norm": 355.2179260253906, "learning_rate": 1.979499271561054e-06, "loss": 11.4453, "step": 9786 }, { "epoch": 0.09264395452523168, "grad_norm": 1213.56005859375, "learning_rate": 1.979493095044828e-06, "loss": 53.5234, "step": 9787 }, { "epoch": 0.09265342054694674, "grad_norm": 295.291015625, "learning_rate": 1.9794869176079415e-06, "loss": 19.0469, "step": 9788 }, { "epoch": 0.09266288656866178, "grad_norm": 373.337158203125, "learning_rate": 1.9794807392504015e-06, "loss": 26.2656, "step": 9789 }, { "epoch": 0.09267235259037684, "grad_norm": 418.4180908203125, "learning_rate": 1.979474559972213e-06, "loss": 30.7812, "step": 9790 }, { "epoch": 0.09268181861209189, "grad_norm": 172.42074584960938, "learning_rate": 1.979468379773382e-06, "loss": 24.2812, "step": 9791 }, { "epoch": 0.09269128463380695, "grad_norm": 800.954345703125, "learning_rate": 1.9794621986539147e-06, "loss": 49.5938, "step": 9792 }, { "epoch": 0.09270075065552201, "grad_norm": 755.182373046875, "learning_rate": 1.9794560166138172e-06, "loss": 36.4219, "step": 9793 }, { "epoch": 0.09271021667723706, "grad_norm": 651.0326538085938, "learning_rate": 1.979449833653094e-06, "loss": 22.0703, "step": 9794 }, { "epoch": 0.09271968269895212, "grad_norm": 574.7891845703125, "learning_rate": 1.979443649771752e-06, "loss": 23.6094, "step": 9795 }, { "epoch": 0.09272914872066716, "grad_norm": 495.2911376953125, "learning_rate": 1.9794374649697964e-06, "loss": 42.5, "step": 9796 }, { "epoch": 0.09273861474238222, "grad_norm": 300.8608093261719, "learning_rate": 1.979431279247234e-06, "loss": 20.7266, "step": 9797 }, { "epoch": 0.09274808076409727, "grad_norm": 3.0941474437713623, "learning_rate": 1.979425092604069e-06, "loss": 0.873, "step": 9798 }, { "epoch": 0.09275754678581233, "grad_norm": 3.620229721069336, "learning_rate": 1.9794189050403085e-06, "loss": 0.856, "step": 9799 }, { "epoch": 0.09276701280752737, "grad_norm": 887.5234375, "learning_rate": 1.979412716555958e-06, "loss": 56.1875, "step": 9800 }, { "epoch": 0.09277647882924243, "grad_norm": 380.1490783691406, "learning_rate": 1.979406527151023e-06, "loss": 21.2188, "step": 9801 }, { "epoch": 0.0927859448509575, "grad_norm": 382.5151062011719, "learning_rate": 1.9794003368255103e-06, "loss": 27.9141, "step": 9802 }, { "epoch": 0.09279541087267254, "grad_norm": 424.9163513183594, "learning_rate": 1.979394145579424e-06, "loss": 18.4297, "step": 9803 }, { "epoch": 0.0928048768943876, "grad_norm": 166.53240966796875, "learning_rate": 1.9793879534127717e-06, "loss": 23.1875, "step": 9804 }, { "epoch": 0.09281434291610265, "grad_norm": 342.6606750488281, "learning_rate": 1.9793817603255583e-06, "loss": 31.1953, "step": 9805 }, { "epoch": 0.0928238089378177, "grad_norm": 387.8049011230469, "learning_rate": 1.9793755663177895e-06, "loss": 31.375, "step": 9806 }, { "epoch": 0.09283327495953275, "grad_norm": 705.0557250976562, "learning_rate": 1.9793693713894717e-06, "loss": 29.7422, "step": 9807 }, { "epoch": 0.09284274098124781, "grad_norm": 433.0641784667969, "learning_rate": 1.9793631755406103e-06, "loss": 45.9531, "step": 9808 }, { "epoch": 0.09285220700296286, "grad_norm": 240.56919860839844, "learning_rate": 1.979356978771211e-06, "loss": 21.3047, "step": 9809 }, { "epoch": 0.09286167302467792, "grad_norm": 453.6480407714844, "learning_rate": 1.97935078108128e-06, "loss": 31.5938, "step": 9810 }, { "epoch": 0.09287113904639298, "grad_norm": 502.16766357421875, "learning_rate": 1.979344582470823e-06, "loss": 29.625, "step": 9811 }, { "epoch": 0.09288060506810802, "grad_norm": 245.7512664794922, "learning_rate": 1.9793383829398457e-06, "loss": 21.0312, "step": 9812 }, { "epoch": 0.09289007108982308, "grad_norm": 316.44244384765625, "learning_rate": 1.9793321824883538e-06, "loss": 28.5, "step": 9813 }, { "epoch": 0.09289953711153813, "grad_norm": 494.3861083984375, "learning_rate": 1.979325981116354e-06, "loss": 18.6875, "step": 9814 }, { "epoch": 0.09290900313325319, "grad_norm": 272.5976867675781, "learning_rate": 1.9793197788238507e-06, "loss": 17.1211, "step": 9815 }, { "epoch": 0.09291846915496824, "grad_norm": 444.637939453125, "learning_rate": 1.9793135756108514e-06, "loss": 21.0703, "step": 9816 }, { "epoch": 0.0929279351766833, "grad_norm": 515.8795166015625, "learning_rate": 1.9793073714773607e-06, "loss": 22.457, "step": 9817 }, { "epoch": 0.09293740119839834, "grad_norm": 661.4091796875, "learning_rate": 1.9793011664233844e-06, "loss": 35.9492, "step": 9818 }, { "epoch": 0.0929468672201134, "grad_norm": 274.75390625, "learning_rate": 1.979294960448929e-06, "loss": 28.6016, "step": 9819 }, { "epoch": 0.09295633324182846, "grad_norm": 299.4797058105469, "learning_rate": 1.9792887535540002e-06, "loss": 18.625, "step": 9820 }, { "epoch": 0.09296579926354351, "grad_norm": 177.4140625, "learning_rate": 1.9792825457386034e-06, "loss": 19.8125, "step": 9821 }, { "epoch": 0.09297526528525857, "grad_norm": 605.3327026367188, "learning_rate": 1.979276337002745e-06, "loss": 45.5156, "step": 9822 }, { "epoch": 0.09298473130697361, "grad_norm": 176.46192932128906, "learning_rate": 1.97927012734643e-06, "loss": 20.3359, "step": 9823 }, { "epoch": 0.09299419732868867, "grad_norm": 524.7268676757812, "learning_rate": 1.9792639167696656e-06, "loss": 14.0469, "step": 9824 }, { "epoch": 0.09300366335040372, "grad_norm": 3.2733848094940186, "learning_rate": 1.9792577052724562e-06, "loss": 0.8525, "step": 9825 }, { "epoch": 0.09301312937211878, "grad_norm": 470.464599609375, "learning_rate": 1.9792514928548083e-06, "loss": 18.3359, "step": 9826 }, { "epoch": 0.09302259539383384, "grad_norm": 239.26205444335938, "learning_rate": 1.979245279516728e-06, "loss": 21.8438, "step": 9827 }, { "epoch": 0.09303206141554889, "grad_norm": 224.10159301757812, "learning_rate": 1.9792390652582207e-06, "loss": 18.2734, "step": 9828 }, { "epoch": 0.09304152743726395, "grad_norm": 276.6976013183594, "learning_rate": 1.979232850079293e-06, "loss": 18.6797, "step": 9829 }, { "epoch": 0.09305099345897899, "grad_norm": 434.7705383300781, "learning_rate": 1.979226633979949e-06, "loss": 25.8672, "step": 9830 }, { "epoch": 0.09306045948069405, "grad_norm": 466.1347961425781, "learning_rate": 1.979220416960196e-06, "loss": 33.9375, "step": 9831 }, { "epoch": 0.0930699255024091, "grad_norm": 440.1026611328125, "learning_rate": 1.97921419902004e-06, "loss": 22.7812, "step": 9832 }, { "epoch": 0.09307939152412416, "grad_norm": 213.80934143066406, "learning_rate": 1.979207980159486e-06, "loss": 21.1328, "step": 9833 }, { "epoch": 0.0930888575458392, "grad_norm": 377.20458984375, "learning_rate": 1.9792017603785404e-06, "loss": 49.3438, "step": 9834 }, { "epoch": 0.09309832356755426, "grad_norm": 1610.53759765625, "learning_rate": 1.9791955396772084e-06, "loss": 10.875, "step": 9835 }, { "epoch": 0.09310778958926932, "grad_norm": 178.00071716308594, "learning_rate": 1.979189318055497e-06, "loss": 17.7891, "step": 9836 }, { "epoch": 0.09311725561098437, "grad_norm": 263.30853271484375, "learning_rate": 1.9791830955134106e-06, "loss": 24.6172, "step": 9837 }, { "epoch": 0.09312672163269943, "grad_norm": 184.97491455078125, "learning_rate": 1.9791768720509562e-06, "loss": 19.1797, "step": 9838 }, { "epoch": 0.09313618765441448, "grad_norm": 156.04193115234375, "learning_rate": 1.979170647668139e-06, "loss": 15.8867, "step": 9839 }, { "epoch": 0.09314565367612954, "grad_norm": 761.3607177734375, "learning_rate": 1.9791644223649657e-06, "loss": 56.625, "step": 9840 }, { "epoch": 0.09315511969784458, "grad_norm": 186.2762908935547, "learning_rate": 1.9791581961414405e-06, "loss": 20.7734, "step": 9841 }, { "epoch": 0.09316458571955964, "grad_norm": 596.5491943359375, "learning_rate": 1.979151968997571e-06, "loss": 58.1562, "step": 9842 }, { "epoch": 0.09317405174127469, "grad_norm": 729.2365112304688, "learning_rate": 1.9791457409333627e-06, "loss": 31.3711, "step": 9843 }, { "epoch": 0.09318351776298975, "grad_norm": 406.8029479980469, "learning_rate": 1.97913951194882e-06, "loss": 25.4844, "step": 9844 }, { "epoch": 0.09319298378470481, "grad_norm": 241.2714080810547, "learning_rate": 1.979133282043951e-06, "loss": 17.6055, "step": 9845 }, { "epoch": 0.09320244980641985, "grad_norm": 406.24609375, "learning_rate": 1.9791270512187596e-06, "loss": 30.625, "step": 9846 }, { "epoch": 0.09321191582813491, "grad_norm": 230.16476440429688, "learning_rate": 1.9791208194732526e-06, "loss": 25.1875, "step": 9847 }, { "epoch": 0.09322138184984996, "grad_norm": 176.7630615234375, "learning_rate": 1.9791145868074363e-06, "loss": 26.4766, "step": 9848 }, { "epoch": 0.09323084787156502, "grad_norm": 474.333251953125, "learning_rate": 1.9791083532213153e-06, "loss": 30.25, "step": 9849 }, { "epoch": 0.09324031389328007, "grad_norm": 211.48951721191406, "learning_rate": 1.9791021187148967e-06, "loss": 21.4766, "step": 9850 }, { "epoch": 0.09324977991499513, "grad_norm": 266.3599853515625, "learning_rate": 1.9790958832881854e-06, "loss": 18.9609, "step": 9851 }, { "epoch": 0.09325924593671017, "grad_norm": 192.31561279296875, "learning_rate": 1.9790896469411876e-06, "loss": 16.9531, "step": 9852 }, { "epoch": 0.09326871195842523, "grad_norm": 388.3170166015625, "learning_rate": 1.9790834096739097e-06, "loss": 30.9531, "step": 9853 }, { "epoch": 0.09327817798014029, "grad_norm": 3.4114723205566406, "learning_rate": 1.9790771714863567e-06, "loss": 0.9966, "step": 9854 }, { "epoch": 0.09328764400185534, "grad_norm": 348.1025695800781, "learning_rate": 1.979070932378535e-06, "loss": 34.2578, "step": 9855 }, { "epoch": 0.0932971100235704, "grad_norm": 3.0652918815612793, "learning_rate": 1.97906469235045e-06, "loss": 0.9351, "step": 9856 }, { "epoch": 0.09330657604528544, "grad_norm": 802.1818237304688, "learning_rate": 1.979058451402108e-06, "loss": 63.9805, "step": 9857 }, { "epoch": 0.0933160420670005, "grad_norm": 584.939697265625, "learning_rate": 1.979052209533515e-06, "loss": 41.8125, "step": 9858 }, { "epoch": 0.09332550808871555, "grad_norm": 578.8598022460938, "learning_rate": 1.9790459667446768e-06, "loss": 53.1719, "step": 9859 }, { "epoch": 0.09333497411043061, "grad_norm": 2247.509765625, "learning_rate": 1.9790397230355985e-06, "loss": 14.0039, "step": 9860 }, { "epoch": 0.09334444013214566, "grad_norm": 239.22776794433594, "learning_rate": 1.979033478406287e-06, "loss": 20.3125, "step": 9861 }, { "epoch": 0.09335390615386072, "grad_norm": 511.43280029296875, "learning_rate": 1.9790272328567477e-06, "loss": 27.4375, "step": 9862 }, { "epoch": 0.09336337217557578, "grad_norm": 314.92962646484375, "learning_rate": 1.9790209863869866e-06, "loss": 18.4688, "step": 9863 }, { "epoch": 0.09337283819729082, "grad_norm": 235.13534545898438, "learning_rate": 1.979014738997009e-06, "loss": 22.3906, "step": 9864 }, { "epoch": 0.09338230421900588, "grad_norm": 793.1541137695312, "learning_rate": 1.9790084906868214e-06, "loss": 34.7656, "step": 9865 }, { "epoch": 0.09339177024072093, "grad_norm": 220.02383422851562, "learning_rate": 1.9790022414564296e-06, "loss": 21.9844, "step": 9866 }, { "epoch": 0.09340123626243599, "grad_norm": 405.81500244140625, "learning_rate": 1.9789959913058394e-06, "loss": 26.0, "step": 9867 }, { "epoch": 0.09341070228415103, "grad_norm": 260.3953857421875, "learning_rate": 1.978989740235057e-06, "loss": 31.3906, "step": 9868 }, { "epoch": 0.0934201683058661, "grad_norm": 224.79229736328125, "learning_rate": 1.978983488244087e-06, "loss": 24.1094, "step": 9869 }, { "epoch": 0.09342963432758115, "grad_norm": 852.2725830078125, "learning_rate": 1.9789772353329366e-06, "loss": 63.2031, "step": 9870 }, { "epoch": 0.0934391003492962, "grad_norm": 175.42343139648438, "learning_rate": 1.9789709815016118e-06, "loss": 17.1992, "step": 9871 }, { "epoch": 0.09344856637101126, "grad_norm": 161.52371215820312, "learning_rate": 1.9789647267501173e-06, "loss": 19.3281, "step": 9872 }, { "epoch": 0.0934580323927263, "grad_norm": 243.2836456298828, "learning_rate": 1.97895847107846e-06, "loss": 15.8828, "step": 9873 }, { "epoch": 0.09346749841444137, "grad_norm": 281.77191162109375, "learning_rate": 1.978952214486645e-06, "loss": 24.7031, "step": 9874 }, { "epoch": 0.09347696443615641, "grad_norm": 276.2549133300781, "learning_rate": 1.978945956974679e-06, "loss": 9.7578, "step": 9875 }, { "epoch": 0.09348643045787147, "grad_norm": 307.3795166015625, "learning_rate": 1.978939698542568e-06, "loss": 18.6953, "step": 9876 }, { "epoch": 0.09349589647958652, "grad_norm": 645.7089233398438, "learning_rate": 1.9789334391903165e-06, "loss": 28.2109, "step": 9877 }, { "epoch": 0.09350536250130158, "grad_norm": 477.6346435546875, "learning_rate": 1.9789271789179316e-06, "loss": 40.1094, "step": 9878 }, { "epoch": 0.09351482852301664, "grad_norm": 801.1768188476562, "learning_rate": 1.978920917725419e-06, "loss": 20.8047, "step": 9879 }, { "epoch": 0.09352429454473168, "grad_norm": 346.8614807128906, "learning_rate": 1.978914655612784e-06, "loss": 31.0469, "step": 9880 }, { "epoch": 0.09353376056644674, "grad_norm": 307.6673889160156, "learning_rate": 1.978908392580033e-06, "loss": 23.2266, "step": 9881 }, { "epoch": 0.09354322658816179, "grad_norm": 363.0068359375, "learning_rate": 1.978902128627172e-06, "loss": 52.8281, "step": 9882 }, { "epoch": 0.09355269260987685, "grad_norm": 558.7035522460938, "learning_rate": 1.9788958637542068e-06, "loss": 63.0312, "step": 9883 }, { "epoch": 0.0935621586315919, "grad_norm": 447.5443420410156, "learning_rate": 1.9788895979611428e-06, "loss": 51.5703, "step": 9884 }, { "epoch": 0.09357162465330696, "grad_norm": 311.1153564453125, "learning_rate": 1.9788833312479863e-06, "loss": 30.7188, "step": 9885 }, { "epoch": 0.093581090675022, "grad_norm": 377.9522399902344, "learning_rate": 1.978877063614743e-06, "loss": 20.5312, "step": 9886 }, { "epoch": 0.09359055669673706, "grad_norm": 2.7749695777893066, "learning_rate": 1.97887079506142e-06, "loss": 0.7898, "step": 9887 }, { "epoch": 0.09360002271845212, "grad_norm": 272.0263977050781, "learning_rate": 1.978864525588021e-06, "loss": 44.3125, "step": 9888 }, { "epoch": 0.09360948874016717, "grad_norm": 208.9983673095703, "learning_rate": 1.978858255194554e-06, "loss": 20.5703, "step": 9889 }, { "epoch": 0.09361895476188223, "grad_norm": 741.0450439453125, "learning_rate": 1.978851983881023e-06, "loss": 25.5156, "step": 9890 }, { "epoch": 0.09362842078359727, "grad_norm": 510.52508544921875, "learning_rate": 1.978845711647435e-06, "loss": 46.7578, "step": 9891 }, { "epoch": 0.09363788680531233, "grad_norm": 415.8538513183594, "learning_rate": 1.9788394384937958e-06, "loss": 41.9375, "step": 9892 }, { "epoch": 0.09364735282702738, "grad_norm": 309.2653503417969, "learning_rate": 1.978833164420111e-06, "loss": 29.5312, "step": 9893 }, { "epoch": 0.09365681884874244, "grad_norm": 632.9796142578125, "learning_rate": 1.9788268894263867e-06, "loss": 30.2891, "step": 9894 }, { "epoch": 0.09366628487045749, "grad_norm": 323.8116760253906, "learning_rate": 1.978820613512629e-06, "loss": 20.5938, "step": 9895 }, { "epoch": 0.09367575089217255, "grad_norm": 224.24293518066406, "learning_rate": 1.9788143366788443e-06, "loss": 18.6641, "step": 9896 }, { "epoch": 0.0936852169138876, "grad_norm": 250.4203643798828, "learning_rate": 1.978808058925037e-06, "loss": 11.1875, "step": 9897 }, { "epoch": 0.09369468293560265, "grad_norm": 462.9657287597656, "learning_rate": 1.9788017802512136e-06, "loss": 19.5625, "step": 9898 }, { "epoch": 0.09370414895731771, "grad_norm": 319.6261901855469, "learning_rate": 1.9787955006573808e-06, "loss": 25.6484, "step": 9899 }, { "epoch": 0.09371361497903276, "grad_norm": 503.3995361328125, "learning_rate": 1.9787892201435436e-06, "loss": 41.1406, "step": 9900 }, { "epoch": 0.09372308100074782, "grad_norm": 436.50360107421875, "learning_rate": 1.9787829387097083e-06, "loss": 21.8047, "step": 9901 }, { "epoch": 0.09373254702246286, "grad_norm": 444.47564697265625, "learning_rate": 1.9787766563558805e-06, "loss": 49.7188, "step": 9902 }, { "epoch": 0.09374201304417792, "grad_norm": 485.2574157714844, "learning_rate": 1.9787703730820665e-06, "loss": 38.6133, "step": 9903 }, { "epoch": 0.09375147906589297, "grad_norm": 204.71176147460938, "learning_rate": 1.9787640888882718e-06, "loss": 18.1328, "step": 9904 }, { "epoch": 0.09376094508760803, "grad_norm": 755.2777099609375, "learning_rate": 1.9787578037745027e-06, "loss": 82.4766, "step": 9905 }, { "epoch": 0.09377041110932309, "grad_norm": 427.2696838378906, "learning_rate": 1.9787515177407653e-06, "loss": 28.9531, "step": 9906 }, { "epoch": 0.09377987713103814, "grad_norm": 381.16754150390625, "learning_rate": 1.9787452307870645e-06, "loss": 26.2266, "step": 9907 }, { "epoch": 0.0937893431527532, "grad_norm": 3.0898449420928955, "learning_rate": 1.978738942913407e-06, "loss": 0.9766, "step": 9908 }, { "epoch": 0.09379880917446824, "grad_norm": 387.1581726074219, "learning_rate": 1.978732654119799e-06, "loss": 23.5781, "step": 9909 }, { "epoch": 0.0938082751961833, "grad_norm": 253.9192352294922, "learning_rate": 1.9787263644062456e-06, "loss": 23.5, "step": 9910 }, { "epoch": 0.09381774121789835, "grad_norm": 204.65267944335938, "learning_rate": 1.9787200737727533e-06, "loss": 19.0859, "step": 9911 }, { "epoch": 0.09382720723961341, "grad_norm": 373.2955322265625, "learning_rate": 1.9787137822193278e-06, "loss": 31.3281, "step": 9912 }, { "epoch": 0.09383667326132847, "grad_norm": 278.4083251953125, "learning_rate": 1.978707489745975e-06, "loss": 25.4688, "step": 9913 }, { "epoch": 0.09384613928304351, "grad_norm": 369.2250671386719, "learning_rate": 1.9787011963527006e-06, "loss": 20.1797, "step": 9914 }, { "epoch": 0.09385560530475857, "grad_norm": 964.0662231445312, "learning_rate": 1.978694902039511e-06, "loss": 53.625, "step": 9915 }, { "epoch": 0.09386507132647362, "grad_norm": 509.7144775390625, "learning_rate": 1.9786886068064115e-06, "loss": 45.7656, "step": 9916 }, { "epoch": 0.09387453734818868, "grad_norm": 824.7598266601562, "learning_rate": 1.9786823106534086e-06, "loss": 32.4297, "step": 9917 }, { "epoch": 0.09388400336990373, "grad_norm": 443.66961669921875, "learning_rate": 1.978676013580508e-06, "loss": 19.2656, "step": 9918 }, { "epoch": 0.09389346939161879, "grad_norm": 284.842041015625, "learning_rate": 1.9786697155877157e-06, "loss": 25.2734, "step": 9919 }, { "epoch": 0.09390293541333383, "grad_norm": 318.54193115234375, "learning_rate": 1.9786634166750375e-06, "loss": 22.7188, "step": 9920 }, { "epoch": 0.09391240143504889, "grad_norm": 412.5765075683594, "learning_rate": 1.9786571168424794e-06, "loss": 10.0469, "step": 9921 }, { "epoch": 0.09392186745676395, "grad_norm": 299.0815734863281, "learning_rate": 1.978650816090047e-06, "loss": 38.0469, "step": 9922 }, { "epoch": 0.093931333478479, "grad_norm": 279.6338195800781, "learning_rate": 1.978644514417747e-06, "loss": 25.2188, "step": 9923 }, { "epoch": 0.09394079950019406, "grad_norm": 459.0413818359375, "learning_rate": 1.978638211825584e-06, "loss": 44.3125, "step": 9924 }, { "epoch": 0.0939502655219091, "grad_norm": 300.9391784667969, "learning_rate": 1.9786319083135654e-06, "loss": 20.0312, "step": 9925 }, { "epoch": 0.09395973154362416, "grad_norm": 213.55482482910156, "learning_rate": 1.9786256038816963e-06, "loss": 20.6875, "step": 9926 }, { "epoch": 0.09396919756533921, "grad_norm": 427.4632263183594, "learning_rate": 1.978619298529983e-06, "loss": 15.9141, "step": 9927 }, { "epoch": 0.09397866358705427, "grad_norm": 295.53802490234375, "learning_rate": 1.9786129922584307e-06, "loss": 21.4609, "step": 9928 }, { "epoch": 0.09398812960876932, "grad_norm": 445.13519287109375, "learning_rate": 1.9786066850670465e-06, "loss": 40.6875, "step": 9929 }, { "epoch": 0.09399759563048438, "grad_norm": 256.2403259277344, "learning_rate": 1.9786003769558353e-06, "loss": 34.1016, "step": 9930 }, { "epoch": 0.09400706165219944, "grad_norm": 240.35830688476562, "learning_rate": 1.9785940679248035e-06, "loss": 31.9297, "step": 9931 }, { "epoch": 0.09401652767391448, "grad_norm": 321.9465637207031, "learning_rate": 1.978587757973957e-06, "loss": 18.4844, "step": 9932 }, { "epoch": 0.09402599369562954, "grad_norm": 690.6354370117188, "learning_rate": 1.9785814471033017e-06, "loss": 32.7344, "step": 9933 }, { "epoch": 0.09403545971734459, "grad_norm": 2.996217966079712, "learning_rate": 1.9785751353128433e-06, "loss": 0.8867, "step": 9934 }, { "epoch": 0.09404492573905965, "grad_norm": 425.3062438964844, "learning_rate": 1.978568822602588e-06, "loss": 37.0547, "step": 9935 }, { "epoch": 0.0940543917607747, "grad_norm": 393.1328430175781, "learning_rate": 1.9785625089725416e-06, "loss": 32.9219, "step": 9936 }, { "epoch": 0.09406385778248975, "grad_norm": 294.85009765625, "learning_rate": 1.9785561944227102e-06, "loss": 27.7344, "step": 9937 }, { "epoch": 0.0940733238042048, "grad_norm": 294.02862548828125, "learning_rate": 1.9785498789530998e-06, "loss": 34.9609, "step": 9938 }, { "epoch": 0.09408278982591986, "grad_norm": 1009.8809204101562, "learning_rate": 1.9785435625637157e-06, "loss": 41.2031, "step": 9939 }, { "epoch": 0.09409225584763492, "grad_norm": 559.0762939453125, "learning_rate": 1.9785372452545644e-06, "loss": 61.4219, "step": 9940 }, { "epoch": 0.09410172186934997, "grad_norm": 255.0807342529297, "learning_rate": 1.9785309270256522e-06, "loss": 15.4922, "step": 9941 }, { "epoch": 0.09411118789106503, "grad_norm": 537.6625366210938, "learning_rate": 1.9785246078769842e-06, "loss": 20.9375, "step": 9942 }, { "epoch": 0.09412065391278007, "grad_norm": 269.2287902832031, "learning_rate": 1.978518287808567e-06, "loss": 14.6875, "step": 9943 }, { "epoch": 0.09413011993449513, "grad_norm": 3.811760663986206, "learning_rate": 1.978511966820406e-06, "loss": 1.0659, "step": 9944 }, { "epoch": 0.09413958595621018, "grad_norm": 3.2331528663635254, "learning_rate": 1.9785056449125077e-06, "loss": 0.9336, "step": 9945 }, { "epoch": 0.09414905197792524, "grad_norm": 328.1126403808594, "learning_rate": 1.9784993220848774e-06, "loss": 32.9062, "step": 9946 }, { "epoch": 0.09415851799964028, "grad_norm": 766.6487426757812, "learning_rate": 1.9784929983375215e-06, "loss": 49.4531, "step": 9947 }, { "epoch": 0.09416798402135534, "grad_norm": 3.4066975116729736, "learning_rate": 1.978486673670446e-06, "loss": 0.9458, "step": 9948 }, { "epoch": 0.0941774500430704, "grad_norm": 267.7322998046875, "learning_rate": 1.978480348083657e-06, "loss": 38.9453, "step": 9949 }, { "epoch": 0.09418691606478545, "grad_norm": 998.2938232421875, "learning_rate": 1.9784740215771592e-06, "loss": 56.2031, "step": 9950 }, { "epoch": 0.09419638208650051, "grad_norm": 3.1486639976501465, "learning_rate": 1.9784676941509604e-06, "loss": 0.8584, "step": 9951 }, { "epoch": 0.09420584810821556, "grad_norm": 485.8135681152344, "learning_rate": 1.978461365805065e-06, "loss": 21.9766, "step": 9952 }, { "epoch": 0.09421531412993062, "grad_norm": 354.8214416503906, "learning_rate": 1.9784550365394797e-06, "loss": 21.6094, "step": 9953 }, { "epoch": 0.09422478015164566, "grad_norm": 520.8807373046875, "learning_rate": 1.9784487063542104e-06, "loss": 34.8125, "step": 9954 }, { "epoch": 0.09423424617336072, "grad_norm": 233.48196411132812, "learning_rate": 1.9784423752492632e-06, "loss": 22.4688, "step": 9955 }, { "epoch": 0.09424371219507578, "grad_norm": 486.7530822753906, "learning_rate": 1.9784360432246437e-06, "loss": 33.9219, "step": 9956 }, { "epoch": 0.09425317821679083, "grad_norm": 348.7976989746094, "learning_rate": 1.9784297102803577e-06, "loss": 19.4766, "step": 9957 }, { "epoch": 0.09426264423850589, "grad_norm": 199.7154541015625, "learning_rate": 1.9784233764164116e-06, "loss": 24.5703, "step": 9958 }, { "epoch": 0.09427211026022093, "grad_norm": 981.094970703125, "learning_rate": 1.9784170416328114e-06, "loss": 28.5625, "step": 9959 }, { "epoch": 0.094281576281936, "grad_norm": 225.95791625976562, "learning_rate": 1.9784107059295625e-06, "loss": 17.0156, "step": 9960 }, { "epoch": 0.09429104230365104, "grad_norm": 636.669921875, "learning_rate": 1.9784043693066713e-06, "loss": 37.9219, "step": 9961 }, { "epoch": 0.0943005083253661, "grad_norm": 256.5860595703125, "learning_rate": 1.9783980317641437e-06, "loss": 23.7812, "step": 9962 }, { "epoch": 0.09430997434708115, "grad_norm": 222.3970489501953, "learning_rate": 1.9783916933019853e-06, "loss": 23.0469, "step": 9963 }, { "epoch": 0.0943194403687962, "grad_norm": 219.58612060546875, "learning_rate": 1.9783853539202027e-06, "loss": 31.1875, "step": 9964 }, { "epoch": 0.09432890639051127, "grad_norm": 588.8779907226562, "learning_rate": 1.978379013618801e-06, "loss": 29.3594, "step": 9965 }, { "epoch": 0.09433837241222631, "grad_norm": 2.935382604598999, "learning_rate": 1.9783726723977874e-06, "loss": 0.9565, "step": 9966 }, { "epoch": 0.09434783843394137, "grad_norm": 392.7066955566406, "learning_rate": 1.978366330257167e-06, "loss": 35.3125, "step": 9967 }, { "epoch": 0.09435730445565642, "grad_norm": 194.5018310546875, "learning_rate": 1.9783599871969455e-06, "loss": 21.9844, "step": 9968 }, { "epoch": 0.09436677047737148, "grad_norm": 670.9620971679688, "learning_rate": 1.9783536432171293e-06, "loss": 17.5742, "step": 9969 }, { "epoch": 0.09437623649908652, "grad_norm": 2.9602293968200684, "learning_rate": 1.9783472983177246e-06, "loss": 0.7705, "step": 9970 }, { "epoch": 0.09438570252080158, "grad_norm": 144.0406951904297, "learning_rate": 1.978340952498737e-06, "loss": 21.1172, "step": 9971 }, { "epoch": 0.09439516854251663, "grad_norm": 381.0456848144531, "learning_rate": 1.9783346057601725e-06, "loss": 25.6953, "step": 9972 }, { "epoch": 0.09440463456423169, "grad_norm": 212.4360809326172, "learning_rate": 1.9783282581020374e-06, "loss": 11.6641, "step": 9973 }, { "epoch": 0.09441410058594675, "grad_norm": 338.9324035644531, "learning_rate": 1.978321909524337e-06, "loss": 38.1094, "step": 9974 }, { "epoch": 0.0944235666076618, "grad_norm": 296.6690979003906, "learning_rate": 1.978315560027078e-06, "loss": 16.5547, "step": 9975 }, { "epoch": 0.09443303262937686, "grad_norm": 337.0851745605469, "learning_rate": 1.9783092096102652e-06, "loss": 21.3125, "step": 9976 }, { "epoch": 0.0944424986510919, "grad_norm": 413.2040710449219, "learning_rate": 1.9783028582739065e-06, "loss": 41.3438, "step": 9977 }, { "epoch": 0.09445196467280696, "grad_norm": 167.70407104492188, "learning_rate": 1.978296506018006e-06, "loss": 15.0781, "step": 9978 }, { "epoch": 0.09446143069452201, "grad_norm": 189.37026977539062, "learning_rate": 1.978290152842571e-06, "loss": 21.6406, "step": 9979 }, { "epoch": 0.09447089671623707, "grad_norm": 394.66619873046875, "learning_rate": 1.978283798747606e-06, "loss": 46.3594, "step": 9980 }, { "epoch": 0.09448036273795211, "grad_norm": 914.3612670898438, "learning_rate": 1.9782774437331187e-06, "loss": 37.0, "step": 9981 }, { "epoch": 0.09448982875966717, "grad_norm": 487.1379089355469, "learning_rate": 1.9782710877991138e-06, "loss": 26.1328, "step": 9982 }, { "epoch": 0.09449929478138223, "grad_norm": 635.7474975585938, "learning_rate": 1.978264730945598e-06, "loss": 55.9062, "step": 9983 }, { "epoch": 0.09450876080309728, "grad_norm": 371.2217712402344, "learning_rate": 1.9782583731725765e-06, "loss": 21.0156, "step": 9984 }, { "epoch": 0.09451822682481234, "grad_norm": 205.2873992919922, "learning_rate": 1.9782520144800563e-06, "loss": 16.3008, "step": 9985 }, { "epoch": 0.09452769284652739, "grad_norm": 501.9367980957031, "learning_rate": 1.978245654868043e-06, "loss": 45.707, "step": 9986 }, { "epoch": 0.09453715886824245, "grad_norm": 474.3633117675781, "learning_rate": 1.978239294336542e-06, "loss": 43.1562, "step": 9987 }, { "epoch": 0.09454662488995749, "grad_norm": 470.89031982421875, "learning_rate": 1.9782329328855594e-06, "loss": 48.6562, "step": 9988 }, { "epoch": 0.09455609091167255, "grad_norm": 299.90972900390625, "learning_rate": 1.9782265705151023e-06, "loss": 26.5, "step": 9989 }, { "epoch": 0.0945655569333876, "grad_norm": 224.48031616210938, "learning_rate": 1.9782202072251753e-06, "loss": 18.7578, "step": 9990 }, { "epoch": 0.09457502295510266, "grad_norm": 536.8344116210938, "learning_rate": 1.978213843015785e-06, "loss": 41.8281, "step": 9991 }, { "epoch": 0.09458448897681772, "grad_norm": 331.03155517578125, "learning_rate": 1.978207477886938e-06, "loss": 17.0938, "step": 9992 }, { "epoch": 0.09459395499853276, "grad_norm": 521.6146850585938, "learning_rate": 1.9782011118386387e-06, "loss": 16.7891, "step": 9993 }, { "epoch": 0.09460342102024782, "grad_norm": 304.19012451171875, "learning_rate": 1.9781947448708942e-06, "loss": 16.9219, "step": 9994 }, { "epoch": 0.09461288704196287, "grad_norm": 178.88265991210938, "learning_rate": 1.9781883769837103e-06, "loss": 20.5312, "step": 9995 }, { "epoch": 0.09462235306367793, "grad_norm": 168.55499267578125, "learning_rate": 1.9781820081770933e-06, "loss": 22.3203, "step": 9996 }, { "epoch": 0.09463181908539298, "grad_norm": 480.4566955566406, "learning_rate": 1.9781756384510488e-06, "loss": 28.8828, "step": 9997 }, { "epoch": 0.09464128510710804, "grad_norm": 548.2232666015625, "learning_rate": 1.9781692678055825e-06, "loss": 43.2656, "step": 9998 }, { "epoch": 0.0946507511288231, "grad_norm": 349.5177917480469, "learning_rate": 1.978162896240701e-06, "loss": 23.7344, "step": 9999 }, { "epoch": 0.09466021715053814, "grad_norm": 518.6321411132812, "learning_rate": 1.9781565237564096e-06, "loss": 19.0312, "step": 10000 }, { "epoch": 0.0946696831722532, "grad_norm": 292.95159912109375, "learning_rate": 1.978150150352715e-06, "loss": 37.6172, "step": 10001 }, { "epoch": 0.09467914919396825, "grad_norm": 1163.2376708984375, "learning_rate": 1.978143776029623e-06, "loss": 54.8281, "step": 10002 }, { "epoch": 0.09468861521568331, "grad_norm": 407.2154235839844, "learning_rate": 1.978137400787139e-06, "loss": 21.8906, "step": 10003 }, { "epoch": 0.09469808123739835, "grad_norm": 281.943603515625, "learning_rate": 1.97813102462527e-06, "loss": 23.9688, "step": 10004 }, { "epoch": 0.09470754725911341, "grad_norm": 3.344081163406372, "learning_rate": 1.978124647544021e-06, "loss": 0.812, "step": 10005 }, { "epoch": 0.09471701328082846, "grad_norm": 213.2633056640625, "learning_rate": 1.978118269543399e-06, "loss": 20.25, "step": 10006 }, { "epoch": 0.09472647930254352, "grad_norm": 370.82110595703125, "learning_rate": 1.9781118906234094e-06, "loss": 22.2031, "step": 10007 }, { "epoch": 0.09473594532425858, "grad_norm": 438.2336120605469, "learning_rate": 1.978105510784058e-06, "loss": 31.8438, "step": 10008 }, { "epoch": 0.09474541134597363, "grad_norm": 398.14361572265625, "learning_rate": 1.978099130025351e-06, "loss": 41.4062, "step": 10009 }, { "epoch": 0.09475487736768869, "grad_norm": 2.7759616374969482, "learning_rate": 1.978092748347295e-06, "loss": 0.7622, "step": 10010 }, { "epoch": 0.09476434338940373, "grad_norm": 869.1846313476562, "learning_rate": 1.978086365749895e-06, "loss": 32.1562, "step": 10011 }, { "epoch": 0.09477380941111879, "grad_norm": 427.522216796875, "learning_rate": 1.978079982233157e-06, "loss": 48.0312, "step": 10012 }, { "epoch": 0.09478327543283384, "grad_norm": 530.4539794921875, "learning_rate": 1.978073597797088e-06, "loss": 55.2344, "step": 10013 }, { "epoch": 0.0947927414545489, "grad_norm": 853.9423217773438, "learning_rate": 1.9780672124416936e-06, "loss": 45.1484, "step": 10014 }, { "epoch": 0.09480220747626394, "grad_norm": 321.0423583984375, "learning_rate": 1.978060826166979e-06, "loss": 28.4219, "step": 10015 }, { "epoch": 0.094811673497979, "grad_norm": 678.094482421875, "learning_rate": 1.9780544389729514e-06, "loss": 43.5, "step": 10016 }, { "epoch": 0.09482113951969406, "grad_norm": 454.8870849609375, "learning_rate": 1.9780480508596164e-06, "loss": 34.1875, "step": 10017 }, { "epoch": 0.09483060554140911, "grad_norm": 509.7919616699219, "learning_rate": 1.9780416618269796e-06, "loss": 24.5469, "step": 10018 }, { "epoch": 0.09484007156312417, "grad_norm": 259.4820251464844, "learning_rate": 1.978035271875047e-06, "loss": 25.4609, "step": 10019 }, { "epoch": 0.09484953758483922, "grad_norm": 3.7149767875671387, "learning_rate": 1.978028881003825e-06, "loss": 0.9604, "step": 10020 }, { "epoch": 0.09485900360655428, "grad_norm": 611.75341796875, "learning_rate": 1.97802248921332e-06, "loss": 31.8203, "step": 10021 }, { "epoch": 0.09486846962826932, "grad_norm": 2.648486375808716, "learning_rate": 1.978016096503537e-06, "loss": 0.8774, "step": 10022 }, { "epoch": 0.09487793564998438, "grad_norm": 652.892333984375, "learning_rate": 1.978009702874482e-06, "loss": 50.8125, "step": 10023 }, { "epoch": 0.09488740167169943, "grad_norm": 208.9322509765625, "learning_rate": 1.9780033083261624e-06, "loss": 18.8281, "step": 10024 }, { "epoch": 0.09489686769341449, "grad_norm": 234.25628662109375, "learning_rate": 1.977996912858583e-06, "loss": 22.4219, "step": 10025 }, { "epoch": 0.09490633371512955, "grad_norm": 662.9414672851562, "learning_rate": 1.9779905164717498e-06, "loss": 34.9062, "step": 10026 }, { "epoch": 0.0949157997368446, "grad_norm": 901.8578491210938, "learning_rate": 1.9779841191656695e-06, "loss": 32.3281, "step": 10027 }, { "epoch": 0.09492526575855965, "grad_norm": 662.2747802734375, "learning_rate": 1.977977720940348e-06, "loss": 33.1602, "step": 10028 }, { "epoch": 0.0949347317802747, "grad_norm": 491.9393005371094, "learning_rate": 1.9779713217957904e-06, "loss": 50.5469, "step": 10029 }, { "epoch": 0.09494419780198976, "grad_norm": 415.9501037597656, "learning_rate": 1.977964921732004e-06, "loss": 52.9844, "step": 10030 }, { "epoch": 0.0949536638237048, "grad_norm": 2.2428982257843018, "learning_rate": 1.977958520748994e-06, "loss": 0.8091, "step": 10031 }, { "epoch": 0.09496312984541987, "grad_norm": 1308.3697509765625, "learning_rate": 1.9779521188467662e-06, "loss": 69.4766, "step": 10032 }, { "epoch": 0.09497259586713491, "grad_norm": 328.7098388671875, "learning_rate": 1.9779457160253275e-06, "loss": 24.2031, "step": 10033 }, { "epoch": 0.09498206188884997, "grad_norm": 355.07696533203125, "learning_rate": 1.977939312284683e-06, "loss": 31.8906, "step": 10034 }, { "epoch": 0.09499152791056503, "grad_norm": 381.4848327636719, "learning_rate": 1.9779329076248395e-06, "loss": 30.1094, "step": 10035 }, { "epoch": 0.09500099393228008, "grad_norm": 660.3407592773438, "learning_rate": 1.977926502045803e-06, "loss": 24.6406, "step": 10036 }, { "epoch": 0.09501045995399514, "grad_norm": 1022.7686767578125, "learning_rate": 1.9779200955475787e-06, "loss": 86.6875, "step": 10037 }, { "epoch": 0.09501992597571018, "grad_norm": 143.43589782714844, "learning_rate": 1.977913688130173e-06, "loss": 23.9922, "step": 10038 }, { "epoch": 0.09502939199742524, "grad_norm": 646.296142578125, "learning_rate": 1.9779072797935928e-06, "loss": 29.8672, "step": 10039 }, { "epoch": 0.09503885801914029, "grad_norm": 259.0724792480469, "learning_rate": 1.9779008705378428e-06, "loss": 22.9453, "step": 10040 }, { "epoch": 0.09504832404085535, "grad_norm": 671.2141723632812, "learning_rate": 1.9778944603629294e-06, "loss": 43.5781, "step": 10041 }, { "epoch": 0.09505779006257041, "grad_norm": 276.0722351074219, "learning_rate": 1.9778880492688596e-06, "loss": 16.25, "step": 10042 }, { "epoch": 0.09506725608428546, "grad_norm": 420.24859619140625, "learning_rate": 1.977881637255638e-06, "loss": 27.2344, "step": 10043 }, { "epoch": 0.09507672210600052, "grad_norm": 400.2782897949219, "learning_rate": 1.9778752243232715e-06, "loss": 27.0781, "step": 10044 }, { "epoch": 0.09508618812771556, "grad_norm": 319.6298522949219, "learning_rate": 1.977868810471766e-06, "loss": 21.0859, "step": 10045 }, { "epoch": 0.09509565414943062, "grad_norm": 259.08172607421875, "learning_rate": 1.977862395701127e-06, "loss": 22.2188, "step": 10046 }, { "epoch": 0.09510512017114567, "grad_norm": 297.62109375, "learning_rate": 1.9778559800113614e-06, "loss": 25.75, "step": 10047 }, { "epoch": 0.09511458619286073, "grad_norm": 389.01165771484375, "learning_rate": 1.9778495634024747e-06, "loss": 20.6953, "step": 10048 }, { "epoch": 0.09512405221457577, "grad_norm": 284.9661560058594, "learning_rate": 1.977843145874473e-06, "loss": 16.8828, "step": 10049 }, { "epoch": 0.09513351823629083, "grad_norm": 261.7856750488281, "learning_rate": 1.9778367274273623e-06, "loss": 24.9922, "step": 10050 }, { "epoch": 0.0951429842580059, "grad_norm": 503.2874450683594, "learning_rate": 1.9778303080611483e-06, "loss": 33.0781, "step": 10051 }, { "epoch": 0.09515245027972094, "grad_norm": 412.4489440917969, "learning_rate": 1.977823887775838e-06, "loss": 40.6562, "step": 10052 }, { "epoch": 0.095161916301436, "grad_norm": 246.4237060546875, "learning_rate": 1.9778174665714368e-06, "loss": 26.0312, "step": 10053 }, { "epoch": 0.09517138232315105, "grad_norm": 221.5636749267578, "learning_rate": 1.9778110444479506e-06, "loss": 29.9062, "step": 10054 }, { "epoch": 0.0951808483448661, "grad_norm": 502.3572082519531, "learning_rate": 1.977804621405386e-06, "loss": 39.1406, "step": 10055 }, { "epoch": 0.09519031436658115, "grad_norm": 307.1854248046875, "learning_rate": 1.9777981974437483e-06, "loss": 16.0781, "step": 10056 }, { "epoch": 0.09519978038829621, "grad_norm": 382.7138977050781, "learning_rate": 1.977791772563044e-06, "loss": 38.9922, "step": 10057 }, { "epoch": 0.09520924641001126, "grad_norm": 373.169921875, "learning_rate": 1.977785346763279e-06, "loss": 44.7969, "step": 10058 }, { "epoch": 0.09521871243172632, "grad_norm": 486.9596862792969, "learning_rate": 1.9777789200444596e-06, "loss": 28.1094, "step": 10059 }, { "epoch": 0.09522817845344138, "grad_norm": 560.360107421875, "learning_rate": 1.977772492406591e-06, "loss": 16.8516, "step": 10060 }, { "epoch": 0.09523764447515642, "grad_norm": 440.0010681152344, "learning_rate": 1.9777660638496805e-06, "loss": 39.8203, "step": 10061 }, { "epoch": 0.09524711049687148, "grad_norm": 519.1611328125, "learning_rate": 1.9777596343737332e-06, "loss": 43.2188, "step": 10062 }, { "epoch": 0.09525657651858653, "grad_norm": 649.6676025390625, "learning_rate": 1.977753203978756e-06, "loss": 35.4844, "step": 10063 }, { "epoch": 0.09526604254030159, "grad_norm": 223.1390380859375, "learning_rate": 1.9777467726647535e-06, "loss": 21.6562, "step": 10064 }, { "epoch": 0.09527550856201664, "grad_norm": 2.735426664352417, "learning_rate": 1.977740340431733e-06, "loss": 0.8384, "step": 10065 }, { "epoch": 0.0952849745837317, "grad_norm": 532.4666748046875, "learning_rate": 1.9777339072797004e-06, "loss": 35.4844, "step": 10066 }, { "epoch": 0.09529444060544674, "grad_norm": 451.3540344238281, "learning_rate": 1.9777274732086617e-06, "loss": 43.0625, "step": 10067 }, { "epoch": 0.0953039066271618, "grad_norm": 268.7657775878906, "learning_rate": 1.9777210382186226e-06, "loss": 24.5703, "step": 10068 }, { "epoch": 0.09531337264887686, "grad_norm": 604.065185546875, "learning_rate": 1.977714602309589e-06, "loss": 7.5312, "step": 10069 }, { "epoch": 0.09532283867059191, "grad_norm": 550.9915771484375, "learning_rate": 1.977708165481568e-06, "loss": 43.0391, "step": 10070 }, { "epoch": 0.09533230469230697, "grad_norm": 572.9027099609375, "learning_rate": 1.977701727734564e-06, "loss": 35.8125, "step": 10071 }, { "epoch": 0.09534177071402201, "grad_norm": 730.685791015625, "learning_rate": 1.977695289068584e-06, "loss": 49.9688, "step": 10072 }, { "epoch": 0.09535123673573707, "grad_norm": 485.31219482421875, "learning_rate": 1.977688849483635e-06, "loss": 49.6406, "step": 10073 }, { "epoch": 0.09536070275745212, "grad_norm": 664.1962280273438, "learning_rate": 1.9776824089797214e-06, "loss": 39.9688, "step": 10074 }, { "epoch": 0.09537016877916718, "grad_norm": 395.1653747558594, "learning_rate": 1.9776759675568504e-06, "loss": 29.5234, "step": 10075 }, { "epoch": 0.09537963480088223, "grad_norm": 408.0954284667969, "learning_rate": 1.977669525215027e-06, "loss": 39.4375, "step": 10076 }, { "epoch": 0.09538910082259729, "grad_norm": 192.48715209960938, "learning_rate": 1.977663081954258e-06, "loss": 26.5547, "step": 10077 }, { "epoch": 0.09539856684431235, "grad_norm": 702.787353515625, "learning_rate": 1.9776566377745497e-06, "loss": 39.0, "step": 10078 }, { "epoch": 0.09540803286602739, "grad_norm": 549.51904296875, "learning_rate": 1.977650192675907e-06, "loss": 24.5938, "step": 10079 }, { "epoch": 0.09541749888774245, "grad_norm": 704.75537109375, "learning_rate": 1.9776437466583373e-06, "loss": 37.6875, "step": 10080 }, { "epoch": 0.0954269649094575, "grad_norm": 347.5857238769531, "learning_rate": 1.977637299721846e-06, "loss": 27.7109, "step": 10081 }, { "epoch": 0.09543643093117256, "grad_norm": 236.8549346923828, "learning_rate": 1.9776308518664394e-06, "loss": 17.0781, "step": 10082 }, { "epoch": 0.0954458969528876, "grad_norm": 433.4521179199219, "learning_rate": 1.9776244030921233e-06, "loss": 33.625, "step": 10083 }, { "epoch": 0.09545536297460266, "grad_norm": 615.6995849609375, "learning_rate": 1.9776179533989035e-06, "loss": 59.2188, "step": 10084 }, { "epoch": 0.09546482899631772, "grad_norm": 364.0931396484375, "learning_rate": 1.977611502786787e-06, "loss": 36.4375, "step": 10085 }, { "epoch": 0.09547429501803277, "grad_norm": 226.42694091796875, "learning_rate": 1.9776050512557787e-06, "loss": 14.9883, "step": 10086 }, { "epoch": 0.09548376103974783, "grad_norm": 385.0859375, "learning_rate": 1.9775985988058855e-06, "loss": 27.0938, "step": 10087 }, { "epoch": 0.09549322706146288, "grad_norm": 873.1299438476562, "learning_rate": 1.9775921454371136e-06, "loss": 61.4531, "step": 10088 }, { "epoch": 0.09550269308317794, "grad_norm": 441.3066101074219, "learning_rate": 1.977585691149468e-06, "loss": 37.0, "step": 10089 }, { "epoch": 0.09551215910489298, "grad_norm": 435.6531982421875, "learning_rate": 1.9775792359429557e-06, "loss": 27.6094, "step": 10090 }, { "epoch": 0.09552162512660804, "grad_norm": 479.2162170410156, "learning_rate": 1.977572779817583e-06, "loss": 41.5391, "step": 10091 }, { "epoch": 0.09553109114832309, "grad_norm": 329.98065185546875, "learning_rate": 1.9775663227733546e-06, "loss": 20.6719, "step": 10092 }, { "epoch": 0.09554055717003815, "grad_norm": 423.5835876464844, "learning_rate": 1.977559864810278e-06, "loss": 57.9375, "step": 10093 }, { "epoch": 0.09555002319175321, "grad_norm": 453.1203918457031, "learning_rate": 1.977553405928359e-06, "loss": 33.3984, "step": 10094 }, { "epoch": 0.09555948921346825, "grad_norm": 582.3023681640625, "learning_rate": 1.9775469461276025e-06, "loss": 53.0938, "step": 10095 }, { "epoch": 0.09556895523518331, "grad_norm": 430.0511169433594, "learning_rate": 1.977540485408016e-06, "loss": 43.0625, "step": 10096 }, { "epoch": 0.09557842125689836, "grad_norm": 828.994873046875, "learning_rate": 1.9775340237696052e-06, "loss": 48.5312, "step": 10097 }, { "epoch": 0.09558788727861342, "grad_norm": 367.1300354003906, "learning_rate": 1.9775275612123758e-06, "loss": 20.3594, "step": 10098 }, { "epoch": 0.09559735330032847, "grad_norm": 2.951322555541992, "learning_rate": 1.9775210977363345e-06, "loss": 0.9575, "step": 10099 }, { "epoch": 0.09560681932204353, "grad_norm": 578.4172973632812, "learning_rate": 1.9775146333414866e-06, "loss": 62.2344, "step": 10100 }, { "epoch": 0.09561628534375857, "grad_norm": 296.2647399902344, "learning_rate": 1.9775081680278383e-06, "loss": 35.2344, "step": 10101 }, { "epoch": 0.09562575136547363, "grad_norm": 174.0088653564453, "learning_rate": 1.977501701795396e-06, "loss": 30.4688, "step": 10102 }, { "epoch": 0.09563521738718869, "grad_norm": 407.6733703613281, "learning_rate": 1.977495234644166e-06, "loss": 37.3594, "step": 10103 }, { "epoch": 0.09564468340890374, "grad_norm": 638.0780029296875, "learning_rate": 1.977488766574154e-06, "loss": 43.5391, "step": 10104 }, { "epoch": 0.0956541494306188, "grad_norm": 307.7032775878906, "learning_rate": 1.9774822975853656e-06, "loss": 18.8164, "step": 10105 }, { "epoch": 0.09566361545233384, "grad_norm": 325.3033752441406, "learning_rate": 1.9774758276778083e-06, "loss": 16.1875, "step": 10106 }, { "epoch": 0.0956730814740489, "grad_norm": 687.14892578125, "learning_rate": 1.9774693568514866e-06, "loss": 29.375, "step": 10107 }, { "epoch": 0.09568254749576395, "grad_norm": 439.8672180175781, "learning_rate": 1.9774628851064077e-06, "loss": 35.875, "step": 10108 }, { "epoch": 0.09569201351747901, "grad_norm": 3.1484732627868652, "learning_rate": 1.9774564124425772e-06, "loss": 1.0366, "step": 10109 }, { "epoch": 0.09570147953919406, "grad_norm": 253.1479034423828, "learning_rate": 1.977449938860001e-06, "loss": 28.0391, "step": 10110 }, { "epoch": 0.09571094556090912, "grad_norm": 223.69471740722656, "learning_rate": 1.977443464358686e-06, "loss": 21.5938, "step": 10111 }, { "epoch": 0.09572041158262418, "grad_norm": 699.9251708984375, "learning_rate": 1.9774369889386373e-06, "loss": 37.1719, "step": 10112 }, { "epoch": 0.09572987760433922, "grad_norm": 345.5001220703125, "learning_rate": 1.9774305125998616e-06, "loss": 22.9766, "step": 10113 }, { "epoch": 0.09573934362605428, "grad_norm": 493.0315856933594, "learning_rate": 1.9774240353423647e-06, "loss": 51.0469, "step": 10114 }, { "epoch": 0.09574880964776933, "grad_norm": 406.1888122558594, "learning_rate": 1.9774175571661527e-06, "loss": 33.6094, "step": 10115 }, { "epoch": 0.09575827566948439, "grad_norm": 258.1922607421875, "learning_rate": 1.9774110780712317e-06, "loss": 9.8242, "step": 10116 }, { "epoch": 0.09576774169119943, "grad_norm": 375.6810607910156, "learning_rate": 1.9774045980576083e-06, "loss": 39.8906, "step": 10117 }, { "epoch": 0.0957772077129145, "grad_norm": 373.4457092285156, "learning_rate": 1.9773981171252878e-06, "loss": 32.8438, "step": 10118 }, { "epoch": 0.09578667373462954, "grad_norm": 437.98419189453125, "learning_rate": 1.9773916352742766e-06, "loss": 23.9922, "step": 10119 }, { "epoch": 0.0957961397563446, "grad_norm": 218.09103393554688, "learning_rate": 1.977385152504581e-06, "loss": 17.8828, "step": 10120 }, { "epoch": 0.09580560577805966, "grad_norm": 2.469261407852173, "learning_rate": 1.977378668816207e-06, "loss": 0.9097, "step": 10121 }, { "epoch": 0.0958150717997747, "grad_norm": 292.5363464355469, "learning_rate": 1.9773721842091606e-06, "loss": 22.1875, "step": 10122 }, { "epoch": 0.09582453782148977, "grad_norm": 641.5153198242188, "learning_rate": 1.9773656986834482e-06, "loss": 53.4961, "step": 10123 }, { "epoch": 0.09583400384320481, "grad_norm": 638.8525390625, "learning_rate": 1.9773592122390752e-06, "loss": 19.5234, "step": 10124 }, { "epoch": 0.09584346986491987, "grad_norm": 380.65447998046875, "learning_rate": 1.977352724876048e-06, "loss": 25.4922, "step": 10125 }, { "epoch": 0.09585293588663492, "grad_norm": 156.1951446533203, "learning_rate": 1.9773462365943733e-06, "loss": 18.4453, "step": 10126 }, { "epoch": 0.09586240190834998, "grad_norm": 311.4060974121094, "learning_rate": 1.9773397473940566e-06, "loss": 21.8281, "step": 10127 }, { "epoch": 0.09587186793006504, "grad_norm": 329.5367736816406, "learning_rate": 1.977333257275104e-06, "loss": 37.6406, "step": 10128 }, { "epoch": 0.09588133395178008, "grad_norm": 424.7969970703125, "learning_rate": 1.977326766237522e-06, "loss": 38.9453, "step": 10129 }, { "epoch": 0.09589079997349514, "grad_norm": 265.6376037597656, "learning_rate": 1.977320274281316e-06, "loss": 24.0469, "step": 10130 }, { "epoch": 0.09590026599521019, "grad_norm": 381.9410400390625, "learning_rate": 1.977313781406493e-06, "loss": 25.4688, "step": 10131 }, { "epoch": 0.09590973201692525, "grad_norm": 227.8025665283203, "learning_rate": 1.977307287613058e-06, "loss": 33.2109, "step": 10132 }, { "epoch": 0.0959191980386403, "grad_norm": 4.259149074554443, "learning_rate": 1.9773007929010178e-06, "loss": 1.061, "step": 10133 }, { "epoch": 0.09592866406035536, "grad_norm": 212.0712127685547, "learning_rate": 1.977294297270379e-06, "loss": 16.8203, "step": 10134 }, { "epoch": 0.0959381300820704, "grad_norm": 1055.3990478515625, "learning_rate": 1.977287800721147e-06, "loss": 55.9453, "step": 10135 }, { "epoch": 0.09594759610378546, "grad_norm": 328.3143615722656, "learning_rate": 1.9772813032533274e-06, "loss": 24.1133, "step": 10136 }, { "epoch": 0.09595706212550052, "grad_norm": 204.0445556640625, "learning_rate": 1.9772748048669274e-06, "loss": 24.3984, "step": 10137 }, { "epoch": 0.09596652814721557, "grad_norm": 822.7428588867188, "learning_rate": 1.977268305561953e-06, "loss": 27.3203, "step": 10138 }, { "epoch": 0.09597599416893063, "grad_norm": 3.1003026962280273, "learning_rate": 1.9772618053384095e-06, "loss": 0.9048, "step": 10139 }, { "epoch": 0.09598546019064567, "grad_norm": 461.15826416015625, "learning_rate": 1.9772553041963035e-06, "loss": 56.5703, "step": 10140 }, { "epoch": 0.09599492621236073, "grad_norm": 418.2093505859375, "learning_rate": 1.9772488021356414e-06, "loss": 33.1641, "step": 10141 }, { "epoch": 0.09600439223407578, "grad_norm": 236.4571533203125, "learning_rate": 1.9772422991564285e-06, "loss": 22.7344, "step": 10142 }, { "epoch": 0.09601385825579084, "grad_norm": 290.4241027832031, "learning_rate": 1.977235795258672e-06, "loss": 23.3516, "step": 10143 }, { "epoch": 0.09602332427750589, "grad_norm": 219.86947631835938, "learning_rate": 1.977229290442377e-06, "loss": 10.4961, "step": 10144 }, { "epoch": 0.09603279029922095, "grad_norm": 371.1127624511719, "learning_rate": 1.9772227847075503e-06, "loss": 33.3906, "step": 10145 }, { "epoch": 0.096042256320936, "grad_norm": 183.10922241210938, "learning_rate": 1.9772162780541973e-06, "loss": 21.9219, "step": 10146 }, { "epoch": 0.09605172234265105, "grad_norm": 265.3681945800781, "learning_rate": 1.977209770482325e-06, "loss": 21.8516, "step": 10147 }, { "epoch": 0.09606118836436611, "grad_norm": 320.51373291015625, "learning_rate": 1.977203261991939e-06, "loss": 28.5469, "step": 10148 }, { "epoch": 0.09607065438608116, "grad_norm": 195.042724609375, "learning_rate": 1.9771967525830454e-06, "loss": 24.5781, "step": 10149 }, { "epoch": 0.09608012040779622, "grad_norm": 188.8480987548828, "learning_rate": 1.9771902422556505e-06, "loss": 20.9219, "step": 10150 }, { "epoch": 0.09608958642951126, "grad_norm": 927.0878295898438, "learning_rate": 1.97718373100976e-06, "loss": 42.8281, "step": 10151 }, { "epoch": 0.09609905245122632, "grad_norm": 438.2912292480469, "learning_rate": 1.977177218845381e-06, "loss": 27.7773, "step": 10152 }, { "epoch": 0.09610851847294137, "grad_norm": 512.8429565429688, "learning_rate": 1.9771707057625188e-06, "loss": 34.2969, "step": 10153 }, { "epoch": 0.09611798449465643, "grad_norm": 451.39404296875, "learning_rate": 1.9771641917611795e-06, "loss": 19.0625, "step": 10154 }, { "epoch": 0.09612745051637149, "grad_norm": 781.2681884765625, "learning_rate": 1.9771576768413698e-06, "loss": 26.2969, "step": 10155 }, { "epoch": 0.09613691653808654, "grad_norm": 154.34764099121094, "learning_rate": 1.977151161003095e-06, "loss": 18.9141, "step": 10156 }, { "epoch": 0.0961463825598016, "grad_norm": 332.841064453125, "learning_rate": 1.977144644246362e-06, "loss": 29.3047, "step": 10157 }, { "epoch": 0.09615584858151664, "grad_norm": 418.6829528808594, "learning_rate": 1.9771381265711765e-06, "loss": 38.2031, "step": 10158 }, { "epoch": 0.0961653146032317, "grad_norm": 262.9169006347656, "learning_rate": 1.9771316079775447e-06, "loss": 26.1641, "step": 10159 }, { "epoch": 0.09617478062494675, "grad_norm": 665.6204833984375, "learning_rate": 1.9771250884654726e-06, "loss": 40.3281, "step": 10160 }, { "epoch": 0.09618424664666181, "grad_norm": 266.84619140625, "learning_rate": 1.9771185680349665e-06, "loss": 24.0781, "step": 10161 }, { "epoch": 0.09619371266837685, "grad_norm": 659.6334838867188, "learning_rate": 1.977112046686033e-06, "loss": 56.0469, "step": 10162 }, { "epoch": 0.09620317869009191, "grad_norm": 888.281005859375, "learning_rate": 1.977105524418677e-06, "loss": 13.8711, "step": 10163 }, { "epoch": 0.09621264471180697, "grad_norm": 513.14599609375, "learning_rate": 1.977099001232906e-06, "loss": 30.5938, "step": 10164 }, { "epoch": 0.09622211073352202, "grad_norm": 695.2803955078125, "learning_rate": 1.977092477128725e-06, "loss": 55.4297, "step": 10165 }, { "epoch": 0.09623157675523708, "grad_norm": 579.484619140625, "learning_rate": 1.9770859521061412e-06, "loss": 54.5, "step": 10166 }, { "epoch": 0.09624104277695213, "grad_norm": 600.60400390625, "learning_rate": 1.9770794261651598e-06, "loss": 68.9688, "step": 10167 }, { "epoch": 0.09625050879866719, "grad_norm": 977.4208374023438, "learning_rate": 1.9770728993057875e-06, "loss": 18.6172, "step": 10168 }, { "epoch": 0.09625997482038223, "grad_norm": 167.001220703125, "learning_rate": 1.97706637152803e-06, "loss": 21.4844, "step": 10169 }, { "epoch": 0.09626944084209729, "grad_norm": 596.1156005859375, "learning_rate": 1.977059842831894e-06, "loss": 50.5312, "step": 10170 }, { "epoch": 0.09627890686381235, "grad_norm": 546.8007202148438, "learning_rate": 1.977053313217385e-06, "loss": 23.9297, "step": 10171 }, { "epoch": 0.0962883728855274, "grad_norm": 371.7882995605469, "learning_rate": 1.9770467826845093e-06, "loss": 16.5547, "step": 10172 }, { "epoch": 0.09629783890724246, "grad_norm": 452.8056335449219, "learning_rate": 1.9770402512332736e-06, "loss": 29.8594, "step": 10173 }, { "epoch": 0.0963073049289575, "grad_norm": 3.31783390045166, "learning_rate": 1.9770337188636835e-06, "loss": 0.916, "step": 10174 }, { "epoch": 0.09631677095067256, "grad_norm": 328.3194580078125, "learning_rate": 1.977027185575745e-06, "loss": 28.4297, "step": 10175 }, { "epoch": 0.09632623697238761, "grad_norm": 928.585693359375, "learning_rate": 1.977020651369465e-06, "loss": 59.4922, "step": 10176 }, { "epoch": 0.09633570299410267, "grad_norm": 408.3916931152344, "learning_rate": 1.9770141162448487e-06, "loss": 51.5938, "step": 10177 }, { "epoch": 0.09634516901581772, "grad_norm": 564.3809814453125, "learning_rate": 1.977007580201903e-06, "loss": 25.9609, "step": 10178 }, { "epoch": 0.09635463503753278, "grad_norm": 1496.8934326171875, "learning_rate": 1.9770010432406335e-06, "loss": 40.6875, "step": 10179 }, { "epoch": 0.09636410105924784, "grad_norm": 554.2822875976562, "learning_rate": 1.9769945053610467e-06, "loss": 17.8008, "step": 10180 }, { "epoch": 0.09637356708096288, "grad_norm": 236.53863525390625, "learning_rate": 1.9769879665631482e-06, "loss": 27.3125, "step": 10181 }, { "epoch": 0.09638303310267794, "grad_norm": 514.442138671875, "learning_rate": 1.9769814268469453e-06, "loss": 41.2344, "step": 10182 }, { "epoch": 0.09639249912439299, "grad_norm": 940.480712890625, "learning_rate": 1.976974886212443e-06, "loss": 17.9727, "step": 10183 }, { "epoch": 0.09640196514610805, "grad_norm": 244.68309020996094, "learning_rate": 1.9769683446596483e-06, "loss": 22.082, "step": 10184 }, { "epoch": 0.0964114311678231, "grad_norm": 497.4513854980469, "learning_rate": 1.9769618021885665e-06, "loss": 30.2891, "step": 10185 }, { "epoch": 0.09642089718953815, "grad_norm": 301.9384460449219, "learning_rate": 1.976955258799204e-06, "loss": 20.8125, "step": 10186 }, { "epoch": 0.0964303632112532, "grad_norm": 385.69873046875, "learning_rate": 1.9769487144915675e-06, "loss": 20.7266, "step": 10187 }, { "epoch": 0.09643982923296826, "grad_norm": 385.4007873535156, "learning_rate": 1.9769421692656626e-06, "loss": 26.2031, "step": 10188 }, { "epoch": 0.09644929525468332, "grad_norm": 624.696044921875, "learning_rate": 1.9769356231214953e-06, "loss": 49.0625, "step": 10189 }, { "epoch": 0.09645876127639837, "grad_norm": 453.164794921875, "learning_rate": 1.9769290760590726e-06, "loss": 44.2656, "step": 10190 }, { "epoch": 0.09646822729811343, "grad_norm": 300.3827819824219, "learning_rate": 1.9769225280783997e-06, "loss": 17.9102, "step": 10191 }, { "epoch": 0.09647769331982847, "grad_norm": 2059.104736328125, "learning_rate": 1.9769159791794834e-06, "loss": 67.7891, "step": 10192 }, { "epoch": 0.09648715934154353, "grad_norm": 189.70501708984375, "learning_rate": 1.9769094293623297e-06, "loss": 21.6719, "step": 10193 }, { "epoch": 0.09649662536325858, "grad_norm": 492.15753173828125, "learning_rate": 1.9769028786269443e-06, "loss": 42.125, "step": 10194 }, { "epoch": 0.09650609138497364, "grad_norm": 248.82940673828125, "learning_rate": 1.976896326973334e-06, "loss": 30.3438, "step": 10195 }, { "epoch": 0.09651555740668868, "grad_norm": 330.384033203125, "learning_rate": 1.976889774401505e-06, "loss": 21.5234, "step": 10196 }, { "epoch": 0.09652502342840374, "grad_norm": 222.09324645996094, "learning_rate": 1.9768832209114627e-06, "loss": 13.9453, "step": 10197 }, { "epoch": 0.0965344894501188, "grad_norm": 250.0201873779297, "learning_rate": 1.976876666503214e-06, "loss": 30.8594, "step": 10198 }, { "epoch": 0.09654395547183385, "grad_norm": 1146.67138671875, "learning_rate": 1.976870111176765e-06, "loss": 44.8555, "step": 10199 }, { "epoch": 0.09655342149354891, "grad_norm": 2.9639101028442383, "learning_rate": 1.976863554932121e-06, "loss": 0.9492, "step": 10200 }, { "epoch": 0.09656288751526396, "grad_norm": 450.9801940917969, "learning_rate": 1.976856997769289e-06, "loss": 43.5469, "step": 10201 }, { "epoch": 0.09657235353697902, "grad_norm": 315.6958312988281, "learning_rate": 1.9768504396882752e-06, "loss": 55.3594, "step": 10202 }, { "epoch": 0.09658181955869406, "grad_norm": 370.0879821777344, "learning_rate": 1.9768438806890857e-06, "loss": 26.0312, "step": 10203 }, { "epoch": 0.09659128558040912, "grad_norm": 899.25830078125, "learning_rate": 1.9768373207717263e-06, "loss": 57.8047, "step": 10204 }, { "epoch": 0.09660075160212417, "grad_norm": 355.3663024902344, "learning_rate": 1.9768307599362032e-06, "loss": 17.6953, "step": 10205 }, { "epoch": 0.09661021762383923, "grad_norm": 213.57276916503906, "learning_rate": 1.976824198182523e-06, "loss": 23.8594, "step": 10206 }, { "epoch": 0.09661968364555429, "grad_norm": 432.8179016113281, "learning_rate": 1.9768176355106914e-06, "loss": 21.8438, "step": 10207 }, { "epoch": 0.09662914966726933, "grad_norm": 543.3475341796875, "learning_rate": 1.9768110719207145e-06, "loss": 26.3984, "step": 10208 }, { "epoch": 0.0966386156889844, "grad_norm": 215.85740661621094, "learning_rate": 1.9768045074125993e-06, "loss": 22.3125, "step": 10209 }, { "epoch": 0.09664808171069944, "grad_norm": 636.7998657226562, "learning_rate": 1.9767979419863516e-06, "loss": 55.375, "step": 10210 }, { "epoch": 0.0966575477324145, "grad_norm": 197.2063446044922, "learning_rate": 1.9767913756419765e-06, "loss": 22.4219, "step": 10211 }, { "epoch": 0.09666701375412955, "grad_norm": 323.0161437988281, "learning_rate": 1.976784808379482e-06, "loss": 46.1562, "step": 10212 }, { "epoch": 0.0966764797758446, "grad_norm": 634.7105102539062, "learning_rate": 1.9767782401988724e-06, "loss": 35.4375, "step": 10213 }, { "epoch": 0.09668594579755967, "grad_norm": 880.4744873046875, "learning_rate": 1.976771671100155e-06, "loss": 84.9688, "step": 10214 }, { "epoch": 0.09669541181927471, "grad_norm": 297.1836242675781, "learning_rate": 1.9767651010833364e-06, "loss": 19.5547, "step": 10215 }, { "epoch": 0.09670487784098977, "grad_norm": 396.7928771972656, "learning_rate": 1.9767585301484218e-06, "loss": 30.4375, "step": 10216 }, { "epoch": 0.09671434386270482, "grad_norm": 158.03799438476562, "learning_rate": 1.9767519582954174e-06, "loss": 21.125, "step": 10217 }, { "epoch": 0.09672380988441988, "grad_norm": 753.822509765625, "learning_rate": 1.97674538552433e-06, "loss": 54.4922, "step": 10218 }, { "epoch": 0.09673327590613492, "grad_norm": 265.46746826171875, "learning_rate": 1.9767388118351655e-06, "loss": 9.7578, "step": 10219 }, { "epoch": 0.09674274192784998, "grad_norm": 768.5626220703125, "learning_rate": 1.9767322372279302e-06, "loss": 43.2812, "step": 10220 }, { "epoch": 0.09675220794956503, "grad_norm": 1312.8638916015625, "learning_rate": 1.97672566170263e-06, "loss": 58.125, "step": 10221 }, { "epoch": 0.09676167397128009, "grad_norm": 322.19110107421875, "learning_rate": 1.976719085259271e-06, "loss": 21.375, "step": 10222 }, { "epoch": 0.09677113999299515, "grad_norm": 285.6373291015625, "learning_rate": 1.97671250789786e-06, "loss": 18.0781, "step": 10223 }, { "epoch": 0.0967806060147102, "grad_norm": 3.025853395462036, "learning_rate": 1.9767059296184025e-06, "loss": 0.8579, "step": 10224 }, { "epoch": 0.09679007203642526, "grad_norm": 302.6317443847656, "learning_rate": 1.9766993504209047e-06, "loss": 25.9531, "step": 10225 }, { "epoch": 0.0967995380581403, "grad_norm": 735.88916015625, "learning_rate": 1.9766927703053735e-06, "loss": 51.5938, "step": 10226 }, { "epoch": 0.09680900407985536, "grad_norm": 315.39599609375, "learning_rate": 1.9766861892718144e-06, "loss": 21.2109, "step": 10227 }, { "epoch": 0.09681847010157041, "grad_norm": 284.0198059082031, "learning_rate": 1.9766796073202342e-06, "loss": 24.7891, "step": 10228 }, { "epoch": 0.09682793612328547, "grad_norm": 455.1190185546875, "learning_rate": 1.976673024450638e-06, "loss": 27.9688, "step": 10229 }, { "epoch": 0.09683740214500051, "grad_norm": 635.066650390625, "learning_rate": 1.9766664406630335e-06, "loss": 33.0547, "step": 10230 }, { "epoch": 0.09684686816671557, "grad_norm": 3.0402534008026123, "learning_rate": 1.9766598559574253e-06, "loss": 0.7998, "step": 10231 }, { "epoch": 0.09685633418843063, "grad_norm": 720.6796264648438, "learning_rate": 1.9766532703338214e-06, "loss": 38.9844, "step": 10232 }, { "epoch": 0.09686580021014568, "grad_norm": 308.8890075683594, "learning_rate": 1.976646683792226e-06, "loss": 23.0156, "step": 10233 }, { "epoch": 0.09687526623186074, "grad_norm": 295.5926513671875, "learning_rate": 1.976640096332647e-06, "loss": 26.4688, "step": 10234 }, { "epoch": 0.09688473225357579, "grad_norm": 264.51055908203125, "learning_rate": 1.976633507955089e-06, "loss": 22.0781, "step": 10235 }, { "epoch": 0.09689419827529085, "grad_norm": 253.84999084472656, "learning_rate": 1.9766269186595594e-06, "loss": 21.9453, "step": 10236 }, { "epoch": 0.09690366429700589, "grad_norm": 609.7420654296875, "learning_rate": 1.976620328446064e-06, "loss": 29.0781, "step": 10237 }, { "epoch": 0.09691313031872095, "grad_norm": 299.04571533203125, "learning_rate": 1.9766137373146092e-06, "loss": 20.3906, "step": 10238 }, { "epoch": 0.096922596340436, "grad_norm": 671.076416015625, "learning_rate": 1.976607145265201e-06, "loss": 30.8906, "step": 10239 }, { "epoch": 0.09693206236215106, "grad_norm": 485.3672790527344, "learning_rate": 1.9766005522978453e-06, "loss": 44.7812, "step": 10240 }, { "epoch": 0.09694152838386612, "grad_norm": 179.59483337402344, "learning_rate": 1.9765939584125493e-06, "loss": 25.2266, "step": 10241 }, { "epoch": 0.09695099440558116, "grad_norm": 456.4469299316406, "learning_rate": 1.976587363609318e-06, "loss": 46.5469, "step": 10242 }, { "epoch": 0.09696046042729622, "grad_norm": 442.0279541015625, "learning_rate": 1.976580767888158e-06, "loss": 26.0078, "step": 10243 }, { "epoch": 0.09696992644901127, "grad_norm": 351.6238098144531, "learning_rate": 1.976574171249076e-06, "loss": 11.3984, "step": 10244 }, { "epoch": 0.09697939247072633, "grad_norm": 670.4993286132812, "learning_rate": 1.9765675736920774e-06, "loss": 47.8672, "step": 10245 }, { "epoch": 0.09698885849244138, "grad_norm": 361.2619323730469, "learning_rate": 1.9765609752171693e-06, "loss": 40.8984, "step": 10246 }, { "epoch": 0.09699832451415644, "grad_norm": 438.58123779296875, "learning_rate": 1.976554375824357e-06, "loss": 43.1797, "step": 10247 }, { "epoch": 0.09700779053587148, "grad_norm": 219.15370178222656, "learning_rate": 1.9765477755136474e-06, "loss": 24.2656, "step": 10248 }, { "epoch": 0.09701725655758654, "grad_norm": 1067.553466796875, "learning_rate": 1.976541174285046e-06, "loss": 42.8516, "step": 10249 }, { "epoch": 0.0970267225793016, "grad_norm": 360.8633117675781, "learning_rate": 1.97653457213856e-06, "loss": 26.3047, "step": 10250 }, { "epoch": 0.09703618860101665, "grad_norm": 1043.8182373046875, "learning_rate": 1.9765279690741943e-06, "loss": 61.0781, "step": 10251 }, { "epoch": 0.09704565462273171, "grad_norm": 427.0685729980469, "learning_rate": 1.9765213650919564e-06, "loss": 48.4844, "step": 10252 }, { "epoch": 0.09705512064444675, "grad_norm": 155.36795043945312, "learning_rate": 1.976514760191852e-06, "loss": 17.9297, "step": 10253 }, { "epoch": 0.09706458666616181, "grad_norm": 284.4605407714844, "learning_rate": 1.976508154373887e-06, "loss": 24.7031, "step": 10254 }, { "epoch": 0.09707405268787686, "grad_norm": 490.9344177246094, "learning_rate": 1.976501547638068e-06, "loss": 27.1719, "step": 10255 }, { "epoch": 0.09708351870959192, "grad_norm": 225.09307861328125, "learning_rate": 1.976494939984401e-06, "loss": 24.25, "step": 10256 }, { "epoch": 0.09709298473130698, "grad_norm": 352.8570861816406, "learning_rate": 1.9764883314128923e-06, "loss": 33.9688, "step": 10257 }, { "epoch": 0.09710245075302203, "grad_norm": 556.2406005859375, "learning_rate": 1.976481721923548e-06, "loss": 42.2812, "step": 10258 }, { "epoch": 0.09711191677473709, "grad_norm": 186.9109344482422, "learning_rate": 1.9764751115163747e-06, "loss": 23.9531, "step": 10259 }, { "epoch": 0.09712138279645213, "grad_norm": 997.3425903320312, "learning_rate": 1.9764685001913775e-06, "loss": 18.3984, "step": 10260 }, { "epoch": 0.09713084881816719, "grad_norm": 274.87506103515625, "learning_rate": 1.9764618879485642e-06, "loss": 19.1641, "step": 10261 }, { "epoch": 0.09714031483988224, "grad_norm": 227.7298126220703, "learning_rate": 1.9764552747879403e-06, "loss": 21.5, "step": 10262 }, { "epoch": 0.0971497808615973, "grad_norm": 393.2655029296875, "learning_rate": 1.976448660709512e-06, "loss": 41.1875, "step": 10263 }, { "epoch": 0.09715924688331234, "grad_norm": 454.8700256347656, "learning_rate": 1.976442045713285e-06, "loss": 28.918, "step": 10264 }, { "epoch": 0.0971687129050274, "grad_norm": 342.0788269042969, "learning_rate": 1.976435429799266e-06, "loss": 29.3359, "step": 10265 }, { "epoch": 0.09717817892674246, "grad_norm": 847.6720581054688, "learning_rate": 1.9764288129674617e-06, "loss": 33.3438, "step": 10266 }, { "epoch": 0.09718764494845751, "grad_norm": 2.8608405590057373, "learning_rate": 1.976422195217877e-06, "loss": 0.8462, "step": 10267 }, { "epoch": 0.09719711097017257, "grad_norm": 207.92356872558594, "learning_rate": 1.9764155765505198e-06, "loss": 19.2656, "step": 10268 }, { "epoch": 0.09720657699188762, "grad_norm": 577.9781494140625, "learning_rate": 1.9764089569653953e-06, "loss": 43.6094, "step": 10269 }, { "epoch": 0.09721604301360268, "grad_norm": 288.1922912597656, "learning_rate": 1.97640233646251e-06, "loss": 20.375, "step": 10270 }, { "epoch": 0.09722550903531772, "grad_norm": 547.9834594726562, "learning_rate": 1.9763957150418697e-06, "loss": 50.0234, "step": 10271 }, { "epoch": 0.09723497505703278, "grad_norm": 281.6842956542969, "learning_rate": 1.976389092703481e-06, "loss": 22.1875, "step": 10272 }, { "epoch": 0.09724444107874783, "grad_norm": 1090.2130126953125, "learning_rate": 1.97638246944735e-06, "loss": 58.9062, "step": 10273 }, { "epoch": 0.09725390710046289, "grad_norm": 534.2918701171875, "learning_rate": 1.976375845273483e-06, "loss": 51.6406, "step": 10274 }, { "epoch": 0.09726337312217795, "grad_norm": 450.3020324707031, "learning_rate": 1.9763692201818868e-06, "loss": 31.1875, "step": 10275 }, { "epoch": 0.097272839143893, "grad_norm": 375.4912109375, "learning_rate": 1.9763625941725667e-06, "loss": 10.3828, "step": 10276 }, { "epoch": 0.09728230516560805, "grad_norm": 418.9878845214844, "learning_rate": 1.976355967245529e-06, "loss": 20.0391, "step": 10277 }, { "epoch": 0.0972917711873231, "grad_norm": 980.3253173828125, "learning_rate": 1.9763493394007804e-06, "loss": 62.9219, "step": 10278 }, { "epoch": 0.09730123720903816, "grad_norm": 446.166259765625, "learning_rate": 1.976342710638327e-06, "loss": 18.5195, "step": 10279 }, { "epoch": 0.0973107032307532, "grad_norm": 375.0148010253906, "learning_rate": 1.9763360809581747e-06, "loss": 21.0938, "step": 10280 }, { "epoch": 0.09732016925246827, "grad_norm": 429.34149169921875, "learning_rate": 1.9763294503603303e-06, "loss": 41.4062, "step": 10281 }, { "epoch": 0.09732963527418331, "grad_norm": 176.91094970703125, "learning_rate": 1.9763228188447998e-06, "loss": 18.8047, "step": 10282 }, { "epoch": 0.09733910129589837, "grad_norm": 297.7659912109375, "learning_rate": 1.976316186411589e-06, "loss": 22.5859, "step": 10283 }, { "epoch": 0.09734856731761343, "grad_norm": 1132.12060546875, "learning_rate": 1.9763095530607046e-06, "loss": 53.25, "step": 10284 }, { "epoch": 0.09735803333932848, "grad_norm": 677.9464111328125, "learning_rate": 1.976302918792153e-06, "loss": 47.8516, "step": 10285 }, { "epoch": 0.09736749936104354, "grad_norm": 191.21070861816406, "learning_rate": 1.97629628360594e-06, "loss": 18.6758, "step": 10286 }, { "epoch": 0.09737696538275858, "grad_norm": 823.8433837890625, "learning_rate": 1.976289647502072e-06, "loss": 38.0469, "step": 10287 }, { "epoch": 0.09738643140447364, "grad_norm": 199.12391662597656, "learning_rate": 1.976283010480555e-06, "loss": 24.7734, "step": 10288 }, { "epoch": 0.09739589742618869, "grad_norm": 239.36695861816406, "learning_rate": 1.976276372541396e-06, "loss": 29.3906, "step": 10289 }, { "epoch": 0.09740536344790375, "grad_norm": 464.7518615722656, "learning_rate": 1.9762697336846002e-06, "loss": 49.668, "step": 10290 }, { "epoch": 0.0974148294696188, "grad_norm": 304.86895751953125, "learning_rate": 1.976263093910175e-06, "loss": 19.7188, "step": 10291 }, { "epoch": 0.09742429549133386, "grad_norm": 578.18603515625, "learning_rate": 1.9762564532181253e-06, "loss": 19.2422, "step": 10292 }, { "epoch": 0.09743376151304892, "grad_norm": 548.3297119140625, "learning_rate": 1.9762498116084583e-06, "loss": 47.8906, "step": 10293 }, { "epoch": 0.09744322753476396, "grad_norm": 195.54144287109375, "learning_rate": 1.97624316908118e-06, "loss": 21.9844, "step": 10294 }, { "epoch": 0.09745269355647902, "grad_norm": 244.40379333496094, "learning_rate": 1.9762365256362967e-06, "loss": 14.1523, "step": 10295 }, { "epoch": 0.09746215957819407, "grad_norm": 417.9362487792969, "learning_rate": 1.976229881273815e-06, "loss": 62.0469, "step": 10296 }, { "epoch": 0.09747162559990913, "grad_norm": 368.09381103515625, "learning_rate": 1.9762232359937403e-06, "loss": 36.0977, "step": 10297 }, { "epoch": 0.09748109162162417, "grad_norm": 270.1180419921875, "learning_rate": 1.976216589796079e-06, "loss": 14.8477, "step": 10298 }, { "epoch": 0.09749055764333923, "grad_norm": 175.67605590820312, "learning_rate": 1.9762099426808376e-06, "loss": 23.8281, "step": 10299 }, { "epoch": 0.0975000236650543, "grad_norm": 508.4403381347656, "learning_rate": 1.9762032946480227e-06, "loss": 31.3438, "step": 10300 }, { "epoch": 0.09750948968676934, "grad_norm": 207.54005432128906, "learning_rate": 1.97619664569764e-06, "loss": 21.4219, "step": 10301 }, { "epoch": 0.0975189557084844, "grad_norm": 863.822998046875, "learning_rate": 1.9761899958296964e-06, "loss": 10.3906, "step": 10302 }, { "epoch": 0.09752842173019945, "grad_norm": 434.83148193359375, "learning_rate": 1.976183345044197e-06, "loss": 22.3906, "step": 10303 }, { "epoch": 0.0975378877519145, "grad_norm": 1844.4630126953125, "learning_rate": 1.9761766933411496e-06, "loss": 26.9414, "step": 10304 }, { "epoch": 0.09754735377362955, "grad_norm": 1036.388916015625, "learning_rate": 1.976170040720559e-06, "loss": 71.8125, "step": 10305 }, { "epoch": 0.09755681979534461, "grad_norm": 3.502329111099243, "learning_rate": 1.976163387182432e-06, "loss": 0.8857, "step": 10306 }, { "epoch": 0.09756628581705966, "grad_norm": 207.48321533203125, "learning_rate": 1.976156732726775e-06, "loss": 19.8438, "step": 10307 }, { "epoch": 0.09757575183877472, "grad_norm": 547.0567016601562, "learning_rate": 1.9761500773535946e-06, "loss": 40.5, "step": 10308 }, { "epoch": 0.09758521786048978, "grad_norm": 456.09967041015625, "learning_rate": 1.9761434210628963e-06, "loss": 21.8828, "step": 10309 }, { "epoch": 0.09759468388220482, "grad_norm": 709.7697143554688, "learning_rate": 1.976136763854687e-06, "loss": 62.6875, "step": 10310 }, { "epoch": 0.09760414990391988, "grad_norm": 300.9504699707031, "learning_rate": 1.976130105728972e-06, "loss": 23.2383, "step": 10311 }, { "epoch": 0.09761361592563493, "grad_norm": 534.18212890625, "learning_rate": 1.976123446685759e-06, "loss": 46.7344, "step": 10312 }, { "epoch": 0.09762308194734999, "grad_norm": 442.36505126953125, "learning_rate": 1.9761167867250527e-06, "loss": 46.3281, "step": 10313 }, { "epoch": 0.09763254796906504, "grad_norm": 3.2587411403656006, "learning_rate": 1.9761101258468604e-06, "loss": 0.9272, "step": 10314 }, { "epoch": 0.0976420139907801, "grad_norm": 414.86376953125, "learning_rate": 1.976103464051188e-06, "loss": 17.1953, "step": 10315 }, { "epoch": 0.09765148001249514, "grad_norm": 221.00144958496094, "learning_rate": 1.976096801338042e-06, "loss": 25.7188, "step": 10316 }, { "epoch": 0.0976609460342102, "grad_norm": 452.2099914550781, "learning_rate": 1.9760901377074285e-06, "loss": 16.5, "step": 10317 }, { "epoch": 0.09767041205592526, "grad_norm": 457.12786865234375, "learning_rate": 1.9760834731593537e-06, "loss": 13.4375, "step": 10318 }, { "epoch": 0.09767987807764031, "grad_norm": 502.55218505859375, "learning_rate": 1.976076807693824e-06, "loss": 46.2188, "step": 10319 }, { "epoch": 0.09768934409935537, "grad_norm": 1163.345458984375, "learning_rate": 1.976070141310846e-06, "loss": 40.3672, "step": 10320 }, { "epoch": 0.09769881012107041, "grad_norm": 472.6195983886719, "learning_rate": 1.976063474010425e-06, "loss": 25.2188, "step": 10321 }, { "epoch": 0.09770827614278547, "grad_norm": 3.055809736251831, "learning_rate": 1.976056805792568e-06, "loss": 0.9045, "step": 10322 }, { "epoch": 0.09771774216450052, "grad_norm": 346.5358581542969, "learning_rate": 1.976050136657281e-06, "loss": 23.0781, "step": 10323 }, { "epoch": 0.09772720818621558, "grad_norm": 332.6968688964844, "learning_rate": 1.9760434666045707e-06, "loss": 23.7266, "step": 10324 }, { "epoch": 0.09773667420793063, "grad_norm": 471.40740966796875, "learning_rate": 1.9760367956344425e-06, "loss": 60.1875, "step": 10325 }, { "epoch": 0.09774614022964569, "grad_norm": 502.49267578125, "learning_rate": 1.9760301237469034e-06, "loss": 46.4688, "step": 10326 }, { "epoch": 0.09775560625136075, "grad_norm": 189.1688690185547, "learning_rate": 1.97602345094196e-06, "loss": 18.1641, "step": 10327 }, { "epoch": 0.09776507227307579, "grad_norm": 711.5901489257812, "learning_rate": 1.9760167772196174e-06, "loss": 40.9844, "step": 10328 }, { "epoch": 0.09777453829479085, "grad_norm": 505.90411376953125, "learning_rate": 1.976010102579883e-06, "loss": 44.2656, "step": 10329 }, { "epoch": 0.0977840043165059, "grad_norm": 157.70034790039062, "learning_rate": 1.976003427022762e-06, "loss": 16.5586, "step": 10330 }, { "epoch": 0.09779347033822096, "grad_norm": 310.81268310546875, "learning_rate": 1.9759967505482616e-06, "loss": 24.7188, "step": 10331 }, { "epoch": 0.097802936359936, "grad_norm": 372.4911804199219, "learning_rate": 1.975990073156388e-06, "loss": 40.6406, "step": 10332 }, { "epoch": 0.09781240238165106, "grad_norm": 384.564208984375, "learning_rate": 1.9759833948471468e-06, "loss": 19.543, "step": 10333 }, { "epoch": 0.09782186840336611, "grad_norm": 277.5809631347656, "learning_rate": 1.975976715620545e-06, "loss": 30.6172, "step": 10334 }, { "epoch": 0.09783133442508117, "grad_norm": 2.8912665843963623, "learning_rate": 1.9759700354765886e-06, "loss": 0.855, "step": 10335 }, { "epoch": 0.09784080044679623, "grad_norm": 955.6618041992188, "learning_rate": 1.975963354415284e-06, "loss": 59.0312, "step": 10336 }, { "epoch": 0.09785026646851128, "grad_norm": 395.9432678222656, "learning_rate": 1.9759566724366363e-06, "loss": 33.8984, "step": 10337 }, { "epoch": 0.09785973249022634, "grad_norm": 367.2672119140625, "learning_rate": 1.975949989540654e-06, "loss": 34.5625, "step": 10338 }, { "epoch": 0.09786919851194138, "grad_norm": 535.1878051757812, "learning_rate": 1.975943305727342e-06, "loss": 36.9688, "step": 10339 }, { "epoch": 0.09787866453365644, "grad_norm": 345.7509460449219, "learning_rate": 1.9759366209967065e-06, "loss": 19.0469, "step": 10340 }, { "epoch": 0.09788813055537149, "grad_norm": 283.3561706542969, "learning_rate": 1.975929935348754e-06, "loss": 26.3008, "step": 10341 }, { "epoch": 0.09789759657708655, "grad_norm": 365.5733642578125, "learning_rate": 1.975923248783491e-06, "loss": 51.1406, "step": 10342 }, { "epoch": 0.09790706259880161, "grad_norm": 788.6690673828125, "learning_rate": 1.9759165613009235e-06, "loss": 54.4141, "step": 10343 }, { "epoch": 0.09791652862051665, "grad_norm": 494.7263488769531, "learning_rate": 1.9759098729010582e-06, "loss": 54.2188, "step": 10344 }, { "epoch": 0.09792599464223171, "grad_norm": 1290.26904296875, "learning_rate": 1.9759031835839007e-06, "loss": 41.4219, "step": 10345 }, { "epoch": 0.09793546066394676, "grad_norm": 1141.926513671875, "learning_rate": 1.975896493349458e-06, "loss": 37.0156, "step": 10346 }, { "epoch": 0.09794492668566182, "grad_norm": 498.5381164550781, "learning_rate": 1.975889802197736e-06, "loss": 50.0781, "step": 10347 }, { "epoch": 0.09795439270737687, "grad_norm": 424.741943359375, "learning_rate": 1.9758831101287413e-06, "loss": 48.7969, "step": 10348 }, { "epoch": 0.09796385872909193, "grad_norm": 503.9120178222656, "learning_rate": 1.9758764171424797e-06, "loss": 34.6094, "step": 10349 }, { "epoch": 0.09797332475080697, "grad_norm": 591.324462890625, "learning_rate": 1.9758697232389577e-06, "loss": 15.7344, "step": 10350 }, { "epoch": 0.09798279077252203, "grad_norm": 630.6304321289062, "learning_rate": 1.975863028418182e-06, "loss": 49.3125, "step": 10351 }, { "epoch": 0.09799225679423709, "grad_norm": 793.0771484375, "learning_rate": 1.975856332680158e-06, "loss": 43.2227, "step": 10352 }, { "epoch": 0.09800172281595214, "grad_norm": 189.85208129882812, "learning_rate": 1.975849636024893e-06, "loss": 17.3398, "step": 10353 }, { "epoch": 0.0980111888376672, "grad_norm": 431.83062744140625, "learning_rate": 1.9758429384523926e-06, "loss": 27.9531, "step": 10354 }, { "epoch": 0.09802065485938224, "grad_norm": 495.9781494140625, "learning_rate": 1.975836239962663e-06, "loss": 24.3281, "step": 10355 }, { "epoch": 0.0980301208810973, "grad_norm": 3.032191276550293, "learning_rate": 1.9758295405557114e-06, "loss": 0.9224, "step": 10356 }, { "epoch": 0.09803958690281235, "grad_norm": 1230.7496337890625, "learning_rate": 1.9758228402315433e-06, "loss": 31.8516, "step": 10357 }, { "epoch": 0.09804905292452741, "grad_norm": 3.1814911365509033, "learning_rate": 1.9758161389901655e-06, "loss": 0.9316, "step": 10358 }, { "epoch": 0.09805851894624246, "grad_norm": 272.021240234375, "learning_rate": 1.9758094368315836e-06, "loss": 19.8516, "step": 10359 }, { "epoch": 0.09806798496795752, "grad_norm": 1309.969970703125, "learning_rate": 1.9758027337558044e-06, "loss": 63.3438, "step": 10360 }, { "epoch": 0.09807745098967258, "grad_norm": 194.75299072265625, "learning_rate": 1.975796029762834e-06, "loss": 15.4883, "step": 10361 }, { "epoch": 0.09808691701138762, "grad_norm": 339.1365051269531, "learning_rate": 1.9757893248526793e-06, "loss": 22.418, "step": 10362 }, { "epoch": 0.09809638303310268, "grad_norm": 221.52230834960938, "learning_rate": 1.9757826190253453e-06, "loss": 23.4531, "step": 10363 }, { "epoch": 0.09810584905481773, "grad_norm": 465.6391296386719, "learning_rate": 1.9757759122808398e-06, "loss": 36.8906, "step": 10364 }, { "epoch": 0.09811531507653279, "grad_norm": 2.949362277984619, "learning_rate": 1.9757692046191683e-06, "loss": 1.0225, "step": 10365 }, { "epoch": 0.09812478109824783, "grad_norm": 166.07484436035156, "learning_rate": 1.975762496040337e-06, "loss": 16.6562, "step": 10366 }, { "epoch": 0.0981342471199629, "grad_norm": 445.81207275390625, "learning_rate": 1.9757557865443526e-06, "loss": 24.7969, "step": 10367 }, { "epoch": 0.09814371314167794, "grad_norm": 358.4668884277344, "learning_rate": 1.975749076131221e-06, "loss": 22.8984, "step": 10368 }, { "epoch": 0.098153179163393, "grad_norm": 499.86639404296875, "learning_rate": 1.975742364800949e-06, "loss": 24.7188, "step": 10369 }, { "epoch": 0.09816264518510806, "grad_norm": 178.606689453125, "learning_rate": 1.9757356525535425e-06, "loss": 22.9141, "step": 10370 }, { "epoch": 0.0981721112068231, "grad_norm": 445.4112243652344, "learning_rate": 1.9757289393890084e-06, "loss": 22.3359, "step": 10371 }, { "epoch": 0.09818157722853817, "grad_norm": 428.4311828613281, "learning_rate": 1.975722225307352e-06, "loss": 45.9375, "step": 10372 }, { "epoch": 0.09819104325025321, "grad_norm": 455.4181823730469, "learning_rate": 1.9757155103085806e-06, "loss": 44.1094, "step": 10373 }, { "epoch": 0.09820050927196827, "grad_norm": 399.91021728515625, "learning_rate": 1.9757087943927e-06, "loss": 47.2734, "step": 10374 }, { "epoch": 0.09820997529368332, "grad_norm": 361.8592834472656, "learning_rate": 1.975702077559716e-06, "loss": 30.4375, "step": 10375 }, { "epoch": 0.09821944131539838, "grad_norm": 256.8457946777344, "learning_rate": 1.9756953598096362e-06, "loss": 17.9922, "step": 10376 }, { "epoch": 0.09822890733711342, "grad_norm": 413.14788818359375, "learning_rate": 1.975688641142466e-06, "loss": 45.7422, "step": 10377 }, { "epoch": 0.09823837335882848, "grad_norm": 284.602783203125, "learning_rate": 1.975681921558212e-06, "loss": 12.6172, "step": 10378 }, { "epoch": 0.09824783938054354, "grad_norm": 4.253300666809082, "learning_rate": 1.9756752010568805e-06, "loss": 0.896, "step": 10379 }, { "epoch": 0.09825730540225859, "grad_norm": 375.0534362792969, "learning_rate": 1.975668479638478e-06, "loss": 42.4531, "step": 10380 }, { "epoch": 0.09826677142397365, "grad_norm": 321.6159362792969, "learning_rate": 1.9756617573030104e-06, "loss": 42.3906, "step": 10381 }, { "epoch": 0.0982762374456887, "grad_norm": 237.150634765625, "learning_rate": 1.975655034050484e-06, "loss": 13.9609, "step": 10382 }, { "epoch": 0.09828570346740376, "grad_norm": 673.5106811523438, "learning_rate": 1.9756483098809058e-06, "loss": 25.1406, "step": 10383 }, { "epoch": 0.0982951694891188, "grad_norm": 729.9949951171875, "learning_rate": 1.9756415847942813e-06, "loss": 35.2891, "step": 10384 }, { "epoch": 0.09830463551083386, "grad_norm": 438.16729736328125, "learning_rate": 1.9756348587906174e-06, "loss": 30.3047, "step": 10385 }, { "epoch": 0.09831410153254892, "grad_norm": 315.14886474609375, "learning_rate": 1.97562813186992e-06, "loss": 21.3672, "step": 10386 }, { "epoch": 0.09832356755426397, "grad_norm": 437.479248046875, "learning_rate": 1.9756214040321956e-06, "loss": 9.4688, "step": 10387 }, { "epoch": 0.09833303357597903, "grad_norm": 324.9654541015625, "learning_rate": 1.975614675277451e-06, "loss": 38.9531, "step": 10388 }, { "epoch": 0.09834249959769407, "grad_norm": 222.1731414794922, "learning_rate": 1.9756079456056913e-06, "loss": 9.25, "step": 10389 }, { "epoch": 0.09835196561940913, "grad_norm": 2.940930128097534, "learning_rate": 1.975601215016924e-06, "loss": 0.9014, "step": 10390 }, { "epoch": 0.09836143164112418, "grad_norm": 579.0230712890625, "learning_rate": 1.975594483511155e-06, "loss": 21.2188, "step": 10391 }, { "epoch": 0.09837089766283924, "grad_norm": 2.8820202350616455, "learning_rate": 1.9755877510883908e-06, "loss": 0.8867, "step": 10392 }, { "epoch": 0.09838036368455429, "grad_norm": 328.38739013671875, "learning_rate": 1.9755810177486376e-06, "loss": 22.0703, "step": 10393 }, { "epoch": 0.09838982970626935, "grad_norm": 362.91259765625, "learning_rate": 1.9755742834919014e-06, "loss": 38.6562, "step": 10394 }, { "epoch": 0.0983992957279844, "grad_norm": 1231.831787109375, "learning_rate": 1.975567548318189e-06, "loss": 35.8984, "step": 10395 }, { "epoch": 0.09840876174969945, "grad_norm": 228.1195526123047, "learning_rate": 1.9755608122275066e-06, "loss": 14.7656, "step": 10396 }, { "epoch": 0.09841822777141451, "grad_norm": 494.0980224609375, "learning_rate": 1.9755540752198607e-06, "loss": 21.9844, "step": 10397 }, { "epoch": 0.09842769379312956, "grad_norm": 162.33761596679688, "learning_rate": 1.975547337295257e-06, "loss": 19.5625, "step": 10398 }, { "epoch": 0.09843715981484462, "grad_norm": 408.5000915527344, "learning_rate": 1.9755405984537027e-06, "loss": 30.2031, "step": 10399 }, { "epoch": 0.09844662583655966, "grad_norm": 2.7992103099823, "learning_rate": 1.9755338586952033e-06, "loss": 0.9224, "step": 10400 }, { "epoch": 0.09845609185827472, "grad_norm": 288.8086242675781, "learning_rate": 1.975527118019766e-06, "loss": 27.9062, "step": 10401 }, { "epoch": 0.09846555787998977, "grad_norm": 221.4352569580078, "learning_rate": 1.975520376427396e-06, "loss": 35.1953, "step": 10402 }, { "epoch": 0.09847502390170483, "grad_norm": 490.8182678222656, "learning_rate": 1.975513633918101e-06, "loss": 42.9609, "step": 10403 }, { "epoch": 0.09848448992341989, "grad_norm": 423.80902099609375, "learning_rate": 1.975506890491886e-06, "loss": 17.6172, "step": 10404 }, { "epoch": 0.09849395594513494, "grad_norm": 231.14669799804688, "learning_rate": 1.9755001461487588e-06, "loss": 23.0859, "step": 10405 }, { "epoch": 0.09850342196685, "grad_norm": 628.6065063476562, "learning_rate": 1.975493400888724e-06, "loss": 25.6797, "step": 10406 }, { "epoch": 0.09851288798856504, "grad_norm": 305.96466064453125, "learning_rate": 1.97548665471179e-06, "loss": 16.8984, "step": 10407 }, { "epoch": 0.0985223540102801, "grad_norm": 351.3771667480469, "learning_rate": 1.975479907617961e-06, "loss": 28.7656, "step": 10408 }, { "epoch": 0.09853182003199515, "grad_norm": 294.8346252441406, "learning_rate": 1.975473159607245e-06, "loss": 32.3281, "step": 10409 }, { "epoch": 0.09854128605371021, "grad_norm": 386.4176025390625, "learning_rate": 1.975466410679647e-06, "loss": 17.4961, "step": 10410 }, { "epoch": 0.09855075207542525, "grad_norm": 484.5563049316406, "learning_rate": 1.9754596608351742e-06, "loss": 51.5156, "step": 10411 }, { "epoch": 0.09856021809714031, "grad_norm": 219.4167022705078, "learning_rate": 1.975452910073833e-06, "loss": 15.6094, "step": 10412 }, { "epoch": 0.09856968411885537, "grad_norm": 619.6824951171875, "learning_rate": 1.9754461583956297e-06, "loss": 41.2266, "step": 10413 }, { "epoch": 0.09857915014057042, "grad_norm": 634.3948364257812, "learning_rate": 1.97543940580057e-06, "loss": 24.2344, "step": 10414 }, { "epoch": 0.09858861616228548, "grad_norm": 261.8423767089844, "learning_rate": 1.9754326522886612e-06, "loss": 24.3906, "step": 10415 }, { "epoch": 0.09859808218400053, "grad_norm": 408.32220458984375, "learning_rate": 1.975425897859909e-06, "loss": 25.2578, "step": 10416 }, { "epoch": 0.09860754820571559, "grad_norm": 492.8872375488281, "learning_rate": 1.97541914251432e-06, "loss": 19.6484, "step": 10417 }, { "epoch": 0.09861701422743063, "grad_norm": 3.365710973739624, "learning_rate": 1.9754123862519e-06, "loss": 0.9604, "step": 10418 }, { "epoch": 0.09862648024914569, "grad_norm": 277.56634521484375, "learning_rate": 1.975405629072656e-06, "loss": 23.5469, "step": 10419 }, { "epoch": 0.09863594627086074, "grad_norm": 866.2472534179688, "learning_rate": 1.9753988709765944e-06, "loss": 38.1562, "step": 10420 }, { "epoch": 0.0986454122925758, "grad_norm": 404.5047607421875, "learning_rate": 1.975392111963721e-06, "loss": 22.0312, "step": 10421 }, { "epoch": 0.09865487831429086, "grad_norm": 498.59259033203125, "learning_rate": 1.975385352034043e-06, "loss": 19.375, "step": 10422 }, { "epoch": 0.0986643443360059, "grad_norm": 298.3697814941406, "learning_rate": 1.975378591187566e-06, "loss": 15.9297, "step": 10423 }, { "epoch": 0.09867381035772096, "grad_norm": 291.7547607421875, "learning_rate": 1.975371829424296e-06, "loss": 32.0547, "step": 10424 }, { "epoch": 0.09868327637943601, "grad_norm": 320.02288818359375, "learning_rate": 1.9753650667442407e-06, "loss": 17.2578, "step": 10425 }, { "epoch": 0.09869274240115107, "grad_norm": 548.949462890625, "learning_rate": 1.975358303147405e-06, "loss": 60.9844, "step": 10426 }, { "epoch": 0.09870220842286612, "grad_norm": 469.86529541015625, "learning_rate": 1.9753515386337966e-06, "loss": 22.0117, "step": 10427 }, { "epoch": 0.09871167444458118, "grad_norm": 462.38055419921875, "learning_rate": 1.975344773203421e-06, "loss": 47.7188, "step": 10428 }, { "epoch": 0.09872114046629622, "grad_norm": 307.0365905761719, "learning_rate": 1.9753380068562846e-06, "loss": 26.2812, "step": 10429 }, { "epoch": 0.09873060648801128, "grad_norm": 278.0803527832031, "learning_rate": 1.975331239592394e-06, "loss": 20.5938, "step": 10430 }, { "epoch": 0.09874007250972634, "grad_norm": 627.2938232421875, "learning_rate": 1.9753244714117557e-06, "loss": 13.2734, "step": 10431 }, { "epoch": 0.09874953853144139, "grad_norm": 358.4927062988281, "learning_rate": 1.975317702314376e-06, "loss": 21.8047, "step": 10432 }, { "epoch": 0.09875900455315645, "grad_norm": 647.2894897460938, "learning_rate": 1.9753109323002603e-06, "loss": 56.3125, "step": 10433 }, { "epoch": 0.0987684705748715, "grad_norm": 325.6732482910156, "learning_rate": 1.975304161369416e-06, "loss": 31.4531, "step": 10434 }, { "epoch": 0.09877793659658655, "grad_norm": 319.8504638671875, "learning_rate": 1.9752973895218495e-06, "loss": 27.2969, "step": 10435 }, { "epoch": 0.0987874026183016, "grad_norm": 286.9356384277344, "learning_rate": 1.975290616757567e-06, "loss": 22.6875, "step": 10436 }, { "epoch": 0.09879686864001666, "grad_norm": 940.7423706054688, "learning_rate": 1.9752838430765746e-06, "loss": 46.2109, "step": 10437 }, { "epoch": 0.09880633466173172, "grad_norm": 187.41110229492188, "learning_rate": 1.975277068478879e-06, "loss": 19.2578, "step": 10438 }, { "epoch": 0.09881580068344677, "grad_norm": 399.7290344238281, "learning_rate": 1.9752702929644865e-06, "loss": 14.6406, "step": 10439 }, { "epoch": 0.09882526670516183, "grad_norm": 416.6968688964844, "learning_rate": 1.975263516533403e-06, "loss": 20.3359, "step": 10440 }, { "epoch": 0.09883473272687687, "grad_norm": 225.5126953125, "learning_rate": 1.9752567391856356e-06, "loss": 10.5156, "step": 10441 }, { "epoch": 0.09884419874859193, "grad_norm": 138.52012634277344, "learning_rate": 1.97524996092119e-06, "loss": 20.3672, "step": 10442 }, { "epoch": 0.09885366477030698, "grad_norm": 426.450927734375, "learning_rate": 1.975243181740073e-06, "loss": 17.0547, "step": 10443 }, { "epoch": 0.09886313079202204, "grad_norm": 303.82513427734375, "learning_rate": 1.9752364016422906e-06, "loss": 32.5625, "step": 10444 }, { "epoch": 0.09887259681373708, "grad_norm": 612.205078125, "learning_rate": 1.9752296206278497e-06, "loss": 37.7188, "step": 10445 }, { "epoch": 0.09888206283545214, "grad_norm": 225.19180297851562, "learning_rate": 1.9752228386967564e-06, "loss": 19.8984, "step": 10446 }, { "epoch": 0.0988915288571672, "grad_norm": 195.0120086669922, "learning_rate": 1.9752160558490168e-06, "loss": 20.5312, "step": 10447 }, { "epoch": 0.09890099487888225, "grad_norm": 502.1038513183594, "learning_rate": 1.975209272084638e-06, "loss": 45.5, "step": 10448 }, { "epoch": 0.09891046090059731, "grad_norm": 179.06121826171875, "learning_rate": 1.9752024874036256e-06, "loss": 16.4844, "step": 10449 }, { "epoch": 0.09891992692231236, "grad_norm": 233.52622985839844, "learning_rate": 1.9751957018059863e-06, "loss": 8.5352, "step": 10450 }, { "epoch": 0.09892939294402742, "grad_norm": 295.1914978027344, "learning_rate": 1.975188915291727e-06, "loss": 11.5078, "step": 10451 }, { "epoch": 0.09893885896574246, "grad_norm": 808.3541259765625, "learning_rate": 1.975182127860853e-06, "loss": 15.918, "step": 10452 }, { "epoch": 0.09894832498745752, "grad_norm": 392.3846435546875, "learning_rate": 1.975175339513371e-06, "loss": 42.4531, "step": 10453 }, { "epoch": 0.09895779100917257, "grad_norm": 424.0802917480469, "learning_rate": 1.9751685502492884e-06, "loss": 25.4609, "step": 10454 }, { "epoch": 0.09896725703088763, "grad_norm": 222.34893798828125, "learning_rate": 1.9751617600686105e-06, "loss": 17.8906, "step": 10455 }, { "epoch": 0.09897672305260269, "grad_norm": 1006.8344116210938, "learning_rate": 1.975154968971344e-06, "loss": 32.3281, "step": 10456 }, { "epoch": 0.09898618907431773, "grad_norm": 201.7296142578125, "learning_rate": 1.9751481769574947e-06, "loss": 24.8594, "step": 10457 }, { "epoch": 0.0989956550960328, "grad_norm": 464.29888916015625, "learning_rate": 1.9751413840270705e-06, "loss": 35.4375, "step": 10458 }, { "epoch": 0.09900512111774784, "grad_norm": 579.51220703125, "learning_rate": 1.975134590180076e-06, "loss": 38.5156, "step": 10459 }, { "epoch": 0.0990145871394629, "grad_norm": 327.7087097167969, "learning_rate": 1.975127795416519e-06, "loss": 33.8438, "step": 10460 }, { "epoch": 0.09902405316117795, "grad_norm": 3.0270233154296875, "learning_rate": 1.975120999736405e-06, "loss": 0.8538, "step": 10461 }, { "epoch": 0.099033519182893, "grad_norm": 592.593505859375, "learning_rate": 1.975114203139741e-06, "loss": 41.1875, "step": 10462 }, { "epoch": 0.09904298520460805, "grad_norm": 253.55532836914062, "learning_rate": 1.9751074056265325e-06, "loss": 18.6875, "step": 10463 }, { "epoch": 0.09905245122632311, "grad_norm": 616.60986328125, "learning_rate": 1.9751006071967866e-06, "loss": 19.25, "step": 10464 }, { "epoch": 0.09906191724803817, "grad_norm": 528.144775390625, "learning_rate": 1.97509380785051e-06, "loss": 22.125, "step": 10465 }, { "epoch": 0.09907138326975322, "grad_norm": 1471.817138671875, "learning_rate": 1.9750870075877082e-06, "loss": 15.9805, "step": 10466 }, { "epoch": 0.09908084929146828, "grad_norm": 3.264573097229004, "learning_rate": 1.975080206408389e-06, "loss": 0.9453, "step": 10467 }, { "epoch": 0.09909031531318332, "grad_norm": 313.4655456542969, "learning_rate": 1.975073404312557e-06, "loss": 20.9727, "step": 10468 }, { "epoch": 0.09909978133489838, "grad_norm": 460.9433898925781, "learning_rate": 1.975066601300219e-06, "loss": 9.1973, "step": 10469 }, { "epoch": 0.09910924735661343, "grad_norm": 518.381591796875, "learning_rate": 1.9750597973713826e-06, "loss": 45.6562, "step": 10470 }, { "epoch": 0.09911871337832849, "grad_norm": 497.022216796875, "learning_rate": 1.9750529925260533e-06, "loss": 37.6016, "step": 10471 }, { "epoch": 0.09912817940004354, "grad_norm": 1456.469970703125, "learning_rate": 1.9750461867642378e-06, "loss": 33.7734, "step": 10472 }, { "epoch": 0.0991376454217586, "grad_norm": 770.9595336914062, "learning_rate": 1.975039380085942e-06, "loss": 39.6562, "step": 10473 }, { "epoch": 0.09914711144347366, "grad_norm": 1448.3214111328125, "learning_rate": 1.9750325724911725e-06, "loss": 24.2266, "step": 10474 }, { "epoch": 0.0991565774651887, "grad_norm": 259.16278076171875, "learning_rate": 1.9750257639799363e-06, "loss": 26.6406, "step": 10475 }, { "epoch": 0.09916604348690376, "grad_norm": 201.31863403320312, "learning_rate": 1.9750189545522385e-06, "loss": 27.9375, "step": 10476 }, { "epoch": 0.09917550950861881, "grad_norm": 207.7871551513672, "learning_rate": 1.9750121442080874e-06, "loss": 18.5781, "step": 10477 }, { "epoch": 0.09918497553033387, "grad_norm": 292.58953857421875, "learning_rate": 1.9750053329474873e-06, "loss": 11.6406, "step": 10478 }, { "epoch": 0.09919444155204891, "grad_norm": 308.9752502441406, "learning_rate": 1.974998520770446e-06, "loss": 12.4141, "step": 10479 }, { "epoch": 0.09920390757376397, "grad_norm": 494.34814453125, "learning_rate": 1.9749917076769697e-06, "loss": 25.6719, "step": 10480 }, { "epoch": 0.09921337359547903, "grad_norm": 363.6687927246094, "learning_rate": 1.974984893667064e-06, "loss": 14.4219, "step": 10481 }, { "epoch": 0.09922283961719408, "grad_norm": 1657.044921875, "learning_rate": 1.9749780787407367e-06, "loss": 53.3438, "step": 10482 }, { "epoch": 0.09923230563890914, "grad_norm": 824.8363647460938, "learning_rate": 1.9749712628979933e-06, "loss": 40.875, "step": 10483 }, { "epoch": 0.09924177166062419, "grad_norm": 982.764404296875, "learning_rate": 1.97496444613884e-06, "loss": 54.6641, "step": 10484 }, { "epoch": 0.09925123768233925, "grad_norm": 374.8672180175781, "learning_rate": 1.974957628463284e-06, "loss": 21.582, "step": 10485 }, { "epoch": 0.09926070370405429, "grad_norm": 264.6556396484375, "learning_rate": 1.974950809871331e-06, "loss": 12.0664, "step": 10486 }, { "epoch": 0.09927016972576935, "grad_norm": 225.75660705566406, "learning_rate": 1.9749439903629875e-06, "loss": 22.3672, "step": 10487 }, { "epoch": 0.0992796357474844, "grad_norm": 377.72064208984375, "learning_rate": 1.97493716993826e-06, "loss": 14.6406, "step": 10488 }, { "epoch": 0.09928910176919946, "grad_norm": 580.0833129882812, "learning_rate": 1.974930348597155e-06, "loss": 44.9062, "step": 10489 }, { "epoch": 0.09929856779091452, "grad_norm": 4.108983516693115, "learning_rate": 1.974923526339679e-06, "loss": 1.0034, "step": 10490 }, { "epoch": 0.09930803381262956, "grad_norm": 195.62484741210938, "learning_rate": 1.9749167031658385e-06, "loss": 33.1719, "step": 10491 }, { "epoch": 0.09931749983434462, "grad_norm": 174.94029235839844, "learning_rate": 1.97490987907564e-06, "loss": 20.9297, "step": 10492 }, { "epoch": 0.09932696585605967, "grad_norm": 457.2878112792969, "learning_rate": 1.9749030540690893e-06, "loss": 50.6406, "step": 10493 }, { "epoch": 0.09933643187777473, "grad_norm": 345.0350341796875, "learning_rate": 1.974896228146193e-06, "loss": 34.7969, "step": 10494 }, { "epoch": 0.09934589789948978, "grad_norm": 457.0674133300781, "learning_rate": 1.974889401306958e-06, "loss": 19.6406, "step": 10495 }, { "epoch": 0.09935536392120484, "grad_norm": 360.1155090332031, "learning_rate": 1.9748825735513898e-06, "loss": 45.5703, "step": 10496 }, { "epoch": 0.09936482994291988, "grad_norm": 286.20135498046875, "learning_rate": 1.9748757448794955e-06, "loss": 35.5312, "step": 10497 }, { "epoch": 0.09937429596463494, "grad_norm": 598.2994995117188, "learning_rate": 1.9748689152912823e-06, "loss": 66.7188, "step": 10498 }, { "epoch": 0.09938376198635, "grad_norm": 274.84857177734375, "learning_rate": 1.9748620847867547e-06, "loss": 22.8672, "step": 10499 }, { "epoch": 0.09939322800806505, "grad_norm": 282.0081787109375, "learning_rate": 1.974855253365921e-06, "loss": 16.25, "step": 10500 }, { "epoch": 0.09940269402978011, "grad_norm": 536.1817626953125, "learning_rate": 1.9748484210287863e-06, "loss": 31.6484, "step": 10501 }, { "epoch": 0.09941216005149515, "grad_norm": 358.7933654785156, "learning_rate": 1.9748415877753573e-06, "loss": 29.0938, "step": 10502 }, { "epoch": 0.09942162607321021, "grad_norm": 275.96142578125, "learning_rate": 1.9748347536056407e-06, "loss": 26.5312, "step": 10503 }, { "epoch": 0.09943109209492526, "grad_norm": 282.97637939453125, "learning_rate": 1.9748279185196433e-06, "loss": 23.5938, "step": 10504 }, { "epoch": 0.09944055811664032, "grad_norm": 225.2004852294922, "learning_rate": 1.9748210825173704e-06, "loss": 19.3125, "step": 10505 }, { "epoch": 0.09945002413835537, "grad_norm": 280.664794921875, "learning_rate": 1.97481424559883e-06, "loss": 40.7969, "step": 10506 }, { "epoch": 0.09945949016007043, "grad_norm": 2.9684271812438965, "learning_rate": 1.974807407764027e-06, "loss": 0.9756, "step": 10507 }, { "epoch": 0.09946895618178549, "grad_norm": 635.0742797851562, "learning_rate": 1.9748005690129685e-06, "loss": 33.0273, "step": 10508 }, { "epoch": 0.09947842220350053, "grad_norm": 484.6530456542969, "learning_rate": 1.974793729345661e-06, "loss": 23.6719, "step": 10509 }, { "epoch": 0.09948788822521559, "grad_norm": 311.8675231933594, "learning_rate": 1.9747868887621107e-06, "loss": 28.1094, "step": 10510 }, { "epoch": 0.09949735424693064, "grad_norm": 327.2730407714844, "learning_rate": 1.9747800472623243e-06, "loss": 34.2031, "step": 10511 }, { "epoch": 0.0995068202686457, "grad_norm": 3.4789772033691406, "learning_rate": 1.974773204846308e-06, "loss": 0.9238, "step": 10512 }, { "epoch": 0.09951628629036074, "grad_norm": 257.3350830078125, "learning_rate": 1.974766361514068e-06, "loss": 18.5625, "step": 10513 }, { "epoch": 0.0995257523120758, "grad_norm": 556.5375366210938, "learning_rate": 1.9747595172656115e-06, "loss": 23.3359, "step": 10514 }, { "epoch": 0.09953521833379085, "grad_norm": 574.2821044921875, "learning_rate": 1.9747526721009443e-06, "loss": 58.2656, "step": 10515 }, { "epoch": 0.09954468435550591, "grad_norm": 457.4751892089844, "learning_rate": 1.974745826020073e-06, "loss": 57.4844, "step": 10516 }, { "epoch": 0.09955415037722097, "grad_norm": 587.476318359375, "learning_rate": 1.974738979023004e-06, "loss": 39.3906, "step": 10517 }, { "epoch": 0.09956361639893602, "grad_norm": 280.83831787109375, "learning_rate": 1.974732131109744e-06, "loss": 24.7734, "step": 10518 }, { "epoch": 0.09957308242065108, "grad_norm": 280.8338928222656, "learning_rate": 1.974725282280299e-06, "loss": 25.7031, "step": 10519 }, { "epoch": 0.09958254844236612, "grad_norm": 324.6818542480469, "learning_rate": 1.9747184325346754e-06, "loss": 20.9219, "step": 10520 }, { "epoch": 0.09959201446408118, "grad_norm": 265.00360107421875, "learning_rate": 1.97471158187288e-06, "loss": 21.2656, "step": 10521 }, { "epoch": 0.09960148048579623, "grad_norm": 446.798828125, "learning_rate": 1.9747047302949194e-06, "loss": 35.1719, "step": 10522 }, { "epoch": 0.09961094650751129, "grad_norm": 335.78973388671875, "learning_rate": 1.9746978778007995e-06, "loss": 10.3828, "step": 10523 }, { "epoch": 0.09962041252922635, "grad_norm": 708.5814208984375, "learning_rate": 1.9746910243905275e-06, "loss": 55.9844, "step": 10524 }, { "epoch": 0.0996298785509414, "grad_norm": 308.51129150390625, "learning_rate": 1.9746841700641084e-06, "loss": 33.1562, "step": 10525 }, { "epoch": 0.09963934457265645, "grad_norm": 1825.2071533203125, "learning_rate": 1.9746773148215504e-06, "loss": 40.4766, "step": 10526 }, { "epoch": 0.0996488105943715, "grad_norm": 3.9277267456054688, "learning_rate": 1.974670458662859e-06, "loss": 0.9819, "step": 10527 }, { "epoch": 0.09965827661608656, "grad_norm": 717.6543579101562, "learning_rate": 1.9746636015880403e-06, "loss": 35.0859, "step": 10528 }, { "epoch": 0.0996677426378016, "grad_norm": 376.1217956542969, "learning_rate": 1.9746567435971017e-06, "loss": 31.4258, "step": 10529 }, { "epoch": 0.09967720865951667, "grad_norm": 585.8643188476562, "learning_rate": 1.974649884690049e-06, "loss": 53.5, "step": 10530 }, { "epoch": 0.09968667468123171, "grad_norm": 663.4803466796875, "learning_rate": 1.974643024866889e-06, "loss": 95.0781, "step": 10531 }, { "epoch": 0.09969614070294677, "grad_norm": 226.31419372558594, "learning_rate": 1.9746361641276275e-06, "loss": 16.9453, "step": 10532 }, { "epoch": 0.09970560672466183, "grad_norm": 456.3667907714844, "learning_rate": 1.9746293024722716e-06, "loss": 48.4766, "step": 10533 }, { "epoch": 0.09971507274637688, "grad_norm": 313.5391845703125, "learning_rate": 1.974622439900828e-06, "loss": 22.9375, "step": 10534 }, { "epoch": 0.09972453876809194, "grad_norm": 991.4757690429688, "learning_rate": 1.9746155764133024e-06, "loss": 46.3516, "step": 10535 }, { "epoch": 0.09973400478980698, "grad_norm": 442.83349609375, "learning_rate": 1.9746087120097014e-06, "loss": 30.8281, "step": 10536 }, { "epoch": 0.09974347081152204, "grad_norm": 343.6889343261719, "learning_rate": 1.974601846690032e-06, "loss": 22.0625, "step": 10537 }, { "epoch": 0.09975293683323709, "grad_norm": 444.3009338378906, "learning_rate": 1.9745949804543e-06, "loss": 24.0703, "step": 10538 }, { "epoch": 0.09976240285495215, "grad_norm": 223.16622924804688, "learning_rate": 1.974588113302512e-06, "loss": 22.3047, "step": 10539 }, { "epoch": 0.0997718688766672, "grad_norm": 255.51681518554688, "learning_rate": 1.974581245234675e-06, "loss": 21.2344, "step": 10540 }, { "epoch": 0.09978133489838226, "grad_norm": 411.4266052246094, "learning_rate": 1.9745743762507945e-06, "loss": 24.5078, "step": 10541 }, { "epoch": 0.09979080092009732, "grad_norm": 629.9450073242188, "learning_rate": 1.9745675063508777e-06, "loss": 48.2891, "step": 10542 }, { "epoch": 0.09980026694181236, "grad_norm": 788.22705078125, "learning_rate": 1.974560635534931e-06, "loss": 48.6406, "step": 10543 }, { "epoch": 0.09980973296352742, "grad_norm": 433.68218994140625, "learning_rate": 1.9745537638029608e-06, "loss": 20.7266, "step": 10544 }, { "epoch": 0.09981919898524247, "grad_norm": 190.0010223388672, "learning_rate": 1.974546891154973e-06, "loss": 10.2266, "step": 10545 }, { "epoch": 0.09982866500695753, "grad_norm": 434.3814697265625, "learning_rate": 1.9745400175909746e-06, "loss": 36.4531, "step": 10546 }, { "epoch": 0.09983813102867257, "grad_norm": 208.67723083496094, "learning_rate": 1.9745331431109723e-06, "loss": 16.8828, "step": 10547 }, { "epoch": 0.09984759705038763, "grad_norm": 341.0684814453125, "learning_rate": 1.974526267714972e-06, "loss": 34.125, "step": 10548 }, { "epoch": 0.09985706307210268, "grad_norm": 166.9879150390625, "learning_rate": 1.9745193914029806e-06, "loss": 21.9375, "step": 10549 }, { "epoch": 0.09986652909381774, "grad_norm": 313.43023681640625, "learning_rate": 1.974512514175004e-06, "loss": 47.1016, "step": 10550 }, { "epoch": 0.0998759951155328, "grad_norm": 322.5186767578125, "learning_rate": 1.9745056360310493e-06, "loss": 37.9844, "step": 10551 }, { "epoch": 0.09988546113724785, "grad_norm": 306.283203125, "learning_rate": 1.974498756971123e-06, "loss": 17.1836, "step": 10552 }, { "epoch": 0.0998949271589629, "grad_norm": 305.0131530761719, "learning_rate": 1.974491876995231e-06, "loss": 29.4766, "step": 10553 }, { "epoch": 0.09990439318067795, "grad_norm": 524.6256103515625, "learning_rate": 1.9744849961033796e-06, "loss": 52.8438, "step": 10554 }, { "epoch": 0.09991385920239301, "grad_norm": 389.64788818359375, "learning_rate": 1.9744781142955763e-06, "loss": 13.2891, "step": 10555 }, { "epoch": 0.09992332522410806, "grad_norm": 902.099853515625, "learning_rate": 1.9744712315718267e-06, "loss": 30.7344, "step": 10556 }, { "epoch": 0.09993279124582312, "grad_norm": 428.68670654296875, "learning_rate": 1.9744643479321376e-06, "loss": 15.207, "step": 10557 }, { "epoch": 0.09994225726753816, "grad_norm": 546.7999877929688, "learning_rate": 1.9744574633765153e-06, "loss": 48.5, "step": 10558 }, { "epoch": 0.09995172328925322, "grad_norm": 385.1988830566406, "learning_rate": 1.9744505779049667e-06, "loss": 34.8594, "step": 10559 }, { "epoch": 0.09996118931096828, "grad_norm": 710.0870361328125, "learning_rate": 1.9744436915174976e-06, "loss": 38.6797, "step": 10560 }, { "epoch": 0.09997065533268333, "grad_norm": 734.7847900390625, "learning_rate": 1.974436804214115e-06, "loss": 66.3125, "step": 10561 }, { "epoch": 0.09998012135439839, "grad_norm": 548.478515625, "learning_rate": 1.974429915994825e-06, "loss": 24.125, "step": 10562 }, { "epoch": 0.09998958737611344, "grad_norm": 514.9948120117188, "learning_rate": 1.9744230268596347e-06, "loss": 28.1094, "step": 10563 }, { "epoch": 0.0999990533978285, "grad_norm": 789.7698364257812, "learning_rate": 1.97441613680855e-06, "loss": 37.1953, "step": 10564 }, { "epoch": 0.10000851941954354, "grad_norm": 335.65789794921875, "learning_rate": 1.9744092458415773e-06, "loss": 23.2188, "step": 10565 }, { "epoch": 0.1000179854412586, "grad_norm": 295.2977294921875, "learning_rate": 1.974402353958723e-06, "loss": 24.332, "step": 10566 }, { "epoch": 0.10002745146297366, "grad_norm": 272.85845947265625, "learning_rate": 1.9743954611599944e-06, "loss": 16.4141, "step": 10567 }, { "epoch": 0.10003691748468871, "grad_norm": 3.7741830348968506, "learning_rate": 1.9743885674453973e-06, "loss": 1.0093, "step": 10568 }, { "epoch": 0.10004638350640377, "grad_norm": 639.1832275390625, "learning_rate": 1.9743816728149386e-06, "loss": 35.7188, "step": 10569 }, { "epoch": 0.10005584952811881, "grad_norm": 465.90826416015625, "learning_rate": 1.974374777268624e-06, "loss": 24.875, "step": 10570 }, { "epoch": 0.10006531554983387, "grad_norm": 318.6462707519531, "learning_rate": 1.974367880806461e-06, "loss": 27.3984, "step": 10571 }, { "epoch": 0.10007478157154892, "grad_norm": 343.479248046875, "learning_rate": 1.974360983428455e-06, "loss": 24.4141, "step": 10572 }, { "epoch": 0.10008424759326398, "grad_norm": 410.5221862792969, "learning_rate": 1.9743540851346133e-06, "loss": 41.2891, "step": 10573 }, { "epoch": 0.10009371361497903, "grad_norm": 405.2139587402344, "learning_rate": 1.9743471859249425e-06, "loss": 52.7656, "step": 10574 }, { "epoch": 0.10010317963669409, "grad_norm": 417.7200622558594, "learning_rate": 1.974340285799448e-06, "loss": 42.5078, "step": 10575 }, { "epoch": 0.10011264565840915, "grad_norm": 621.2086791992188, "learning_rate": 1.974333384758138e-06, "loss": 34.6484, "step": 10576 }, { "epoch": 0.10012211168012419, "grad_norm": 824.2508544921875, "learning_rate": 1.9743264828010175e-06, "loss": 64.3125, "step": 10577 }, { "epoch": 0.10013157770183925, "grad_norm": 711.673583984375, "learning_rate": 1.9743195799280934e-06, "loss": 53.8594, "step": 10578 }, { "epoch": 0.1001410437235543, "grad_norm": 195.89825439453125, "learning_rate": 1.974312676139372e-06, "loss": 19.7344, "step": 10579 }, { "epoch": 0.10015050974526936, "grad_norm": 433.01593017578125, "learning_rate": 1.9743057714348605e-06, "loss": 44.7344, "step": 10580 }, { "epoch": 0.1001599757669844, "grad_norm": 277.92962646484375, "learning_rate": 1.974298865814565e-06, "loss": 19.8125, "step": 10581 }, { "epoch": 0.10016944178869946, "grad_norm": 461.06976318359375, "learning_rate": 1.9742919592784918e-06, "loss": 36.6406, "step": 10582 }, { "epoch": 0.10017890781041451, "grad_norm": 3.0607473850250244, "learning_rate": 1.974285051826648e-06, "loss": 0.918, "step": 10583 }, { "epoch": 0.10018837383212957, "grad_norm": 381.955810546875, "learning_rate": 1.974278143459039e-06, "loss": 58.6875, "step": 10584 }, { "epoch": 0.10019783985384463, "grad_norm": 270.6435546875, "learning_rate": 1.974271234175672e-06, "loss": 25.8906, "step": 10585 }, { "epoch": 0.10020730587555968, "grad_norm": 509.0980224609375, "learning_rate": 1.9742643239765533e-06, "loss": 51.3906, "step": 10586 }, { "epoch": 0.10021677189727474, "grad_norm": 294.2142639160156, "learning_rate": 1.97425741286169e-06, "loss": 16.6094, "step": 10587 }, { "epoch": 0.10022623791898978, "grad_norm": 944.1612548828125, "learning_rate": 1.974250500831088e-06, "loss": 44.6016, "step": 10588 }, { "epoch": 0.10023570394070484, "grad_norm": 470.3285217285156, "learning_rate": 1.9742435878847538e-06, "loss": 54.125, "step": 10589 }, { "epoch": 0.10024516996241989, "grad_norm": 348.0058898925781, "learning_rate": 1.974236674022694e-06, "loss": 17.2188, "step": 10590 }, { "epoch": 0.10025463598413495, "grad_norm": 435.2326354980469, "learning_rate": 1.974229759244915e-06, "loss": 28.3516, "step": 10591 }, { "epoch": 0.10026410200585, "grad_norm": 253.0084228515625, "learning_rate": 1.9742228435514236e-06, "loss": 17.668, "step": 10592 }, { "epoch": 0.10027356802756505, "grad_norm": 319.3443908691406, "learning_rate": 1.974215926942226e-06, "loss": 39.2188, "step": 10593 }, { "epoch": 0.10028303404928011, "grad_norm": 264.6986083984375, "learning_rate": 1.9742090094173285e-06, "loss": 25.4062, "step": 10594 }, { "epoch": 0.10029250007099516, "grad_norm": 278.1637268066406, "learning_rate": 1.9742020909767385e-06, "loss": 26.3438, "step": 10595 }, { "epoch": 0.10030196609271022, "grad_norm": 496.13433837890625, "learning_rate": 1.974195171620462e-06, "loss": 43.4219, "step": 10596 }, { "epoch": 0.10031143211442527, "grad_norm": 498.82684326171875, "learning_rate": 1.9741882513485044e-06, "loss": 44.7812, "step": 10597 }, { "epoch": 0.10032089813614033, "grad_norm": 523.537841796875, "learning_rate": 1.974181330160874e-06, "loss": 40.6406, "step": 10598 }, { "epoch": 0.10033036415785537, "grad_norm": 279.31036376953125, "learning_rate": 1.9741744080575762e-06, "loss": 35.0547, "step": 10599 }, { "epoch": 0.10033983017957043, "grad_norm": 401.4710388183594, "learning_rate": 1.974167485038618e-06, "loss": 36.7266, "step": 10600 }, { "epoch": 0.10034929620128548, "grad_norm": 907.2523803710938, "learning_rate": 1.974160561104006e-06, "loss": 49.4766, "step": 10601 }, { "epoch": 0.10035876222300054, "grad_norm": 3.494854211807251, "learning_rate": 1.974153636253746e-06, "loss": 1.1094, "step": 10602 }, { "epoch": 0.1003682282447156, "grad_norm": 851.9781494140625, "learning_rate": 1.9741467104878454e-06, "loss": 36.6797, "step": 10603 }, { "epoch": 0.10037769426643064, "grad_norm": 313.6498107910156, "learning_rate": 1.97413978380631e-06, "loss": 22.4297, "step": 10604 }, { "epoch": 0.1003871602881457, "grad_norm": 370.8446044921875, "learning_rate": 1.9741328562091465e-06, "loss": 37.25, "step": 10605 }, { "epoch": 0.10039662630986075, "grad_norm": 392.18585205078125, "learning_rate": 1.974125927696362e-06, "loss": 23.2969, "step": 10606 }, { "epoch": 0.10040609233157581, "grad_norm": 582.4617309570312, "learning_rate": 1.974118998267962e-06, "loss": 59.2969, "step": 10607 }, { "epoch": 0.10041555835329086, "grad_norm": 417.8567810058594, "learning_rate": 1.9741120679239534e-06, "loss": 15.3477, "step": 10608 }, { "epoch": 0.10042502437500592, "grad_norm": 718.2811279296875, "learning_rate": 1.974105136664343e-06, "loss": 32.4766, "step": 10609 }, { "epoch": 0.10043449039672098, "grad_norm": 314.06793212890625, "learning_rate": 1.974098204489137e-06, "loss": 27.4688, "step": 10610 }, { "epoch": 0.10044395641843602, "grad_norm": 2.930375099182129, "learning_rate": 1.9740912713983423e-06, "loss": 0.9126, "step": 10611 }, { "epoch": 0.10045342244015108, "grad_norm": 169.0237579345703, "learning_rate": 1.974084337391965e-06, "loss": 16.5703, "step": 10612 }, { "epoch": 0.10046288846186613, "grad_norm": 514.50146484375, "learning_rate": 1.974077402470012e-06, "loss": 18.5859, "step": 10613 }, { "epoch": 0.10047235448358119, "grad_norm": 1485.6439208984375, "learning_rate": 1.9740704666324898e-06, "loss": 34.7891, "step": 10614 }, { "epoch": 0.10048182050529623, "grad_norm": 754.9489135742188, "learning_rate": 1.974063529879404e-06, "loss": 25.6406, "step": 10615 }, { "epoch": 0.1004912865270113, "grad_norm": 325.19146728515625, "learning_rate": 1.9740565922107625e-06, "loss": 22.8359, "step": 10616 }, { "epoch": 0.10050075254872634, "grad_norm": 420.18408203125, "learning_rate": 1.974049653626571e-06, "loss": 30.0547, "step": 10617 }, { "epoch": 0.1005102185704414, "grad_norm": 328.6214599609375, "learning_rate": 1.974042714126836e-06, "loss": 20.2891, "step": 10618 }, { "epoch": 0.10051968459215646, "grad_norm": 416.1221923828125, "learning_rate": 1.974035773711564e-06, "loss": 22.8125, "step": 10619 }, { "epoch": 0.1005291506138715, "grad_norm": 295.1413269042969, "learning_rate": 1.9740288323807625e-06, "loss": 23.0312, "step": 10620 }, { "epoch": 0.10053861663558657, "grad_norm": 483.65618896484375, "learning_rate": 1.974021890134437e-06, "loss": 49.375, "step": 10621 }, { "epoch": 0.10054808265730161, "grad_norm": 409.1482849121094, "learning_rate": 1.974014946972594e-06, "loss": 40.0938, "step": 10622 }, { "epoch": 0.10055754867901667, "grad_norm": 384.18988037109375, "learning_rate": 1.9740080028952402e-06, "loss": 27.2266, "step": 10623 }, { "epoch": 0.10056701470073172, "grad_norm": 426.2543029785156, "learning_rate": 1.9740010579023827e-06, "loss": 41.5781, "step": 10624 }, { "epoch": 0.10057648072244678, "grad_norm": 779.4286499023438, "learning_rate": 1.9739941119940275e-06, "loss": 8.5586, "step": 10625 }, { "epoch": 0.10058594674416182, "grad_norm": 518.2972412109375, "learning_rate": 1.9739871651701808e-06, "loss": 41.1484, "step": 10626 }, { "epoch": 0.10059541276587688, "grad_norm": 188.68101501464844, "learning_rate": 1.97398021743085e-06, "loss": 19.0938, "step": 10627 }, { "epoch": 0.10060487878759194, "grad_norm": 1193.4737548828125, "learning_rate": 1.9739732687760407e-06, "loss": 54.6875, "step": 10628 }, { "epoch": 0.10061434480930699, "grad_norm": 288.6430358886719, "learning_rate": 1.9739663192057604e-06, "loss": 18.1875, "step": 10629 }, { "epoch": 0.10062381083102205, "grad_norm": 233.88282775878906, "learning_rate": 1.9739593687200145e-06, "loss": 18.8906, "step": 10630 }, { "epoch": 0.1006332768527371, "grad_norm": 303.88177490234375, "learning_rate": 1.9739524173188107e-06, "loss": 28.1484, "step": 10631 }, { "epoch": 0.10064274287445216, "grad_norm": 2106.4033203125, "learning_rate": 1.9739454650021548e-06, "loss": 42.3633, "step": 10632 }, { "epoch": 0.1006522088961672, "grad_norm": 1197.361328125, "learning_rate": 1.9739385117700532e-06, "loss": 48.2539, "step": 10633 }, { "epoch": 0.10066167491788226, "grad_norm": 287.88623046875, "learning_rate": 1.973931557622513e-06, "loss": 23.2969, "step": 10634 }, { "epoch": 0.10067114093959731, "grad_norm": 608.188720703125, "learning_rate": 1.973924602559541e-06, "loss": 63.0938, "step": 10635 }, { "epoch": 0.10068060696131237, "grad_norm": 279.9298095703125, "learning_rate": 1.9739176465811427e-06, "loss": 28.7656, "step": 10636 }, { "epoch": 0.10069007298302743, "grad_norm": 173.24404907226562, "learning_rate": 1.9739106896873253e-06, "loss": 19.3125, "step": 10637 }, { "epoch": 0.10069953900474247, "grad_norm": 448.0154724121094, "learning_rate": 1.9739037318780953e-06, "loss": 19.4141, "step": 10638 }, { "epoch": 0.10070900502645753, "grad_norm": 304.6627502441406, "learning_rate": 1.973896773153459e-06, "loss": 29.9531, "step": 10639 }, { "epoch": 0.10071847104817258, "grad_norm": 423.02191162109375, "learning_rate": 1.973889813513423e-06, "loss": 40.9922, "step": 10640 }, { "epoch": 0.10072793706988764, "grad_norm": 675.7747192382812, "learning_rate": 1.973882852957994e-06, "loss": 37.9258, "step": 10641 }, { "epoch": 0.10073740309160269, "grad_norm": 317.4502258300781, "learning_rate": 1.9738758914871784e-06, "loss": 36.1719, "step": 10642 }, { "epoch": 0.10074686911331775, "grad_norm": 265.65771484375, "learning_rate": 1.973868929100983e-06, "loss": 20.5859, "step": 10643 }, { "epoch": 0.10075633513503279, "grad_norm": 490.9431457519531, "learning_rate": 1.973861965799414e-06, "loss": 30.4922, "step": 10644 }, { "epoch": 0.10076580115674785, "grad_norm": 245.01992797851562, "learning_rate": 1.9738550015824783e-06, "loss": 21.5234, "step": 10645 }, { "epoch": 0.10077526717846291, "grad_norm": 462.2872009277344, "learning_rate": 1.973848036450182e-06, "loss": 28.4688, "step": 10646 }, { "epoch": 0.10078473320017796, "grad_norm": 411.36083984375, "learning_rate": 1.9738410704025323e-06, "loss": 28.1406, "step": 10647 }, { "epoch": 0.10079419922189302, "grad_norm": 486.3639831542969, "learning_rate": 1.9738341034395353e-06, "loss": 43.0234, "step": 10648 }, { "epoch": 0.10080366524360806, "grad_norm": 796.4903564453125, "learning_rate": 1.973827135561197e-06, "loss": 42.6953, "step": 10649 }, { "epoch": 0.10081313126532312, "grad_norm": 432.8517761230469, "learning_rate": 1.973820166767525e-06, "loss": 29.1719, "step": 10650 }, { "epoch": 0.10082259728703817, "grad_norm": 1389.22802734375, "learning_rate": 1.9738131970585253e-06, "loss": 45.1875, "step": 10651 }, { "epoch": 0.10083206330875323, "grad_norm": 460.63262939453125, "learning_rate": 1.9738062264342047e-06, "loss": 32.9688, "step": 10652 }, { "epoch": 0.10084152933046829, "grad_norm": 329.6923828125, "learning_rate": 1.9737992548945694e-06, "loss": 24.2812, "step": 10653 }, { "epoch": 0.10085099535218334, "grad_norm": 611.152587890625, "learning_rate": 1.9737922824396264e-06, "loss": 42.7344, "step": 10654 }, { "epoch": 0.1008604613738984, "grad_norm": 370.48974609375, "learning_rate": 1.9737853090693818e-06, "loss": 34.3359, "step": 10655 }, { "epoch": 0.10086992739561344, "grad_norm": 285.9647216796875, "learning_rate": 1.973778334783842e-06, "loss": 21.8086, "step": 10656 }, { "epoch": 0.1008793934173285, "grad_norm": 697.1549682617188, "learning_rate": 1.9737713595830145e-06, "loss": 60.5625, "step": 10657 }, { "epoch": 0.10088885943904355, "grad_norm": 409.978271484375, "learning_rate": 1.973764383466905e-06, "loss": 24.3945, "step": 10658 }, { "epoch": 0.10089832546075861, "grad_norm": 327.5806884765625, "learning_rate": 1.9737574064355205e-06, "loss": 17.918, "step": 10659 }, { "epoch": 0.10090779148247365, "grad_norm": 2.785609006881714, "learning_rate": 1.9737504284888674e-06, "loss": 0.8062, "step": 10660 }, { "epoch": 0.10091725750418871, "grad_norm": 408.5553283691406, "learning_rate": 1.973743449626952e-06, "loss": 55.3594, "step": 10661 }, { "epoch": 0.10092672352590377, "grad_norm": 661.19677734375, "learning_rate": 1.9737364698497815e-06, "loss": 51.1875, "step": 10662 }, { "epoch": 0.10093618954761882, "grad_norm": 282.2568664550781, "learning_rate": 1.9737294891573618e-06, "loss": 16.7344, "step": 10663 }, { "epoch": 0.10094565556933388, "grad_norm": 358.90899658203125, "learning_rate": 1.9737225075496997e-06, "loss": 23.7812, "step": 10664 }, { "epoch": 0.10095512159104893, "grad_norm": 728.069091796875, "learning_rate": 1.9737155250268015e-06, "loss": 31.3516, "step": 10665 }, { "epoch": 0.10096458761276399, "grad_norm": 399.13763427734375, "learning_rate": 1.9737085415886746e-06, "loss": 11.0391, "step": 10666 }, { "epoch": 0.10097405363447903, "grad_norm": 269.24957275390625, "learning_rate": 1.9737015572353247e-06, "loss": 19.5859, "step": 10667 }, { "epoch": 0.10098351965619409, "grad_norm": 623.5556030273438, "learning_rate": 1.9736945719667587e-06, "loss": 15.3359, "step": 10668 }, { "epoch": 0.10099298567790914, "grad_norm": 219.8771209716797, "learning_rate": 1.9736875857829833e-06, "loss": 21.3594, "step": 10669 }, { "epoch": 0.1010024516996242, "grad_norm": 225.57615661621094, "learning_rate": 1.973680598684005e-06, "loss": 25.0703, "step": 10670 }, { "epoch": 0.10101191772133926, "grad_norm": 1792.068603515625, "learning_rate": 1.97367361066983e-06, "loss": 30.9922, "step": 10671 }, { "epoch": 0.1010213837430543, "grad_norm": 3.4837355613708496, "learning_rate": 1.973666621740465e-06, "loss": 0.9434, "step": 10672 }, { "epoch": 0.10103084976476936, "grad_norm": 218.24822998046875, "learning_rate": 1.973659631895917e-06, "loss": 17.0078, "step": 10673 }, { "epoch": 0.10104031578648441, "grad_norm": 496.6743469238281, "learning_rate": 1.973652641136192e-06, "loss": 19.5312, "step": 10674 }, { "epoch": 0.10104978180819947, "grad_norm": 275.8001403808594, "learning_rate": 1.9736456494612975e-06, "loss": 16.0078, "step": 10675 }, { "epoch": 0.10105924782991452, "grad_norm": 632.2824096679688, "learning_rate": 1.9736386568712387e-06, "loss": 46.9531, "step": 10676 }, { "epoch": 0.10106871385162958, "grad_norm": 256.0386047363281, "learning_rate": 1.9736316633660235e-06, "loss": 25.6797, "step": 10677 }, { "epoch": 0.10107817987334462, "grad_norm": 414.1674499511719, "learning_rate": 1.9736246689456574e-06, "loss": 37.7031, "step": 10678 }, { "epoch": 0.10108764589505968, "grad_norm": 360.83160400390625, "learning_rate": 1.9736176736101476e-06, "loss": 42.6719, "step": 10679 }, { "epoch": 0.10109711191677474, "grad_norm": 278.8284606933594, "learning_rate": 1.973610677359501e-06, "loss": 22.4922, "step": 10680 }, { "epoch": 0.10110657793848979, "grad_norm": 156.84400939941406, "learning_rate": 1.9736036801937226e-06, "loss": 15.3164, "step": 10681 }, { "epoch": 0.10111604396020485, "grad_norm": 561.5140991210938, "learning_rate": 1.973596682112821e-06, "loss": 58.9062, "step": 10682 }, { "epoch": 0.1011255099819199, "grad_norm": 227.15115356445312, "learning_rate": 1.9735896831168016e-06, "loss": 28.3828, "step": 10683 }, { "epoch": 0.10113497600363495, "grad_norm": 175.51441955566406, "learning_rate": 1.9735826832056715e-06, "loss": 20.0781, "step": 10684 }, { "epoch": 0.10114444202535, "grad_norm": 199.6274871826172, "learning_rate": 1.9735756823794366e-06, "loss": 16.8906, "step": 10685 }, { "epoch": 0.10115390804706506, "grad_norm": 3.341701030731201, "learning_rate": 1.973568680638104e-06, "loss": 1.0479, "step": 10686 }, { "epoch": 0.10116337406878011, "grad_norm": 281.3661193847656, "learning_rate": 1.9735616779816805e-06, "loss": 19.0469, "step": 10687 }, { "epoch": 0.10117284009049517, "grad_norm": 441.92596435546875, "learning_rate": 1.973554674410172e-06, "loss": 42.1562, "step": 10688 }, { "epoch": 0.10118230611221023, "grad_norm": 310.4792785644531, "learning_rate": 1.9735476699235855e-06, "loss": 34.9844, "step": 10689 }, { "epoch": 0.10119177213392527, "grad_norm": 464.5519104003906, "learning_rate": 1.9735406645219277e-06, "loss": 28.2812, "step": 10690 }, { "epoch": 0.10120123815564033, "grad_norm": 3.1908648014068604, "learning_rate": 1.9735336582052045e-06, "loss": 1.0747, "step": 10691 }, { "epoch": 0.10121070417735538, "grad_norm": 516.6564331054688, "learning_rate": 1.9735266509734235e-06, "loss": 32.5859, "step": 10692 }, { "epoch": 0.10122017019907044, "grad_norm": 463.9186096191406, "learning_rate": 1.9735196428265904e-06, "loss": 19.8672, "step": 10693 }, { "epoch": 0.10122963622078548, "grad_norm": 187.6327362060547, "learning_rate": 1.9735126337647126e-06, "loss": 18.8828, "step": 10694 }, { "epoch": 0.10123910224250054, "grad_norm": 303.098388671875, "learning_rate": 1.973505623787796e-06, "loss": 20.8711, "step": 10695 }, { "epoch": 0.1012485682642156, "grad_norm": 397.9830322265625, "learning_rate": 1.9734986128958473e-06, "loss": 39.6953, "step": 10696 }, { "epoch": 0.10125803428593065, "grad_norm": 458.7879943847656, "learning_rate": 1.973491601088874e-06, "loss": 45.4062, "step": 10697 }, { "epoch": 0.10126750030764571, "grad_norm": 508.9710998535156, "learning_rate": 1.973484588366881e-06, "loss": 26.8125, "step": 10698 }, { "epoch": 0.10127696632936076, "grad_norm": 229.2833709716797, "learning_rate": 1.9734775747298764e-06, "loss": 17.2266, "step": 10699 }, { "epoch": 0.10128643235107582, "grad_norm": 438.72442626953125, "learning_rate": 1.9734705601778657e-06, "loss": 22.9102, "step": 10700 }, { "epoch": 0.10129589837279086, "grad_norm": 555.96435546875, "learning_rate": 1.9734635447108564e-06, "loss": 23.457, "step": 10701 }, { "epoch": 0.10130536439450592, "grad_norm": 664.8400268554688, "learning_rate": 1.9734565283288547e-06, "loss": 26.4648, "step": 10702 }, { "epoch": 0.10131483041622097, "grad_norm": 335.2774353027344, "learning_rate": 1.973449511031867e-06, "loss": 36.9531, "step": 10703 }, { "epoch": 0.10132429643793603, "grad_norm": 172.12205505371094, "learning_rate": 1.9734424928199004e-06, "loss": 20.125, "step": 10704 }, { "epoch": 0.10133376245965109, "grad_norm": 753.755615234375, "learning_rate": 1.9734354736929605e-06, "loss": 18.8047, "step": 10705 }, { "epoch": 0.10134322848136613, "grad_norm": 508.3937683105469, "learning_rate": 1.973428453651055e-06, "loss": 58.2188, "step": 10706 }, { "epoch": 0.1013526945030812, "grad_norm": 3.337949514389038, "learning_rate": 1.9734214326941904e-06, "loss": 0.8726, "step": 10707 }, { "epoch": 0.10136216052479624, "grad_norm": 605.645751953125, "learning_rate": 1.9734144108223724e-06, "loss": 39.75, "step": 10708 }, { "epoch": 0.1013716265465113, "grad_norm": 317.1656799316406, "learning_rate": 1.973407388035609e-06, "loss": 15.918, "step": 10709 }, { "epoch": 0.10138109256822635, "grad_norm": 777.4223022460938, "learning_rate": 1.973400364333905e-06, "loss": 62.5938, "step": 10710 }, { "epoch": 0.1013905585899414, "grad_norm": 941.7799072265625, "learning_rate": 1.9733933397172685e-06, "loss": 41.7266, "step": 10711 }, { "epoch": 0.10140002461165645, "grad_norm": 2.9774844646453857, "learning_rate": 1.9733863141857053e-06, "loss": 0.7925, "step": 10712 }, { "epoch": 0.10140949063337151, "grad_norm": 279.4832763671875, "learning_rate": 1.9733792877392226e-06, "loss": 28.4375, "step": 10713 }, { "epoch": 0.10141895665508657, "grad_norm": 387.12420654296875, "learning_rate": 1.973372260377827e-06, "loss": 32.4766, "step": 10714 }, { "epoch": 0.10142842267680162, "grad_norm": 553.7158203125, "learning_rate": 1.973365232101524e-06, "loss": 25.5781, "step": 10715 }, { "epoch": 0.10143788869851668, "grad_norm": 406.6733703613281, "learning_rate": 1.9733582029103214e-06, "loss": 31.0469, "step": 10716 }, { "epoch": 0.10144735472023172, "grad_norm": 183.98226928710938, "learning_rate": 1.9733511728042255e-06, "loss": 16.5469, "step": 10717 }, { "epoch": 0.10145682074194678, "grad_norm": 587.1603393554688, "learning_rate": 1.9733441417832426e-06, "loss": 23.5547, "step": 10718 }, { "epoch": 0.10146628676366183, "grad_norm": 281.7250061035156, "learning_rate": 1.97333710984738e-06, "loss": 38.75, "step": 10719 }, { "epoch": 0.10147575278537689, "grad_norm": 551.3875122070312, "learning_rate": 1.973330076996643e-06, "loss": 25.1406, "step": 10720 }, { "epoch": 0.10148521880709194, "grad_norm": 169.3487548828125, "learning_rate": 1.9733230432310397e-06, "loss": 10.3867, "step": 10721 }, { "epoch": 0.101494684828807, "grad_norm": 499.8358459472656, "learning_rate": 1.973316008550576e-06, "loss": 44.4531, "step": 10722 }, { "epoch": 0.10150415085052206, "grad_norm": 3.4561665058135986, "learning_rate": 1.9733089729552585e-06, "loss": 0.957, "step": 10723 }, { "epoch": 0.1015136168722371, "grad_norm": 254.56167602539062, "learning_rate": 1.9733019364450934e-06, "loss": 20.5547, "step": 10724 }, { "epoch": 0.10152308289395216, "grad_norm": 644.0747680664062, "learning_rate": 1.9732948990200887e-06, "loss": 54.375, "step": 10725 }, { "epoch": 0.10153254891566721, "grad_norm": 385.3824462890625, "learning_rate": 1.97328786068025e-06, "loss": 22.2578, "step": 10726 }, { "epoch": 0.10154201493738227, "grad_norm": 782.6631469726562, "learning_rate": 1.9732808214255832e-06, "loss": 35.9531, "step": 10727 }, { "epoch": 0.10155148095909731, "grad_norm": 350.6466369628906, "learning_rate": 1.973273781256096e-06, "loss": 25.375, "step": 10728 }, { "epoch": 0.10156094698081237, "grad_norm": 615.5086059570312, "learning_rate": 1.973266740171795e-06, "loss": 27.3984, "step": 10729 }, { "epoch": 0.10157041300252742, "grad_norm": 195.8678741455078, "learning_rate": 1.9732596981726867e-06, "loss": 13.4453, "step": 10730 }, { "epoch": 0.10157987902424248, "grad_norm": 376.080322265625, "learning_rate": 1.9732526552587776e-06, "loss": 43.9375, "step": 10731 }, { "epoch": 0.10158934504595754, "grad_norm": 167.0189208984375, "learning_rate": 1.9732456114300738e-06, "loss": 17.9805, "step": 10732 }, { "epoch": 0.10159881106767259, "grad_norm": 430.85760498046875, "learning_rate": 1.9732385666865832e-06, "loss": 39.6875, "step": 10733 }, { "epoch": 0.10160827708938765, "grad_norm": 292.11053466796875, "learning_rate": 1.973231521028311e-06, "loss": 19.0312, "step": 10734 }, { "epoch": 0.10161774311110269, "grad_norm": 185.2101593017578, "learning_rate": 1.973224474455265e-06, "loss": 19.6953, "step": 10735 }, { "epoch": 0.10162720913281775, "grad_norm": 472.912841796875, "learning_rate": 1.973217426967451e-06, "loss": 19.4453, "step": 10736 }, { "epoch": 0.1016366751545328, "grad_norm": 294.7665100097656, "learning_rate": 1.973210378564876e-06, "loss": 19.8047, "step": 10737 }, { "epoch": 0.10164614117624786, "grad_norm": 219.18197631835938, "learning_rate": 1.9732033292475464e-06, "loss": 19.8281, "step": 10738 }, { "epoch": 0.10165560719796292, "grad_norm": 3.7228434085845947, "learning_rate": 1.973196279015469e-06, "loss": 1.001, "step": 10739 }, { "epoch": 0.10166507321967796, "grad_norm": 353.1515197753906, "learning_rate": 1.973189227868651e-06, "loss": 19.3828, "step": 10740 }, { "epoch": 0.10167453924139302, "grad_norm": 326.2034912109375, "learning_rate": 1.9731821758070977e-06, "loss": 25.6172, "step": 10741 }, { "epoch": 0.10168400526310807, "grad_norm": 765.2726440429688, "learning_rate": 1.9731751228308167e-06, "loss": 46.1094, "step": 10742 }, { "epoch": 0.10169347128482313, "grad_norm": 389.43524169921875, "learning_rate": 1.9731680689398146e-06, "loss": 35.2969, "step": 10743 }, { "epoch": 0.10170293730653818, "grad_norm": 456.8645324707031, "learning_rate": 1.9731610141340974e-06, "loss": 25.2266, "step": 10744 }, { "epoch": 0.10171240332825324, "grad_norm": 297.10345458984375, "learning_rate": 1.9731539584136728e-06, "loss": 17.8828, "step": 10745 }, { "epoch": 0.10172186934996828, "grad_norm": 172.04150390625, "learning_rate": 1.973146901778546e-06, "loss": 17.7734, "step": 10746 }, { "epoch": 0.10173133537168334, "grad_norm": 420.06500244140625, "learning_rate": 1.973139844228725e-06, "loss": 45.7656, "step": 10747 }, { "epoch": 0.1017408013933984, "grad_norm": 426.1719970703125, "learning_rate": 1.9731327857642156e-06, "loss": 28.8438, "step": 10748 }, { "epoch": 0.10175026741511345, "grad_norm": 3.1038930416107178, "learning_rate": 1.973125726385025e-06, "loss": 0.9253, "step": 10749 }, { "epoch": 0.10175973343682851, "grad_norm": 659.4366455078125, "learning_rate": 1.9731186660911593e-06, "loss": 11.875, "step": 10750 }, { "epoch": 0.10176919945854355, "grad_norm": 1159.806640625, "learning_rate": 1.9731116048826252e-06, "loss": 48.6406, "step": 10751 }, { "epoch": 0.10177866548025861, "grad_norm": 378.3949890136719, "learning_rate": 1.9731045427594294e-06, "loss": 35.5625, "step": 10752 }, { "epoch": 0.10178813150197366, "grad_norm": 902.2179565429688, "learning_rate": 1.973097479721579e-06, "loss": 60.9922, "step": 10753 }, { "epoch": 0.10179759752368872, "grad_norm": 222.41650390625, "learning_rate": 1.9730904157690804e-06, "loss": 26.5156, "step": 10754 }, { "epoch": 0.10180706354540377, "grad_norm": 593.7284545898438, "learning_rate": 1.97308335090194e-06, "loss": 33.7891, "step": 10755 }, { "epoch": 0.10181652956711883, "grad_norm": 558.3494873046875, "learning_rate": 1.973076285120164e-06, "loss": 24.9336, "step": 10756 }, { "epoch": 0.10182599558883389, "grad_norm": 757.4963989257812, "learning_rate": 1.97306921842376e-06, "loss": 48.3906, "step": 10757 }, { "epoch": 0.10183546161054893, "grad_norm": 262.8924865722656, "learning_rate": 1.9730621508127344e-06, "loss": 20.8594, "step": 10758 }, { "epoch": 0.10184492763226399, "grad_norm": 143.2061767578125, "learning_rate": 1.9730550822870935e-06, "loss": 21.0312, "step": 10759 }, { "epoch": 0.10185439365397904, "grad_norm": 301.7262268066406, "learning_rate": 1.973048012846844e-06, "loss": 34.5156, "step": 10760 }, { "epoch": 0.1018638596756941, "grad_norm": 217.05828857421875, "learning_rate": 1.973040942491993e-06, "loss": 20.1406, "step": 10761 }, { "epoch": 0.10187332569740915, "grad_norm": 237.01048278808594, "learning_rate": 1.9730338712225466e-06, "loss": 19.75, "step": 10762 }, { "epoch": 0.1018827917191242, "grad_norm": 336.124267578125, "learning_rate": 1.9730267990385115e-06, "loss": 21.0625, "step": 10763 }, { "epoch": 0.10189225774083925, "grad_norm": 249.41673278808594, "learning_rate": 1.973019725939895e-06, "loss": 18.7969, "step": 10764 }, { "epoch": 0.10190172376255431, "grad_norm": 470.5046081542969, "learning_rate": 1.973012651926703e-06, "loss": 22.1797, "step": 10765 }, { "epoch": 0.10191118978426937, "grad_norm": 360.1184997558594, "learning_rate": 1.9730055769989423e-06, "loss": 23.9688, "step": 10766 }, { "epoch": 0.10192065580598442, "grad_norm": 279.7814025878906, "learning_rate": 1.9729985011566197e-06, "loss": 21.6797, "step": 10767 }, { "epoch": 0.10193012182769948, "grad_norm": 500.1244201660156, "learning_rate": 1.9729914243997418e-06, "loss": 18.2148, "step": 10768 }, { "epoch": 0.10193958784941452, "grad_norm": 374.2088317871094, "learning_rate": 1.9729843467283155e-06, "loss": 17.9297, "step": 10769 }, { "epoch": 0.10194905387112958, "grad_norm": 255.31495666503906, "learning_rate": 1.9729772681423466e-06, "loss": 24.9062, "step": 10770 }, { "epoch": 0.10195851989284463, "grad_norm": 264.9049377441406, "learning_rate": 1.972970188641843e-06, "loss": 28.0781, "step": 10771 }, { "epoch": 0.10196798591455969, "grad_norm": 608.18505859375, "learning_rate": 1.9729631082268104e-06, "loss": 25.1211, "step": 10772 }, { "epoch": 0.10197745193627474, "grad_norm": 317.8977966308594, "learning_rate": 1.972956026897256e-06, "loss": 39.375, "step": 10773 }, { "epoch": 0.1019869179579898, "grad_norm": 394.0249938964844, "learning_rate": 1.972948944653186e-06, "loss": 11.0469, "step": 10774 }, { "epoch": 0.10199638397970485, "grad_norm": 831.8424682617188, "learning_rate": 1.9729418614946074e-06, "loss": 33.3438, "step": 10775 }, { "epoch": 0.1020058500014199, "grad_norm": 911.8816528320312, "learning_rate": 1.9729347774215264e-06, "loss": 50.5859, "step": 10776 }, { "epoch": 0.10201531602313496, "grad_norm": 429.0124206542969, "learning_rate": 1.9729276924339505e-06, "loss": 46.9062, "step": 10777 }, { "epoch": 0.10202478204485001, "grad_norm": 1202.1380615234375, "learning_rate": 1.9729206065318856e-06, "loss": 61.8594, "step": 10778 }, { "epoch": 0.10203424806656507, "grad_norm": 539.4385986328125, "learning_rate": 1.9729135197153386e-06, "loss": 30.2812, "step": 10779 }, { "epoch": 0.10204371408828011, "grad_norm": 272.08056640625, "learning_rate": 1.9729064319843166e-06, "loss": 29.4688, "step": 10780 }, { "epoch": 0.10205318010999517, "grad_norm": 306.85858154296875, "learning_rate": 1.9728993433388255e-06, "loss": 8.832, "step": 10781 }, { "epoch": 0.10206264613171023, "grad_norm": 336.7789001464844, "learning_rate": 1.972892253778872e-06, "loss": 26.7969, "step": 10782 }, { "epoch": 0.10207211215342528, "grad_norm": 212.7386474609375, "learning_rate": 1.9728851633044637e-06, "loss": 45.2656, "step": 10783 }, { "epoch": 0.10208157817514034, "grad_norm": 186.25457763671875, "learning_rate": 1.9728780719156066e-06, "loss": 10.0664, "step": 10784 }, { "epoch": 0.10209104419685539, "grad_norm": 309.6431579589844, "learning_rate": 1.972870979612307e-06, "loss": 21.5234, "step": 10785 }, { "epoch": 0.10210051021857044, "grad_norm": 343.0778503417969, "learning_rate": 1.972863886394572e-06, "loss": 30.4844, "step": 10786 }, { "epoch": 0.10210997624028549, "grad_norm": 578.17724609375, "learning_rate": 1.9728567922624084e-06, "loss": 52.2656, "step": 10787 }, { "epoch": 0.10211944226200055, "grad_norm": 645.7352905273438, "learning_rate": 1.972849697215823e-06, "loss": 32.4766, "step": 10788 }, { "epoch": 0.1021289082837156, "grad_norm": 458.2528076171875, "learning_rate": 1.9728426012548216e-06, "loss": 38.3984, "step": 10789 }, { "epoch": 0.10213837430543066, "grad_norm": 633.7694091796875, "learning_rate": 1.9728355043794115e-06, "loss": 48.5312, "step": 10790 }, { "epoch": 0.10214784032714572, "grad_norm": 240.54690551757812, "learning_rate": 1.9728284065895996e-06, "loss": 23.0625, "step": 10791 }, { "epoch": 0.10215730634886076, "grad_norm": 182.63925170898438, "learning_rate": 1.9728213078853924e-06, "loss": 16.5781, "step": 10792 }, { "epoch": 0.10216677237057582, "grad_norm": 257.3024597167969, "learning_rate": 1.972814208266796e-06, "loss": 14.9297, "step": 10793 }, { "epoch": 0.10217623839229087, "grad_norm": 761.2980346679688, "learning_rate": 1.9728071077338177e-06, "loss": 44.7812, "step": 10794 }, { "epoch": 0.10218570441400593, "grad_norm": 601.8558959960938, "learning_rate": 1.9728000062864642e-06, "loss": 24.25, "step": 10795 }, { "epoch": 0.10219517043572098, "grad_norm": 551.3482055664062, "learning_rate": 1.9727929039247416e-06, "loss": 43.7812, "step": 10796 }, { "epoch": 0.10220463645743603, "grad_norm": 252.5516815185547, "learning_rate": 1.9727858006486575e-06, "loss": 21.0547, "step": 10797 }, { "epoch": 0.10221410247915108, "grad_norm": 3.5308241844177246, "learning_rate": 1.9727786964582175e-06, "loss": 1.063, "step": 10798 }, { "epoch": 0.10222356850086614, "grad_norm": 205.68643188476562, "learning_rate": 1.972771591353429e-06, "loss": 20.1172, "step": 10799 }, { "epoch": 0.1022330345225812, "grad_norm": 442.6908874511719, "learning_rate": 1.9727644853342987e-06, "loss": 43.4531, "step": 10800 }, { "epoch": 0.10224250054429625, "grad_norm": 519.674072265625, "learning_rate": 1.972757378400833e-06, "loss": 55.8086, "step": 10801 }, { "epoch": 0.10225196656601131, "grad_norm": 410.145751953125, "learning_rate": 1.9727502705530386e-06, "loss": 40.9688, "step": 10802 }, { "epoch": 0.10226143258772635, "grad_norm": 524.9154052734375, "learning_rate": 1.972743161790922e-06, "loss": 8.7695, "step": 10803 }, { "epoch": 0.10227089860944141, "grad_norm": 252.77191162109375, "learning_rate": 1.97273605211449e-06, "loss": 22.9297, "step": 10804 }, { "epoch": 0.10228036463115646, "grad_norm": 281.7939453125, "learning_rate": 1.97272894152375e-06, "loss": 19.0156, "step": 10805 }, { "epoch": 0.10228983065287152, "grad_norm": 342.2960205078125, "learning_rate": 1.9727218300187075e-06, "loss": 22.8359, "step": 10806 }, { "epoch": 0.10229929667458657, "grad_norm": 680.4656982421875, "learning_rate": 1.97271471759937e-06, "loss": 19.5, "step": 10807 }, { "epoch": 0.10230876269630163, "grad_norm": 1994.168701171875, "learning_rate": 1.972707604265744e-06, "loss": 41.0625, "step": 10808 }, { "epoch": 0.10231822871801668, "grad_norm": 350.072998046875, "learning_rate": 1.972700490017836e-06, "loss": 40.4844, "step": 10809 }, { "epoch": 0.10232769473973173, "grad_norm": 346.6784362792969, "learning_rate": 1.9726933748556533e-06, "loss": 24.2852, "step": 10810 }, { "epoch": 0.10233716076144679, "grad_norm": 2.8702890872955322, "learning_rate": 1.9726862587792014e-06, "loss": 0.8506, "step": 10811 }, { "epoch": 0.10234662678316184, "grad_norm": 296.68426513671875, "learning_rate": 1.972679141788488e-06, "loss": 33.8047, "step": 10812 }, { "epoch": 0.1023560928048769, "grad_norm": 279.8361511230469, "learning_rate": 1.9726720238835198e-06, "loss": 12.7031, "step": 10813 }, { "epoch": 0.10236555882659194, "grad_norm": 14966.8642578125, "learning_rate": 1.9726649050643023e-06, "loss": 26.2969, "step": 10814 }, { "epoch": 0.102375024848307, "grad_norm": 282.9493103027344, "learning_rate": 1.972657785330844e-06, "loss": 32.9688, "step": 10815 }, { "epoch": 0.10238449087002205, "grad_norm": 802.9174194335938, "learning_rate": 1.9726506646831502e-06, "loss": 77.75, "step": 10816 }, { "epoch": 0.10239395689173711, "grad_norm": 577.6806030273438, "learning_rate": 1.9726435431212283e-06, "loss": 50.6094, "step": 10817 }, { "epoch": 0.10240342291345217, "grad_norm": 364.91754150390625, "learning_rate": 1.9726364206450846e-06, "loss": 44.7812, "step": 10818 }, { "epoch": 0.10241288893516722, "grad_norm": 2.9025769233703613, "learning_rate": 1.9726292972547255e-06, "loss": 0.9263, "step": 10819 }, { "epoch": 0.10242235495688227, "grad_norm": 236.49420166015625, "learning_rate": 1.9726221729501587e-06, "loss": 31.5312, "step": 10820 }, { "epoch": 0.10243182097859732, "grad_norm": 510.47271728515625, "learning_rate": 1.97261504773139e-06, "loss": 23.8438, "step": 10821 }, { "epoch": 0.10244128700031238, "grad_norm": 3.306743621826172, "learning_rate": 1.972607921598427e-06, "loss": 0.96, "step": 10822 }, { "epoch": 0.10245075302202743, "grad_norm": 758.3485107421875, "learning_rate": 1.9726007945512754e-06, "loss": 42.8594, "step": 10823 }, { "epoch": 0.10246021904374249, "grad_norm": 1055.95654296875, "learning_rate": 1.972593666589942e-06, "loss": 43.7109, "step": 10824 }, { "epoch": 0.10246968506545755, "grad_norm": 1080.4608154296875, "learning_rate": 1.9725865377144343e-06, "loss": 29.2031, "step": 10825 }, { "epoch": 0.10247915108717259, "grad_norm": 387.9617614746094, "learning_rate": 1.9725794079247583e-06, "loss": 29.5469, "step": 10826 }, { "epoch": 0.10248861710888765, "grad_norm": 323.5856628417969, "learning_rate": 1.972572277220921e-06, "loss": 29.3438, "step": 10827 }, { "epoch": 0.1024980831306027, "grad_norm": 252.15255737304688, "learning_rate": 1.972565145602929e-06, "loss": 17.9922, "step": 10828 }, { "epoch": 0.10250754915231776, "grad_norm": 529.5326538085938, "learning_rate": 1.9725580130707895e-06, "loss": 47.4766, "step": 10829 }, { "epoch": 0.1025170151740328, "grad_norm": 633.2676391601562, "learning_rate": 1.9725508796245083e-06, "loss": 32.5078, "step": 10830 }, { "epoch": 0.10252648119574787, "grad_norm": 248.73562622070312, "learning_rate": 1.9725437452640925e-06, "loss": 23.4531, "step": 10831 }, { "epoch": 0.10253594721746291, "grad_norm": 3.68914794921875, "learning_rate": 1.9725366099895487e-06, "loss": 0.9048, "step": 10832 }, { "epoch": 0.10254541323917797, "grad_norm": 289.8896789550781, "learning_rate": 1.9725294738008843e-06, "loss": 22.0469, "step": 10833 }, { "epoch": 0.10255487926089303, "grad_norm": 726.901611328125, "learning_rate": 1.972522336698105e-06, "loss": 68.6875, "step": 10834 }, { "epoch": 0.10256434528260808, "grad_norm": 362.4858093261719, "learning_rate": 1.9725151986812182e-06, "loss": 35.6875, "step": 10835 }, { "epoch": 0.10257381130432314, "grad_norm": 243.82470703125, "learning_rate": 1.97250805975023e-06, "loss": 20.125, "step": 10836 }, { "epoch": 0.10258327732603818, "grad_norm": 286.18585205078125, "learning_rate": 1.972500919905148e-06, "loss": 25.5781, "step": 10837 }, { "epoch": 0.10259274334775324, "grad_norm": 295.905029296875, "learning_rate": 1.9724937791459782e-06, "loss": 17.7891, "step": 10838 }, { "epoch": 0.10260220936946829, "grad_norm": 275.5675354003906, "learning_rate": 1.9724866374727273e-06, "loss": 24.4844, "step": 10839 }, { "epoch": 0.10261167539118335, "grad_norm": 519.4190063476562, "learning_rate": 1.972479494885403e-06, "loss": 24.3281, "step": 10840 }, { "epoch": 0.1026211414128984, "grad_norm": 437.50689697265625, "learning_rate": 1.97247235138401e-06, "loss": 34.9531, "step": 10841 }, { "epoch": 0.10263060743461346, "grad_norm": 3.3383407592773438, "learning_rate": 1.972465206968557e-06, "loss": 0.9778, "step": 10842 }, { "epoch": 0.10264007345632851, "grad_norm": 475.2577209472656, "learning_rate": 1.97245806163905e-06, "loss": 19.0391, "step": 10843 }, { "epoch": 0.10264953947804356, "grad_norm": 3.287160873413086, "learning_rate": 1.9724509153954955e-06, "loss": 0.9097, "step": 10844 }, { "epoch": 0.10265900549975862, "grad_norm": 760.5127563476562, "learning_rate": 1.9724437682379005e-06, "loss": 51.1719, "step": 10845 }, { "epoch": 0.10266847152147367, "grad_norm": 302.37518310546875, "learning_rate": 1.9724366201662715e-06, "loss": 24.4688, "step": 10846 }, { "epoch": 0.10267793754318873, "grad_norm": 543.580810546875, "learning_rate": 1.972429471180615e-06, "loss": 47.8125, "step": 10847 }, { "epoch": 0.10268740356490377, "grad_norm": 490.3222961425781, "learning_rate": 1.972422321280939e-06, "loss": 31.1953, "step": 10848 }, { "epoch": 0.10269686958661883, "grad_norm": 315.1514587402344, "learning_rate": 1.9724151704672485e-06, "loss": 24.0, "step": 10849 }, { "epoch": 0.10270633560833388, "grad_norm": 419.42413330078125, "learning_rate": 1.9724080187395513e-06, "loss": 46.3438, "step": 10850 }, { "epoch": 0.10271580163004894, "grad_norm": 572.6223754882812, "learning_rate": 1.972400866097854e-06, "loss": 57.7578, "step": 10851 }, { "epoch": 0.102725267651764, "grad_norm": 604.9544677734375, "learning_rate": 1.972393712542163e-06, "loss": 72.8828, "step": 10852 }, { "epoch": 0.10273473367347905, "grad_norm": 411.4793395996094, "learning_rate": 1.972386558072485e-06, "loss": 32.8438, "step": 10853 }, { "epoch": 0.1027441996951941, "grad_norm": 379.739990234375, "learning_rate": 1.9723794026888267e-06, "loss": 23.6562, "step": 10854 }, { "epoch": 0.10275366571690915, "grad_norm": 459.8060302734375, "learning_rate": 1.9723722463911953e-06, "loss": 33.0156, "step": 10855 }, { "epoch": 0.10276313173862421, "grad_norm": 251.7030792236328, "learning_rate": 1.9723650891795975e-06, "loss": 19.4375, "step": 10856 }, { "epoch": 0.10277259776033926, "grad_norm": 253.3026123046875, "learning_rate": 1.9723579310540394e-06, "loss": 22.8984, "step": 10857 }, { "epoch": 0.10278206378205432, "grad_norm": 420.1177062988281, "learning_rate": 1.9723507720145284e-06, "loss": 38.7188, "step": 10858 }, { "epoch": 0.10279152980376936, "grad_norm": 320.0526123046875, "learning_rate": 1.9723436120610706e-06, "loss": 20.4922, "step": 10859 }, { "epoch": 0.10280099582548442, "grad_norm": 222.70547485351562, "learning_rate": 1.972336451193673e-06, "loss": 10.8789, "step": 10860 }, { "epoch": 0.10281046184719948, "grad_norm": 275.14556884765625, "learning_rate": 1.972329289412343e-06, "loss": 22.2344, "step": 10861 }, { "epoch": 0.10281992786891453, "grad_norm": 531.1494140625, "learning_rate": 1.9723221267170864e-06, "loss": 37.2656, "step": 10862 }, { "epoch": 0.10282939389062959, "grad_norm": 354.58489990234375, "learning_rate": 1.9723149631079098e-06, "loss": 39.9609, "step": 10863 }, { "epoch": 0.10283885991234464, "grad_norm": 170.89991760253906, "learning_rate": 1.972307798584821e-06, "loss": 18.3359, "step": 10864 }, { "epoch": 0.1028483259340597, "grad_norm": 334.2052917480469, "learning_rate": 1.9723006331478257e-06, "loss": 22.375, "step": 10865 }, { "epoch": 0.10285779195577474, "grad_norm": 393.3051452636719, "learning_rate": 1.9722934667969313e-06, "loss": 12.4414, "step": 10866 }, { "epoch": 0.1028672579774898, "grad_norm": 748.0123291015625, "learning_rate": 1.9722862995321443e-06, "loss": 25.1953, "step": 10867 }, { "epoch": 0.10287672399920486, "grad_norm": 221.02259826660156, "learning_rate": 1.9722791313534715e-06, "loss": 15.9648, "step": 10868 }, { "epoch": 0.10288619002091991, "grad_norm": 699.7174682617188, "learning_rate": 1.9722719622609195e-06, "loss": 32.9531, "step": 10869 }, { "epoch": 0.10289565604263497, "grad_norm": 453.2724609375, "learning_rate": 1.9722647922544948e-06, "loss": 36.6953, "step": 10870 }, { "epoch": 0.10290512206435001, "grad_norm": 515.581787109375, "learning_rate": 1.972257621334205e-06, "loss": 19.6836, "step": 10871 }, { "epoch": 0.10291458808606507, "grad_norm": 429.34686279296875, "learning_rate": 1.9722504495000555e-06, "loss": 24.4297, "step": 10872 }, { "epoch": 0.10292405410778012, "grad_norm": 165.5006561279297, "learning_rate": 1.9722432767520545e-06, "loss": 19.4453, "step": 10873 }, { "epoch": 0.10293352012949518, "grad_norm": 321.20294189453125, "learning_rate": 1.972236103090208e-06, "loss": 26.5938, "step": 10874 }, { "epoch": 0.10294298615121023, "grad_norm": 394.6074523925781, "learning_rate": 1.9722289285145224e-06, "loss": 18.8867, "step": 10875 }, { "epoch": 0.10295245217292529, "grad_norm": 539.1472778320312, "learning_rate": 1.9722217530250052e-06, "loss": 41.4141, "step": 10876 }, { "epoch": 0.10296191819464035, "grad_norm": 201.51284790039062, "learning_rate": 1.9722145766216628e-06, "loss": 22.6797, "step": 10877 }, { "epoch": 0.10297138421635539, "grad_norm": 290.1072082519531, "learning_rate": 1.9722073993045018e-06, "loss": 25.7578, "step": 10878 }, { "epoch": 0.10298085023807045, "grad_norm": 344.3453369140625, "learning_rate": 1.972200221073529e-06, "loss": 26.5234, "step": 10879 }, { "epoch": 0.1029903162597855, "grad_norm": 234.2140350341797, "learning_rate": 1.9721930419287517e-06, "loss": 19.8906, "step": 10880 }, { "epoch": 0.10299978228150056, "grad_norm": 485.43719482421875, "learning_rate": 1.972185861870176e-06, "loss": 48.0, "step": 10881 }, { "epoch": 0.1030092483032156, "grad_norm": 583.1027221679688, "learning_rate": 1.9721786808978084e-06, "loss": 30.2578, "step": 10882 }, { "epoch": 0.10301871432493066, "grad_norm": 285.80999755859375, "learning_rate": 1.9721714990116564e-06, "loss": 45.7891, "step": 10883 }, { "epoch": 0.10302818034664571, "grad_norm": 160.91310119628906, "learning_rate": 1.9721643162117266e-06, "loss": 21.1406, "step": 10884 }, { "epoch": 0.10303764636836077, "grad_norm": 378.9137878417969, "learning_rate": 1.9721571324980256e-06, "loss": 44.1719, "step": 10885 }, { "epoch": 0.10304711239007583, "grad_norm": 426.5187683105469, "learning_rate": 1.9721499478705595e-06, "loss": 65.7344, "step": 10886 }, { "epoch": 0.10305657841179088, "grad_norm": 420.0162048339844, "learning_rate": 1.972142762329336e-06, "loss": 44.8281, "step": 10887 }, { "epoch": 0.10306604443350594, "grad_norm": 175.10191345214844, "learning_rate": 1.972135575874362e-06, "loss": 19.5781, "step": 10888 }, { "epoch": 0.10307551045522098, "grad_norm": 412.2738952636719, "learning_rate": 1.9721283885056437e-06, "loss": 44.7578, "step": 10889 }, { "epoch": 0.10308497647693604, "grad_norm": 379.93157958984375, "learning_rate": 1.9721212002231876e-06, "loss": 23.8594, "step": 10890 }, { "epoch": 0.10309444249865109, "grad_norm": 595.22216796875, "learning_rate": 1.972114011027001e-06, "loss": 52.2422, "step": 10891 }, { "epoch": 0.10310390852036615, "grad_norm": 513.4304809570312, "learning_rate": 1.9721068209170905e-06, "loss": 50.4062, "step": 10892 }, { "epoch": 0.1031133745420812, "grad_norm": 193.1838836669922, "learning_rate": 1.9720996298934626e-06, "loss": 21.4766, "step": 10893 }, { "epoch": 0.10312284056379625, "grad_norm": 342.3955993652344, "learning_rate": 1.9720924379561246e-06, "loss": 17.8906, "step": 10894 }, { "epoch": 0.10313230658551131, "grad_norm": 565.2291870117188, "learning_rate": 1.9720852451050828e-06, "loss": 29.8281, "step": 10895 }, { "epoch": 0.10314177260722636, "grad_norm": 746.2498168945312, "learning_rate": 1.972078051340344e-06, "loss": 57.4219, "step": 10896 }, { "epoch": 0.10315123862894142, "grad_norm": 331.8751220703125, "learning_rate": 1.9720708566619155e-06, "loss": 21.3438, "step": 10897 }, { "epoch": 0.10316070465065647, "grad_norm": 192.48326110839844, "learning_rate": 1.972063661069803e-06, "loss": 15.1406, "step": 10898 }, { "epoch": 0.10317017067237153, "grad_norm": 289.55340576171875, "learning_rate": 1.9720564645640144e-06, "loss": 32.5469, "step": 10899 }, { "epoch": 0.10317963669408657, "grad_norm": 282.59759521484375, "learning_rate": 1.972049267144556e-06, "loss": 45.4375, "step": 10900 }, { "epoch": 0.10318910271580163, "grad_norm": 786.8599243164062, "learning_rate": 1.972042068811434e-06, "loss": 40.1094, "step": 10901 }, { "epoch": 0.10319856873751668, "grad_norm": 387.32440185546875, "learning_rate": 1.972034869564656e-06, "loss": 41.1172, "step": 10902 }, { "epoch": 0.10320803475923174, "grad_norm": 282.9812927246094, "learning_rate": 1.9720276694042285e-06, "loss": 17.0625, "step": 10903 }, { "epoch": 0.1032175007809468, "grad_norm": 440.28076171875, "learning_rate": 1.9720204683301583e-06, "loss": 32.0, "step": 10904 }, { "epoch": 0.10322696680266184, "grad_norm": 245.8771514892578, "learning_rate": 1.972013266342452e-06, "loss": 11.7422, "step": 10905 }, { "epoch": 0.1032364328243769, "grad_norm": 317.5594787597656, "learning_rate": 1.972006063441116e-06, "loss": 31.3281, "step": 10906 }, { "epoch": 0.10324589884609195, "grad_norm": 158.54681396484375, "learning_rate": 1.9719988596261582e-06, "loss": 16.2344, "step": 10907 }, { "epoch": 0.10325536486780701, "grad_norm": 189.99205017089844, "learning_rate": 1.9719916548975847e-06, "loss": 21.25, "step": 10908 }, { "epoch": 0.10326483088952206, "grad_norm": 437.78851318359375, "learning_rate": 1.9719844492554018e-06, "loss": 31.25, "step": 10909 }, { "epoch": 0.10327429691123712, "grad_norm": 365.56842041015625, "learning_rate": 1.971977242699617e-06, "loss": 31.1719, "step": 10910 }, { "epoch": 0.10328376293295218, "grad_norm": 663.56103515625, "learning_rate": 1.971970035230237e-06, "loss": 46.3594, "step": 10911 }, { "epoch": 0.10329322895466722, "grad_norm": 890.317138671875, "learning_rate": 1.971962826847268e-06, "loss": 40.9355, "step": 10912 }, { "epoch": 0.10330269497638228, "grad_norm": 3.5380702018737793, "learning_rate": 1.9719556175507173e-06, "loss": 0.9487, "step": 10913 }, { "epoch": 0.10331216099809733, "grad_norm": 331.2292175292969, "learning_rate": 1.9719484073405916e-06, "loss": 20.1953, "step": 10914 }, { "epoch": 0.10332162701981239, "grad_norm": 199.7902374267578, "learning_rate": 1.971941196216898e-06, "loss": 25.6406, "step": 10915 }, { "epoch": 0.10333109304152743, "grad_norm": 303.60113525390625, "learning_rate": 1.9719339841796427e-06, "loss": 11.2148, "step": 10916 }, { "epoch": 0.1033405590632425, "grad_norm": 502.7160339355469, "learning_rate": 1.9719267712288322e-06, "loss": 40.4219, "step": 10917 }, { "epoch": 0.10335002508495754, "grad_norm": 158.38868713378906, "learning_rate": 1.9719195573644743e-06, "loss": 16.2031, "step": 10918 }, { "epoch": 0.1033594911066726, "grad_norm": 486.94378662109375, "learning_rate": 1.9719123425865748e-06, "loss": 23.7422, "step": 10919 }, { "epoch": 0.10336895712838766, "grad_norm": 264.3819885253906, "learning_rate": 1.9719051268951413e-06, "loss": 34.5547, "step": 10920 }, { "epoch": 0.1033784231501027, "grad_norm": 315.072021484375, "learning_rate": 1.97189791029018e-06, "loss": 19.7344, "step": 10921 }, { "epoch": 0.10338788917181777, "grad_norm": 419.4021911621094, "learning_rate": 1.971890692771698e-06, "loss": 44.25, "step": 10922 }, { "epoch": 0.10339735519353281, "grad_norm": 523.1376342773438, "learning_rate": 1.9718834743397022e-06, "loss": 54.5625, "step": 10923 }, { "epoch": 0.10340682121524787, "grad_norm": 207.40463256835938, "learning_rate": 1.9718762549941987e-06, "loss": 25.7969, "step": 10924 }, { "epoch": 0.10341628723696292, "grad_norm": 468.511474609375, "learning_rate": 1.9718690347351948e-06, "loss": 42.8125, "step": 10925 }, { "epoch": 0.10342575325867798, "grad_norm": 2.9573404788970947, "learning_rate": 1.9718618135626975e-06, "loss": 0.957, "step": 10926 }, { "epoch": 0.10343521928039302, "grad_norm": 432.2933044433594, "learning_rate": 1.9718545914767134e-06, "loss": 40.4219, "step": 10927 }, { "epoch": 0.10344468530210808, "grad_norm": 1119.9229736328125, "learning_rate": 1.971847368477249e-06, "loss": 48.9961, "step": 10928 }, { "epoch": 0.10345415132382314, "grad_norm": 527.8988647460938, "learning_rate": 1.9718401445643114e-06, "loss": 40.1953, "step": 10929 }, { "epoch": 0.10346361734553819, "grad_norm": 316.88433837890625, "learning_rate": 1.971832919737907e-06, "loss": 47.5781, "step": 10930 }, { "epoch": 0.10347308336725325, "grad_norm": 287.9429931640625, "learning_rate": 1.971825693998043e-06, "loss": 29.375, "step": 10931 }, { "epoch": 0.1034825493889683, "grad_norm": 447.82244873046875, "learning_rate": 1.9718184673447264e-06, "loss": 28.7266, "step": 10932 }, { "epoch": 0.10349201541068336, "grad_norm": 256.868896484375, "learning_rate": 1.9718112397779635e-06, "loss": 18.3594, "step": 10933 }, { "epoch": 0.1035014814323984, "grad_norm": 525.4481201171875, "learning_rate": 1.971804011297761e-06, "loss": 44.7812, "step": 10934 }, { "epoch": 0.10351094745411346, "grad_norm": 324.1067810058594, "learning_rate": 1.971796781904126e-06, "loss": 43.4375, "step": 10935 }, { "epoch": 0.10352041347582851, "grad_norm": 199.42681884765625, "learning_rate": 1.9717895515970657e-06, "loss": 21.7422, "step": 10936 }, { "epoch": 0.10352987949754357, "grad_norm": 189.02706909179688, "learning_rate": 1.971782320376586e-06, "loss": 17.9922, "step": 10937 }, { "epoch": 0.10353934551925863, "grad_norm": 215.96365356445312, "learning_rate": 1.971775088242694e-06, "loss": 17.3906, "step": 10938 }, { "epoch": 0.10354881154097367, "grad_norm": 436.6141662597656, "learning_rate": 1.971767855195397e-06, "loss": 47.6641, "step": 10939 }, { "epoch": 0.10355827756268873, "grad_norm": 454.8377685546875, "learning_rate": 1.971760621234701e-06, "loss": 41.2266, "step": 10940 }, { "epoch": 0.10356774358440378, "grad_norm": 453.27093505859375, "learning_rate": 1.9717533863606137e-06, "loss": 12.2656, "step": 10941 }, { "epoch": 0.10357720960611884, "grad_norm": 279.400634765625, "learning_rate": 1.9717461505731415e-06, "loss": 10.459, "step": 10942 }, { "epoch": 0.10358667562783389, "grad_norm": 788.4459838867188, "learning_rate": 1.9717389138722908e-06, "loss": 43.1172, "step": 10943 }, { "epoch": 0.10359614164954895, "grad_norm": 360.5593566894531, "learning_rate": 1.9717316762580687e-06, "loss": 14.8945, "step": 10944 }, { "epoch": 0.10360560767126399, "grad_norm": 3.157219409942627, "learning_rate": 1.971724437730482e-06, "loss": 0.9614, "step": 10945 }, { "epoch": 0.10361507369297905, "grad_norm": 325.6812744140625, "learning_rate": 1.971717198289538e-06, "loss": 21.3594, "step": 10946 }, { "epoch": 0.10362453971469411, "grad_norm": 455.7628479003906, "learning_rate": 1.9717099579352427e-06, "loss": 37.3906, "step": 10947 }, { "epoch": 0.10363400573640916, "grad_norm": 185.26690673828125, "learning_rate": 1.9717027166676036e-06, "loss": 26.0781, "step": 10948 }, { "epoch": 0.10364347175812422, "grad_norm": 219.37246704101562, "learning_rate": 1.9716954744866268e-06, "loss": 20.6016, "step": 10949 }, { "epoch": 0.10365293777983926, "grad_norm": 672.3784790039062, "learning_rate": 1.9716882313923193e-06, "loss": 61.0469, "step": 10950 }, { "epoch": 0.10366240380155432, "grad_norm": 283.1695861816406, "learning_rate": 1.9716809873846883e-06, "loss": 26.3203, "step": 10951 }, { "epoch": 0.10367186982326937, "grad_norm": 567.36865234375, "learning_rate": 1.9716737424637407e-06, "loss": 52.7148, "step": 10952 }, { "epoch": 0.10368133584498443, "grad_norm": 503.0396423339844, "learning_rate": 1.971666496629483e-06, "loss": 23.1641, "step": 10953 }, { "epoch": 0.10369080186669949, "grad_norm": 385.7420349121094, "learning_rate": 1.971659249881921e-06, "loss": 24.9609, "step": 10954 }, { "epoch": 0.10370026788841454, "grad_norm": 295.7552795410156, "learning_rate": 1.9716520022210636e-06, "loss": 24.1172, "step": 10955 }, { "epoch": 0.1037097339101296, "grad_norm": 577.8611450195312, "learning_rate": 1.9716447536469163e-06, "loss": 33.3438, "step": 10956 }, { "epoch": 0.10371919993184464, "grad_norm": 260.78131103515625, "learning_rate": 1.9716375041594857e-06, "loss": 37.9531, "step": 10957 }, { "epoch": 0.1037286659535597, "grad_norm": 206.6540985107422, "learning_rate": 1.9716302537587796e-06, "loss": 25.7109, "step": 10958 }, { "epoch": 0.10373813197527475, "grad_norm": 478.281982421875, "learning_rate": 1.971623002444804e-06, "loss": 49.7969, "step": 10959 }, { "epoch": 0.10374759799698981, "grad_norm": 604.0131225585938, "learning_rate": 1.9716157502175656e-06, "loss": 22.1484, "step": 10960 }, { "epoch": 0.10375706401870485, "grad_norm": 470.69366455078125, "learning_rate": 1.971608497077072e-06, "loss": 50.8438, "step": 10961 }, { "epoch": 0.10376653004041991, "grad_norm": 309.73309326171875, "learning_rate": 1.97160124302333e-06, "loss": 17.6406, "step": 10962 }, { "epoch": 0.10377599606213497, "grad_norm": 378.7859802246094, "learning_rate": 1.9715939880563456e-06, "loss": 20.9297, "step": 10963 }, { "epoch": 0.10378546208385002, "grad_norm": 335.626708984375, "learning_rate": 1.971586732176126e-06, "loss": 27.0234, "step": 10964 }, { "epoch": 0.10379492810556508, "grad_norm": 448.3037109375, "learning_rate": 1.971579475382678e-06, "loss": 26.9766, "step": 10965 }, { "epoch": 0.10380439412728013, "grad_norm": 420.7528076171875, "learning_rate": 1.9715722176760088e-06, "loss": 25.3984, "step": 10966 }, { "epoch": 0.10381386014899519, "grad_norm": 350.0543212890625, "learning_rate": 1.9715649590561248e-06, "loss": 25.5391, "step": 10967 }, { "epoch": 0.10382332617071023, "grad_norm": 616.3115234375, "learning_rate": 1.971557699523033e-06, "loss": 34.5508, "step": 10968 }, { "epoch": 0.10383279219242529, "grad_norm": 681.7471923828125, "learning_rate": 1.9715504390767405e-06, "loss": 44.6016, "step": 10969 }, { "epoch": 0.10384225821414034, "grad_norm": 916.4519653320312, "learning_rate": 1.9715431777172534e-06, "loss": 75.75, "step": 10970 }, { "epoch": 0.1038517242358554, "grad_norm": 383.5513916015625, "learning_rate": 1.971535915444579e-06, "loss": 29.5781, "step": 10971 }, { "epoch": 0.10386119025757046, "grad_norm": 676.7057495117188, "learning_rate": 1.9715286522587235e-06, "loss": 53.2109, "step": 10972 }, { "epoch": 0.1038706562792855, "grad_norm": 465.93243408203125, "learning_rate": 1.971521388159695e-06, "loss": 46.3594, "step": 10973 }, { "epoch": 0.10388012230100056, "grad_norm": 385.6876525878906, "learning_rate": 1.971514123147499e-06, "loss": 29.5781, "step": 10974 }, { "epoch": 0.10388958832271561, "grad_norm": 431.7419738769531, "learning_rate": 1.9715068572221436e-06, "loss": 18.6328, "step": 10975 }, { "epoch": 0.10389905434443067, "grad_norm": 292.94146728515625, "learning_rate": 1.9714995903836344e-06, "loss": 19.5664, "step": 10976 }, { "epoch": 0.10390852036614572, "grad_norm": 211.20188903808594, "learning_rate": 1.9714923226319793e-06, "loss": 23.9062, "step": 10977 }, { "epoch": 0.10391798638786078, "grad_norm": 332.8619079589844, "learning_rate": 1.9714850539671846e-06, "loss": 23.8359, "step": 10978 }, { "epoch": 0.10392745240957582, "grad_norm": 2.751103162765503, "learning_rate": 1.9714777843892565e-06, "loss": 0.8911, "step": 10979 }, { "epoch": 0.10393691843129088, "grad_norm": 376.7247314453125, "learning_rate": 1.971470513898203e-06, "loss": 18.5078, "step": 10980 }, { "epoch": 0.10394638445300594, "grad_norm": 289.2521667480469, "learning_rate": 1.9714632424940302e-06, "loss": 20.625, "step": 10981 }, { "epoch": 0.10395585047472099, "grad_norm": 405.21942138671875, "learning_rate": 1.971455970176745e-06, "loss": 25.1562, "step": 10982 }, { "epoch": 0.10396531649643605, "grad_norm": 430.47265625, "learning_rate": 1.9714486969463547e-06, "loss": 33.0312, "step": 10983 }, { "epoch": 0.1039747825181511, "grad_norm": 398.8013000488281, "learning_rate": 1.971441422802866e-06, "loss": 30.1719, "step": 10984 }, { "epoch": 0.10398424853986615, "grad_norm": 2.8812148571014404, "learning_rate": 1.9714341477462853e-06, "loss": 0.9639, "step": 10985 }, { "epoch": 0.1039937145615812, "grad_norm": 650.38330078125, "learning_rate": 1.9714268717766196e-06, "loss": 37.0625, "step": 10986 }, { "epoch": 0.10400318058329626, "grad_norm": 258.11212158203125, "learning_rate": 1.971419594893876e-06, "loss": 24.7344, "step": 10987 }, { "epoch": 0.1040126466050113, "grad_norm": 222.7620086669922, "learning_rate": 1.971412317098061e-06, "loss": 23.5703, "step": 10988 }, { "epoch": 0.10402211262672637, "grad_norm": 370.5278015136719, "learning_rate": 1.9714050383891817e-06, "loss": 61.6719, "step": 10989 }, { "epoch": 0.10403157864844143, "grad_norm": 302.0345458984375, "learning_rate": 1.971397758767245e-06, "loss": 15.5078, "step": 10990 }, { "epoch": 0.10404104467015647, "grad_norm": 833.75244140625, "learning_rate": 1.971390478232257e-06, "loss": 40.1875, "step": 10991 }, { "epoch": 0.10405051069187153, "grad_norm": 3.6264429092407227, "learning_rate": 1.971383196784226e-06, "loss": 0.8867, "step": 10992 }, { "epoch": 0.10405997671358658, "grad_norm": 435.81390380859375, "learning_rate": 1.9713759144231577e-06, "loss": 48.75, "step": 10993 }, { "epoch": 0.10406944273530164, "grad_norm": 535.5218505859375, "learning_rate": 1.971368631149059e-06, "loss": 30.2188, "step": 10994 }, { "epoch": 0.10407890875701668, "grad_norm": 364.15069580078125, "learning_rate": 1.971361346961937e-06, "loss": 46.8438, "step": 10995 }, { "epoch": 0.10408837477873174, "grad_norm": 570.130615234375, "learning_rate": 1.9713540618617984e-06, "loss": 50.8906, "step": 10996 }, { "epoch": 0.1040978408004468, "grad_norm": 223.6854705810547, "learning_rate": 1.971346775848651e-06, "loss": 14.7266, "step": 10997 }, { "epoch": 0.10410730682216185, "grad_norm": 183.22982788085938, "learning_rate": 1.9713394889225e-06, "loss": 20.5156, "step": 10998 }, { "epoch": 0.10411677284387691, "grad_norm": 430.8686828613281, "learning_rate": 1.9713322010833534e-06, "loss": 20.7891, "step": 10999 }, { "epoch": 0.10412623886559196, "grad_norm": 188.73277282714844, "learning_rate": 1.9713249123312177e-06, "loss": 24.25, "step": 11000 }, { "epoch": 0.10413570488730702, "grad_norm": 257.7251281738281, "learning_rate": 1.9713176226660994e-06, "loss": 13.7031, "step": 11001 }, { "epoch": 0.10414517090902206, "grad_norm": 1169.9783935546875, "learning_rate": 1.971310332088006e-06, "loss": 47.7109, "step": 11002 }, { "epoch": 0.10415463693073712, "grad_norm": 292.08551025390625, "learning_rate": 1.971303040596944e-06, "loss": 31.2578, "step": 11003 }, { "epoch": 0.10416410295245217, "grad_norm": 563.0948486328125, "learning_rate": 1.9712957481929206e-06, "loss": 44.1562, "step": 11004 }, { "epoch": 0.10417356897416723, "grad_norm": 280.7180480957031, "learning_rate": 1.971288454875942e-06, "loss": 20.1992, "step": 11005 }, { "epoch": 0.10418303499588229, "grad_norm": 674.0655517578125, "learning_rate": 1.971281160646016e-06, "loss": 35.1875, "step": 11006 }, { "epoch": 0.10419250101759733, "grad_norm": 353.2453918457031, "learning_rate": 1.9712738655031486e-06, "loss": 29.0312, "step": 11007 }, { "epoch": 0.1042019670393124, "grad_norm": 326.4766540527344, "learning_rate": 1.9712665694473467e-06, "loss": 21.2109, "step": 11008 }, { "epoch": 0.10421143306102744, "grad_norm": 250.00648498535156, "learning_rate": 1.971259272478618e-06, "loss": 21.0625, "step": 11009 }, { "epoch": 0.1042208990827425, "grad_norm": 685.5781860351562, "learning_rate": 1.9712519745969682e-06, "loss": 47.5938, "step": 11010 }, { "epoch": 0.10423036510445755, "grad_norm": 1284.687255859375, "learning_rate": 1.9712446758024047e-06, "loss": 55.9219, "step": 11011 }, { "epoch": 0.1042398311261726, "grad_norm": 230.6253204345703, "learning_rate": 1.9712373760949345e-06, "loss": 16.832, "step": 11012 }, { "epoch": 0.10424929714788765, "grad_norm": 257.70953369140625, "learning_rate": 1.9712300754745644e-06, "loss": 17.6094, "step": 11013 }, { "epoch": 0.10425876316960271, "grad_norm": 314.79168701171875, "learning_rate": 1.971222773941301e-06, "loss": 16.5938, "step": 11014 }, { "epoch": 0.10426822919131777, "grad_norm": 701.1805419921875, "learning_rate": 1.971215471495152e-06, "loss": 43.0469, "step": 11015 }, { "epoch": 0.10427769521303282, "grad_norm": 627.4496459960938, "learning_rate": 1.971208168136123e-06, "loss": 23.0, "step": 11016 }, { "epoch": 0.10428716123474788, "grad_norm": 243.2913360595703, "learning_rate": 1.971200863864222e-06, "loss": 18.0391, "step": 11017 }, { "epoch": 0.10429662725646292, "grad_norm": 341.4163818359375, "learning_rate": 1.971193558679455e-06, "loss": 26.0859, "step": 11018 }, { "epoch": 0.10430609327817798, "grad_norm": 225.21926879882812, "learning_rate": 1.9711862525818294e-06, "loss": 17.9922, "step": 11019 }, { "epoch": 0.10431555929989303, "grad_norm": 412.05401611328125, "learning_rate": 1.9711789455713517e-06, "loss": 42.8438, "step": 11020 }, { "epoch": 0.10432502532160809, "grad_norm": 511.51654052734375, "learning_rate": 1.9711716376480295e-06, "loss": 54.9219, "step": 11021 }, { "epoch": 0.10433449134332314, "grad_norm": 362.22491455078125, "learning_rate": 1.971164328811869e-06, "loss": 26.0469, "step": 11022 }, { "epoch": 0.1043439573650382, "grad_norm": 926.6913452148438, "learning_rate": 1.971157019062877e-06, "loss": 47.8125, "step": 11023 }, { "epoch": 0.10435342338675326, "grad_norm": 258.46148681640625, "learning_rate": 1.9711497084010603e-06, "loss": 16.3281, "step": 11024 }, { "epoch": 0.1043628894084683, "grad_norm": 2.614239454269409, "learning_rate": 1.9711423968264265e-06, "loss": 0.853, "step": 11025 }, { "epoch": 0.10437235543018336, "grad_norm": 305.5497131347656, "learning_rate": 1.9711350843389816e-06, "loss": 24.1641, "step": 11026 }, { "epoch": 0.10438182145189841, "grad_norm": 3.1954874992370605, "learning_rate": 1.9711277709387334e-06, "loss": 0.9634, "step": 11027 }, { "epoch": 0.10439128747361347, "grad_norm": 182.5242156982422, "learning_rate": 1.9711204566256877e-06, "loss": 23.2188, "step": 11028 }, { "epoch": 0.10440075349532851, "grad_norm": 531.1272583007812, "learning_rate": 1.9711131413998527e-06, "loss": 53.4844, "step": 11029 }, { "epoch": 0.10441021951704357, "grad_norm": 679.1480102539062, "learning_rate": 1.9711058252612338e-06, "loss": 20.7031, "step": 11030 }, { "epoch": 0.10441968553875862, "grad_norm": 1006.1559448242188, "learning_rate": 1.971098508209839e-06, "loss": 56.3203, "step": 11031 }, { "epoch": 0.10442915156047368, "grad_norm": 542.111083984375, "learning_rate": 1.971091190245675e-06, "loss": 45.4531, "step": 11032 }, { "epoch": 0.10443861758218874, "grad_norm": 993.37109375, "learning_rate": 1.971083871368748e-06, "loss": 27.9219, "step": 11033 }, { "epoch": 0.10444808360390379, "grad_norm": 469.634521484375, "learning_rate": 1.971076551579065e-06, "loss": 22.2188, "step": 11034 }, { "epoch": 0.10445754962561885, "grad_norm": 669.7723999023438, "learning_rate": 1.971069230876634e-06, "loss": 58.0, "step": 11035 }, { "epoch": 0.10446701564733389, "grad_norm": 547.9564819335938, "learning_rate": 1.9710619092614603e-06, "loss": 47.3906, "step": 11036 }, { "epoch": 0.10447648166904895, "grad_norm": 764.3327026367188, "learning_rate": 1.9710545867335524e-06, "loss": 38.2969, "step": 11037 }, { "epoch": 0.104485947690764, "grad_norm": 255.91648864746094, "learning_rate": 1.9710472632929157e-06, "loss": 21.0078, "step": 11038 }, { "epoch": 0.10449541371247906, "grad_norm": 779.333984375, "learning_rate": 1.9710399389395583e-06, "loss": 35.5234, "step": 11039 }, { "epoch": 0.10450487973419412, "grad_norm": 282.8353576660156, "learning_rate": 1.9710326136734864e-06, "loss": 19.7969, "step": 11040 }, { "epoch": 0.10451434575590916, "grad_norm": 993.9248657226562, "learning_rate": 1.971025287494707e-06, "loss": 81.3359, "step": 11041 }, { "epoch": 0.10452381177762422, "grad_norm": 162.68218994140625, "learning_rate": 1.9710179604032264e-06, "loss": 22.5312, "step": 11042 }, { "epoch": 0.10453327779933927, "grad_norm": 224.70155334472656, "learning_rate": 1.9710106323990528e-06, "loss": 19.8125, "step": 11043 }, { "epoch": 0.10454274382105433, "grad_norm": 419.9261779785156, "learning_rate": 1.9710033034821922e-06, "loss": 42.0, "step": 11044 }, { "epoch": 0.10455220984276938, "grad_norm": 907.1172485351562, "learning_rate": 1.9709959736526515e-06, "loss": 47.0, "step": 11045 }, { "epoch": 0.10456167586448444, "grad_norm": 258.5594787597656, "learning_rate": 1.970988642910438e-06, "loss": 25.2344, "step": 11046 }, { "epoch": 0.10457114188619948, "grad_norm": 670.270263671875, "learning_rate": 1.970981311255558e-06, "loss": 46.7969, "step": 11047 }, { "epoch": 0.10458060790791454, "grad_norm": 538.0924072265625, "learning_rate": 1.9709739786880187e-06, "loss": 24.6719, "step": 11048 }, { "epoch": 0.1045900739296296, "grad_norm": 343.294189453125, "learning_rate": 1.9709666452078275e-06, "loss": 26.1797, "step": 11049 }, { "epoch": 0.10459953995134465, "grad_norm": 391.7842712402344, "learning_rate": 1.970959310814991e-06, "loss": 39.3906, "step": 11050 }, { "epoch": 0.10460900597305971, "grad_norm": 381.0661926269531, "learning_rate": 1.970951975509515e-06, "loss": 24.3047, "step": 11051 }, { "epoch": 0.10461847199477475, "grad_norm": 395.5987548828125, "learning_rate": 1.970944639291408e-06, "loss": 26.6016, "step": 11052 }, { "epoch": 0.10462793801648981, "grad_norm": 209.2889404296875, "learning_rate": 1.970937302160676e-06, "loss": 19.5391, "step": 11053 }, { "epoch": 0.10463740403820486, "grad_norm": 451.6080017089844, "learning_rate": 1.970929964117326e-06, "loss": 17.3516, "step": 11054 }, { "epoch": 0.10464687005991992, "grad_norm": 364.6752624511719, "learning_rate": 1.970922625161365e-06, "loss": 34.4062, "step": 11055 }, { "epoch": 0.10465633608163497, "grad_norm": 295.34613037109375, "learning_rate": 1.9709152852928e-06, "loss": 37.2969, "step": 11056 }, { "epoch": 0.10466580210335003, "grad_norm": 384.87994384765625, "learning_rate": 1.9709079445116376e-06, "loss": 23.3828, "step": 11057 }, { "epoch": 0.10467526812506509, "grad_norm": 252.2899169921875, "learning_rate": 1.970900602817885e-06, "loss": 19.6719, "step": 11058 }, { "epoch": 0.10468473414678013, "grad_norm": 3.295797824859619, "learning_rate": 1.970893260211549e-06, "loss": 0.9912, "step": 11059 }, { "epoch": 0.10469420016849519, "grad_norm": 325.1255798339844, "learning_rate": 1.970885916692637e-06, "loss": 22.0117, "step": 11060 }, { "epoch": 0.10470366619021024, "grad_norm": 2.886643409729004, "learning_rate": 1.970878572261155e-06, "loss": 0.8691, "step": 11061 }, { "epoch": 0.1047131322119253, "grad_norm": 296.6607971191406, "learning_rate": 1.97087122691711e-06, "loss": 17.3516, "step": 11062 }, { "epoch": 0.10472259823364034, "grad_norm": 598.5655517578125, "learning_rate": 1.97086388066051e-06, "loss": 49.6562, "step": 11063 }, { "epoch": 0.1047320642553554, "grad_norm": 3.0879268646240234, "learning_rate": 1.9708565334913602e-06, "loss": 1.0122, "step": 11064 }, { "epoch": 0.10474153027707045, "grad_norm": 394.7765808105469, "learning_rate": 1.9708491854096686e-06, "loss": 26.4375, "step": 11065 }, { "epoch": 0.10475099629878551, "grad_norm": 188.1182861328125, "learning_rate": 1.970841836415442e-06, "loss": 23.1602, "step": 11066 }, { "epoch": 0.10476046232050057, "grad_norm": 168.74896240234375, "learning_rate": 1.970834486508688e-06, "loss": 23.4062, "step": 11067 }, { "epoch": 0.10476992834221562, "grad_norm": 3.2329797744750977, "learning_rate": 1.970827135689412e-06, "loss": 1.0718, "step": 11068 }, { "epoch": 0.10477939436393068, "grad_norm": 336.25469970703125, "learning_rate": 1.9708197839576217e-06, "loss": 17.8555, "step": 11069 }, { "epoch": 0.10478886038564572, "grad_norm": 270.8706359863281, "learning_rate": 1.970812431313324e-06, "loss": 19.7734, "step": 11070 }, { "epoch": 0.10479832640736078, "grad_norm": 250.5419158935547, "learning_rate": 1.9708050777565257e-06, "loss": 17.2578, "step": 11071 }, { "epoch": 0.10480779242907583, "grad_norm": 3.028418779373169, "learning_rate": 1.9707977232872337e-06, "loss": 0.8308, "step": 11072 }, { "epoch": 0.10481725845079089, "grad_norm": 271.75396728515625, "learning_rate": 1.9707903679054555e-06, "loss": 23.2422, "step": 11073 }, { "epoch": 0.10482672447250593, "grad_norm": 277.798828125, "learning_rate": 1.970783011611197e-06, "loss": 20.4062, "step": 11074 }, { "epoch": 0.104836190494221, "grad_norm": 1366.8245849609375, "learning_rate": 1.9707756544044657e-06, "loss": 45.7344, "step": 11075 }, { "epoch": 0.10484565651593605, "grad_norm": 495.7766418457031, "learning_rate": 1.9707682962852684e-06, "loss": 23.8281, "step": 11076 }, { "epoch": 0.1048551225376511, "grad_norm": 440.3424987792969, "learning_rate": 1.970760937253612e-06, "loss": 18.5938, "step": 11077 }, { "epoch": 0.10486458855936616, "grad_norm": 4.059333324432373, "learning_rate": 1.970753577309504e-06, "loss": 0.9746, "step": 11078 }, { "epoch": 0.1048740545810812, "grad_norm": 295.55401611328125, "learning_rate": 1.9707462164529504e-06, "loss": 20.625, "step": 11079 }, { "epoch": 0.10488352060279627, "grad_norm": 155.9383544921875, "learning_rate": 1.9707388546839585e-06, "loss": 23.2109, "step": 11080 }, { "epoch": 0.10489298662451131, "grad_norm": 266.9360656738281, "learning_rate": 1.970731492002535e-06, "loss": 21.0312, "step": 11081 }, { "epoch": 0.10490245264622637, "grad_norm": 323.8378601074219, "learning_rate": 1.9707241284086878e-06, "loss": 42.8125, "step": 11082 }, { "epoch": 0.10491191866794143, "grad_norm": 583.0111083984375, "learning_rate": 1.9707167639024225e-06, "loss": 67.625, "step": 11083 }, { "epoch": 0.10492138468965648, "grad_norm": 502.0019226074219, "learning_rate": 1.9707093984837465e-06, "loss": 19.8945, "step": 11084 }, { "epoch": 0.10493085071137154, "grad_norm": 577.9376831054688, "learning_rate": 1.9707020321526673e-06, "loss": 27.4609, "step": 11085 }, { "epoch": 0.10494031673308658, "grad_norm": 262.5251770019531, "learning_rate": 1.970694664909191e-06, "loss": 19.2266, "step": 11086 }, { "epoch": 0.10494978275480164, "grad_norm": 356.4413757324219, "learning_rate": 1.970687296753325e-06, "loss": 19.625, "step": 11087 }, { "epoch": 0.10495924877651669, "grad_norm": 1665.1514892578125, "learning_rate": 1.9706799276850758e-06, "loss": 42.6758, "step": 11088 }, { "epoch": 0.10496871479823175, "grad_norm": 318.0236511230469, "learning_rate": 1.9706725577044505e-06, "loss": 22.6836, "step": 11089 }, { "epoch": 0.1049781808199468, "grad_norm": 285.42578125, "learning_rate": 1.970665186811457e-06, "loss": 10.8867, "step": 11090 }, { "epoch": 0.10498764684166186, "grad_norm": 408.74188232421875, "learning_rate": 1.9706578150061003e-06, "loss": 27.2656, "step": 11091 }, { "epoch": 0.10499711286337692, "grad_norm": 353.296630859375, "learning_rate": 1.9706504422883894e-06, "loss": 42.3828, "step": 11092 }, { "epoch": 0.10500657888509196, "grad_norm": 365.7181396484375, "learning_rate": 1.9706430686583294e-06, "loss": 20.2422, "step": 11093 }, { "epoch": 0.10501604490680702, "grad_norm": 297.9637451171875, "learning_rate": 1.9706356941159282e-06, "loss": 18.0391, "step": 11094 }, { "epoch": 0.10502551092852207, "grad_norm": 435.6208801269531, "learning_rate": 1.970628318661193e-06, "loss": 21.1406, "step": 11095 }, { "epoch": 0.10503497695023713, "grad_norm": 917.4715576171875, "learning_rate": 1.97062094229413e-06, "loss": 52.4531, "step": 11096 }, { "epoch": 0.10504444297195217, "grad_norm": 528.5549926757812, "learning_rate": 1.9706135650147466e-06, "loss": 42.6562, "step": 11097 }, { "epoch": 0.10505390899366723, "grad_norm": 312.6387939453125, "learning_rate": 1.9706061868230497e-06, "loss": 18.8906, "step": 11098 }, { "epoch": 0.10506337501538228, "grad_norm": 973.0550537109375, "learning_rate": 1.970598807719046e-06, "loss": 48.7305, "step": 11099 }, { "epoch": 0.10507284103709734, "grad_norm": 368.74652099609375, "learning_rate": 1.9705914277027427e-06, "loss": 28.9062, "step": 11100 }, { "epoch": 0.1050823070588124, "grad_norm": 225.9493408203125, "learning_rate": 1.9705840467741464e-06, "loss": 27.4297, "step": 11101 }, { "epoch": 0.10509177308052745, "grad_norm": 615.1433715820312, "learning_rate": 1.970576664933264e-06, "loss": 41.6172, "step": 11102 }, { "epoch": 0.1051012391022425, "grad_norm": 370.9982604980469, "learning_rate": 1.9705692821801033e-06, "loss": 16.2109, "step": 11103 }, { "epoch": 0.10511070512395755, "grad_norm": 424.4244384765625, "learning_rate": 1.9705618985146702e-06, "loss": 32.7891, "step": 11104 }, { "epoch": 0.10512017114567261, "grad_norm": 366.71148681640625, "learning_rate": 1.970554513936972e-06, "loss": 48.8281, "step": 11105 }, { "epoch": 0.10512963716738766, "grad_norm": 194.59536743164062, "learning_rate": 1.970547128447016e-06, "loss": 19.4922, "step": 11106 }, { "epoch": 0.10513910318910272, "grad_norm": 244.11044311523438, "learning_rate": 1.970539742044809e-06, "loss": 15.293, "step": 11107 }, { "epoch": 0.10514856921081776, "grad_norm": 478.781982421875, "learning_rate": 1.970532354730357e-06, "loss": 51.0703, "step": 11108 }, { "epoch": 0.10515803523253282, "grad_norm": 458.2375183105469, "learning_rate": 1.9705249665036684e-06, "loss": 25.7773, "step": 11109 }, { "epoch": 0.10516750125424788, "grad_norm": 217.58035278320312, "learning_rate": 1.970517577364749e-06, "loss": 19.3125, "step": 11110 }, { "epoch": 0.10517696727596293, "grad_norm": 191.0109100341797, "learning_rate": 1.9705101873136066e-06, "loss": 18.7266, "step": 11111 }, { "epoch": 0.10518643329767799, "grad_norm": 543.9049072265625, "learning_rate": 1.9705027963502477e-06, "loss": 49.4844, "step": 11112 }, { "epoch": 0.10519589931939304, "grad_norm": 672.0602416992188, "learning_rate": 1.9704954044746792e-06, "loss": 41.7578, "step": 11113 }, { "epoch": 0.1052053653411081, "grad_norm": 179.30120849609375, "learning_rate": 1.970488011686908e-06, "loss": 22.0781, "step": 11114 }, { "epoch": 0.10521483136282314, "grad_norm": 180.05706787109375, "learning_rate": 1.9704806179869415e-06, "loss": 26.2734, "step": 11115 }, { "epoch": 0.1052242973845382, "grad_norm": 256.25732421875, "learning_rate": 1.9704732233747862e-06, "loss": 22.5547, "step": 11116 }, { "epoch": 0.10523376340625325, "grad_norm": 660.0482788085938, "learning_rate": 1.9704658278504493e-06, "loss": 41.0547, "step": 11117 }, { "epoch": 0.10524322942796831, "grad_norm": 355.1515808105469, "learning_rate": 1.9704584314139375e-06, "loss": 33.0781, "step": 11118 }, { "epoch": 0.10525269544968337, "grad_norm": 305.4573669433594, "learning_rate": 1.9704510340652576e-06, "loss": 19.0859, "step": 11119 }, { "epoch": 0.10526216147139841, "grad_norm": 145.98382568359375, "learning_rate": 1.9704436358044177e-06, "loss": 20.1797, "step": 11120 }, { "epoch": 0.10527162749311347, "grad_norm": 224.48069763183594, "learning_rate": 1.970436236631423e-06, "loss": 18.9062, "step": 11121 }, { "epoch": 0.10528109351482852, "grad_norm": 622.5853271484375, "learning_rate": 1.9704288365462823e-06, "loss": 31.2969, "step": 11122 }, { "epoch": 0.10529055953654358, "grad_norm": 156.7534942626953, "learning_rate": 1.970421435549001e-06, "loss": 17.4922, "step": 11123 }, { "epoch": 0.10530002555825863, "grad_norm": 374.2091369628906, "learning_rate": 1.9704140336395867e-06, "loss": 25.2656, "step": 11124 }, { "epoch": 0.10530949157997369, "grad_norm": 3.594177484512329, "learning_rate": 1.9704066308180466e-06, "loss": 0.9155, "step": 11125 }, { "epoch": 0.10531895760168875, "grad_norm": 313.22039794921875, "learning_rate": 1.9703992270843874e-06, "loss": 44.3906, "step": 11126 }, { "epoch": 0.10532842362340379, "grad_norm": 303.8727722167969, "learning_rate": 1.970391822438616e-06, "loss": 17.1094, "step": 11127 }, { "epoch": 0.10533788964511885, "grad_norm": 359.5357971191406, "learning_rate": 1.9703844168807394e-06, "loss": 32.3906, "step": 11128 }, { "epoch": 0.1053473556668339, "grad_norm": 242.88963317871094, "learning_rate": 1.9703770104107644e-06, "loss": 22.5938, "step": 11129 }, { "epoch": 0.10535682168854896, "grad_norm": 2.9905214309692383, "learning_rate": 1.970369603028698e-06, "loss": 0.74, "step": 11130 }, { "epoch": 0.105366287710264, "grad_norm": 426.96624755859375, "learning_rate": 1.9703621947345475e-06, "loss": 18.5, "step": 11131 }, { "epoch": 0.10537575373197906, "grad_norm": 630.611083984375, "learning_rate": 1.97035478552832e-06, "loss": 40.3594, "step": 11132 }, { "epoch": 0.10538521975369411, "grad_norm": 499.47808837890625, "learning_rate": 1.970347375410022e-06, "loss": 23.2188, "step": 11133 }, { "epoch": 0.10539468577540917, "grad_norm": 313.9397888183594, "learning_rate": 1.97033996437966e-06, "loss": 39.0156, "step": 11134 }, { "epoch": 0.10540415179712423, "grad_norm": 375.9655456542969, "learning_rate": 1.970332552437242e-06, "loss": 47.9219, "step": 11135 }, { "epoch": 0.10541361781883928, "grad_norm": 700.2702026367188, "learning_rate": 1.970325139582775e-06, "loss": 43.0547, "step": 11136 }, { "epoch": 0.10542308384055434, "grad_norm": 536.7632446289062, "learning_rate": 1.9703177258162647e-06, "loss": 21.9961, "step": 11137 }, { "epoch": 0.10543254986226938, "grad_norm": 347.0466613769531, "learning_rate": 1.9703103111377193e-06, "loss": 34.4023, "step": 11138 }, { "epoch": 0.10544201588398444, "grad_norm": 532.3734130859375, "learning_rate": 1.970302895547145e-06, "loss": 42.5078, "step": 11139 }, { "epoch": 0.10545148190569949, "grad_norm": 588.6854858398438, "learning_rate": 1.9702954790445495e-06, "loss": 18.2266, "step": 11140 }, { "epoch": 0.10546094792741455, "grad_norm": 375.93255615234375, "learning_rate": 1.9702880616299395e-06, "loss": 42.7188, "step": 11141 }, { "epoch": 0.1054704139491296, "grad_norm": 146.7203369140625, "learning_rate": 1.9702806433033214e-06, "loss": 14.1367, "step": 11142 }, { "epoch": 0.10547987997084465, "grad_norm": 283.5679626464844, "learning_rate": 1.9702732240647026e-06, "loss": 12.8203, "step": 11143 }, { "epoch": 0.10548934599255971, "grad_norm": 364.35205078125, "learning_rate": 1.97026580391409e-06, "loss": 22.2969, "step": 11144 }, { "epoch": 0.10549881201427476, "grad_norm": 222.1013641357422, "learning_rate": 1.9702583828514913e-06, "loss": 10.9062, "step": 11145 }, { "epoch": 0.10550827803598982, "grad_norm": 1103.510986328125, "learning_rate": 1.970250960876912e-06, "loss": 61.1172, "step": 11146 }, { "epoch": 0.10551774405770487, "grad_norm": 389.6826171875, "learning_rate": 1.9702435379903607e-06, "loss": 39.6797, "step": 11147 }, { "epoch": 0.10552721007941993, "grad_norm": 385.320556640625, "learning_rate": 1.9702361141918434e-06, "loss": 52.1328, "step": 11148 }, { "epoch": 0.10553667610113497, "grad_norm": 2.58056902885437, "learning_rate": 1.9702286894813667e-06, "loss": 0.8652, "step": 11149 }, { "epoch": 0.10554614212285003, "grad_norm": 269.1755676269531, "learning_rate": 1.9702212638589388e-06, "loss": 18.1406, "step": 11150 }, { "epoch": 0.10555560814456508, "grad_norm": 428.0118408203125, "learning_rate": 1.970213837324566e-06, "loss": 53.3125, "step": 11151 }, { "epoch": 0.10556507416628014, "grad_norm": 318.7144775390625, "learning_rate": 1.970206409878255e-06, "loss": 16.6016, "step": 11152 }, { "epoch": 0.1055745401879952, "grad_norm": 302.85284423828125, "learning_rate": 1.9701989815200132e-06, "loss": 20.5469, "step": 11153 }, { "epoch": 0.10558400620971024, "grad_norm": 537.037841796875, "learning_rate": 1.9701915522498473e-06, "loss": 32.4922, "step": 11154 }, { "epoch": 0.1055934722314253, "grad_norm": 172.33468627929688, "learning_rate": 1.9701841220677648e-06, "loss": 25.4766, "step": 11155 }, { "epoch": 0.10560293825314035, "grad_norm": 381.8321838378906, "learning_rate": 1.970176690973772e-06, "loss": 33.8438, "step": 11156 }, { "epoch": 0.10561240427485541, "grad_norm": 278.1851501464844, "learning_rate": 1.9701692589678765e-06, "loss": 19.4844, "step": 11157 }, { "epoch": 0.10562187029657046, "grad_norm": 2.884880781173706, "learning_rate": 1.9701618260500846e-06, "loss": 1.0259, "step": 11158 }, { "epoch": 0.10563133631828552, "grad_norm": 3.8089451789855957, "learning_rate": 1.9701543922204043e-06, "loss": 0.8555, "step": 11159 }, { "epoch": 0.10564080234000056, "grad_norm": 228.39881896972656, "learning_rate": 1.9701469574788417e-06, "loss": 24.3359, "step": 11160 }, { "epoch": 0.10565026836171562, "grad_norm": 234.229248046875, "learning_rate": 1.970139521825404e-06, "loss": 19.3672, "step": 11161 }, { "epoch": 0.10565973438343068, "grad_norm": 290.1262512207031, "learning_rate": 1.9701320852600986e-06, "loss": 26.2266, "step": 11162 }, { "epoch": 0.10566920040514573, "grad_norm": 246.56866455078125, "learning_rate": 1.9701246477829313e-06, "loss": 19.5859, "step": 11163 }, { "epoch": 0.10567866642686079, "grad_norm": 445.8070068359375, "learning_rate": 1.9701172093939106e-06, "loss": 45.6719, "step": 11164 }, { "epoch": 0.10568813244857583, "grad_norm": 183.25245666503906, "learning_rate": 1.970109770093043e-06, "loss": 17.1562, "step": 11165 }, { "epoch": 0.1056975984702909, "grad_norm": 1346.407470703125, "learning_rate": 1.970102329880335e-06, "loss": 50.5938, "step": 11166 }, { "epoch": 0.10570706449200594, "grad_norm": 473.24560546875, "learning_rate": 1.970094888755794e-06, "loss": 24.4766, "step": 11167 }, { "epoch": 0.105716530513721, "grad_norm": 962.2197875976562, "learning_rate": 1.970087446719427e-06, "loss": 32.668, "step": 11168 }, { "epoch": 0.10572599653543605, "grad_norm": 336.3603515625, "learning_rate": 1.9700800037712404e-06, "loss": 27.9297, "step": 11169 }, { "epoch": 0.1057354625571511, "grad_norm": 222.56292724609375, "learning_rate": 1.9700725599112424e-06, "loss": 17.5938, "step": 11170 }, { "epoch": 0.10574492857886617, "grad_norm": 848.1484375, "learning_rate": 1.9700651151394387e-06, "loss": 25.3281, "step": 11171 }, { "epoch": 0.10575439460058121, "grad_norm": 3.0120837688446045, "learning_rate": 1.9700576694558376e-06, "loss": 0.9092, "step": 11172 }, { "epoch": 0.10576386062229627, "grad_norm": 759.2112426757812, "learning_rate": 1.970050222860445e-06, "loss": 40.2812, "step": 11173 }, { "epoch": 0.10577332664401132, "grad_norm": 250.86260986328125, "learning_rate": 1.9700427753532683e-06, "loss": 23.6406, "step": 11174 }, { "epoch": 0.10578279266572638, "grad_norm": 3.4811244010925293, "learning_rate": 1.9700353269343144e-06, "loss": 0.9614, "step": 11175 }, { "epoch": 0.10579225868744142, "grad_norm": 376.2232971191406, "learning_rate": 1.9700278776035906e-06, "loss": 41.5234, "step": 11176 }, { "epoch": 0.10580172470915648, "grad_norm": 560.048095703125, "learning_rate": 1.9700204273611036e-06, "loss": 46.7656, "step": 11177 }, { "epoch": 0.10581119073087154, "grad_norm": 284.73040771484375, "learning_rate": 1.9700129762068605e-06, "loss": 21.0938, "step": 11178 }, { "epoch": 0.10582065675258659, "grad_norm": 404.44927978515625, "learning_rate": 1.9700055241408683e-06, "loss": 20.4297, "step": 11179 }, { "epoch": 0.10583012277430165, "grad_norm": 391.1131286621094, "learning_rate": 1.969998071163134e-06, "loss": 37.875, "step": 11180 }, { "epoch": 0.1058395887960167, "grad_norm": 577.6972045898438, "learning_rate": 1.969990617273665e-06, "loss": 41.9219, "step": 11181 }, { "epoch": 0.10584905481773176, "grad_norm": 684.9730834960938, "learning_rate": 1.969983162472467e-06, "loss": 60.4453, "step": 11182 }, { "epoch": 0.1058585208394468, "grad_norm": 829.9010620117188, "learning_rate": 1.969975706759549e-06, "loss": 50.25, "step": 11183 }, { "epoch": 0.10586798686116186, "grad_norm": 537.1361694335938, "learning_rate": 1.969968250134916e-06, "loss": 34.1016, "step": 11184 }, { "epoch": 0.10587745288287691, "grad_norm": 211.88499450683594, "learning_rate": 1.9699607925985764e-06, "loss": 21.25, "step": 11185 }, { "epoch": 0.10588691890459197, "grad_norm": 299.1098937988281, "learning_rate": 1.969953334150537e-06, "loss": 19.6094, "step": 11186 }, { "epoch": 0.10589638492630703, "grad_norm": 317.7273254394531, "learning_rate": 1.969945874790804e-06, "loss": 49.1562, "step": 11187 }, { "epoch": 0.10590585094802207, "grad_norm": 242.1336669921875, "learning_rate": 1.9699384145193856e-06, "loss": 14.4609, "step": 11188 }, { "epoch": 0.10591531696973713, "grad_norm": 293.61627197265625, "learning_rate": 1.9699309533362877e-06, "loss": 18.4141, "step": 11189 }, { "epoch": 0.10592478299145218, "grad_norm": 986.9085083007812, "learning_rate": 1.9699234912415176e-06, "loss": 34.7031, "step": 11190 }, { "epoch": 0.10593424901316724, "grad_norm": 549.4795532226562, "learning_rate": 1.969916028235083e-06, "loss": 49.2891, "step": 11191 }, { "epoch": 0.10594371503488229, "grad_norm": 345.14276123046875, "learning_rate": 1.9699085643169903e-06, "loss": 32.4219, "step": 11192 }, { "epoch": 0.10595318105659735, "grad_norm": 437.192626953125, "learning_rate": 1.9699010994872466e-06, "loss": 46.0781, "step": 11193 }, { "epoch": 0.10596264707831239, "grad_norm": 376.1905822753906, "learning_rate": 1.969893633745859e-06, "loss": 12.9863, "step": 11194 }, { "epoch": 0.10597211310002745, "grad_norm": 425.7456970214844, "learning_rate": 1.9698861670928347e-06, "loss": 13.6523, "step": 11195 }, { "epoch": 0.10598157912174251, "grad_norm": 203.59129333496094, "learning_rate": 1.96987869952818e-06, "loss": 23.375, "step": 11196 }, { "epoch": 0.10599104514345756, "grad_norm": 2.9906961917877197, "learning_rate": 1.9698712310519027e-06, "loss": 0.9756, "step": 11197 }, { "epoch": 0.10600051116517262, "grad_norm": 478.7656555175781, "learning_rate": 1.9698637616640096e-06, "loss": 27.5391, "step": 11198 }, { "epoch": 0.10600997718688766, "grad_norm": 238.276611328125, "learning_rate": 1.969856291364508e-06, "loss": 14.332, "step": 11199 }, { "epoch": 0.10601944320860272, "grad_norm": 431.1570129394531, "learning_rate": 1.969848820153404e-06, "loss": 20.5156, "step": 11200 }, { "epoch": 0.10602890923031777, "grad_norm": 351.4415588378906, "learning_rate": 1.969841348030705e-06, "loss": 35.3281, "step": 11201 }, { "epoch": 0.10603837525203283, "grad_norm": 383.7952880859375, "learning_rate": 1.969833874996419e-06, "loss": 43.1641, "step": 11202 }, { "epoch": 0.10604784127374788, "grad_norm": 560.71826171875, "learning_rate": 1.9698264010505515e-06, "loss": 48.2656, "step": 11203 }, { "epoch": 0.10605730729546294, "grad_norm": 253.08343505859375, "learning_rate": 1.9698189261931112e-06, "loss": 36.7969, "step": 11204 }, { "epoch": 0.106066773317178, "grad_norm": 245.6465606689453, "learning_rate": 1.969811450424103e-06, "loss": 16.6562, "step": 11205 }, { "epoch": 0.10607623933889304, "grad_norm": 305.86505126953125, "learning_rate": 1.969803973743536e-06, "loss": 29.625, "step": 11206 }, { "epoch": 0.1060857053606081, "grad_norm": 248.66102600097656, "learning_rate": 1.969796496151416e-06, "loss": 20.6016, "step": 11207 }, { "epoch": 0.10609517138232315, "grad_norm": 207.6014404296875, "learning_rate": 1.9697890176477507e-06, "loss": 17.9922, "step": 11208 }, { "epoch": 0.10610463740403821, "grad_norm": 320.77337646484375, "learning_rate": 1.9697815382325465e-06, "loss": 25.2812, "step": 11209 }, { "epoch": 0.10611410342575325, "grad_norm": 530.2566528320312, "learning_rate": 1.9697740579058107e-06, "loss": 40.25, "step": 11210 }, { "epoch": 0.10612356944746831, "grad_norm": 450.7305908203125, "learning_rate": 1.969766576667551e-06, "loss": 36.8906, "step": 11211 }, { "epoch": 0.10613303546918336, "grad_norm": 3.4505257606506348, "learning_rate": 1.9697590945177733e-06, "loss": 0.9863, "step": 11212 }, { "epoch": 0.10614250149089842, "grad_norm": 197.18539428710938, "learning_rate": 1.9697516114564848e-06, "loss": 17.2656, "step": 11213 }, { "epoch": 0.10615196751261348, "grad_norm": 841.001953125, "learning_rate": 1.9697441274836936e-06, "loss": 46.7539, "step": 11214 }, { "epoch": 0.10616143353432853, "grad_norm": 467.5375061035156, "learning_rate": 1.9697366425994055e-06, "loss": 25.4219, "step": 11215 }, { "epoch": 0.10617089955604359, "grad_norm": 325.1080627441406, "learning_rate": 1.9697291568036283e-06, "loss": 9.3711, "step": 11216 }, { "epoch": 0.10618036557775863, "grad_norm": 370.6496887207031, "learning_rate": 1.9697216700963686e-06, "loss": 18.9492, "step": 11217 }, { "epoch": 0.10618983159947369, "grad_norm": 298.5588684082031, "learning_rate": 1.9697141824776334e-06, "loss": 12.8047, "step": 11218 }, { "epoch": 0.10619929762118874, "grad_norm": 439.714599609375, "learning_rate": 1.9697066939474305e-06, "loss": 34.4727, "step": 11219 }, { "epoch": 0.1062087636429038, "grad_norm": 571.2153930664062, "learning_rate": 1.969699204505766e-06, "loss": 47.8906, "step": 11220 }, { "epoch": 0.10621822966461886, "grad_norm": 175.71792602539062, "learning_rate": 1.969691714152648e-06, "loss": 9.9961, "step": 11221 }, { "epoch": 0.1062276956863339, "grad_norm": 227.92332458496094, "learning_rate": 1.969684222888082e-06, "loss": 22.6016, "step": 11222 }, { "epoch": 0.10623716170804896, "grad_norm": 450.0300598144531, "learning_rate": 1.969676730712076e-06, "loss": 43.625, "step": 11223 }, { "epoch": 0.10624662772976401, "grad_norm": 727.7874755859375, "learning_rate": 1.9696692376246373e-06, "loss": 56.0469, "step": 11224 }, { "epoch": 0.10625609375147907, "grad_norm": 318.4004211425781, "learning_rate": 1.9696617436257725e-06, "loss": 25.1094, "step": 11225 }, { "epoch": 0.10626555977319412, "grad_norm": 277.0768737792969, "learning_rate": 1.9696542487154887e-06, "loss": 27.6406, "step": 11226 }, { "epoch": 0.10627502579490918, "grad_norm": 628.3143310546875, "learning_rate": 1.969646752893793e-06, "loss": 28.4062, "step": 11227 }, { "epoch": 0.10628449181662422, "grad_norm": 483.32611083984375, "learning_rate": 1.9696392561606926e-06, "loss": 63.0156, "step": 11228 }, { "epoch": 0.10629395783833928, "grad_norm": 493.4352722167969, "learning_rate": 1.969631758516194e-06, "loss": 49.2344, "step": 11229 }, { "epoch": 0.10630342386005434, "grad_norm": 1045.99072265625, "learning_rate": 1.969624259960305e-06, "loss": 43.8203, "step": 11230 }, { "epoch": 0.10631288988176939, "grad_norm": 398.0616455078125, "learning_rate": 1.9696167604930324e-06, "loss": 39.1172, "step": 11231 }, { "epoch": 0.10632235590348445, "grad_norm": 435.5480651855469, "learning_rate": 1.9696092601143824e-06, "loss": 21.7891, "step": 11232 }, { "epoch": 0.1063318219251995, "grad_norm": 178.97152709960938, "learning_rate": 1.9696017588243634e-06, "loss": 19.5547, "step": 11233 }, { "epoch": 0.10634128794691455, "grad_norm": 291.32354736328125, "learning_rate": 1.969594256622982e-06, "loss": 19.8047, "step": 11234 }, { "epoch": 0.1063507539686296, "grad_norm": 402.9325256347656, "learning_rate": 1.9695867535102445e-06, "loss": 44.3125, "step": 11235 }, { "epoch": 0.10636021999034466, "grad_norm": 208.78424072265625, "learning_rate": 1.9695792494861585e-06, "loss": 16.9375, "step": 11236 }, { "epoch": 0.1063696860120597, "grad_norm": 289.7113037109375, "learning_rate": 1.9695717445507312e-06, "loss": 15.0742, "step": 11237 }, { "epoch": 0.10637915203377477, "grad_norm": 196.8052520751953, "learning_rate": 1.9695642387039698e-06, "loss": 18.2578, "step": 11238 }, { "epoch": 0.10638861805548983, "grad_norm": 322.812744140625, "learning_rate": 1.969556731945881e-06, "loss": 16.1016, "step": 11239 }, { "epoch": 0.10639808407720487, "grad_norm": 295.0429992675781, "learning_rate": 1.9695492242764718e-06, "loss": 11.0156, "step": 11240 }, { "epoch": 0.10640755009891993, "grad_norm": 594.0059204101562, "learning_rate": 1.9695417156957497e-06, "loss": 39.0, "step": 11241 }, { "epoch": 0.10641701612063498, "grad_norm": 539.0309448242188, "learning_rate": 1.9695342062037213e-06, "loss": 39.6016, "step": 11242 }, { "epoch": 0.10642648214235004, "grad_norm": 877.5274658203125, "learning_rate": 1.969526695800394e-06, "loss": 46.5898, "step": 11243 }, { "epoch": 0.10643594816406508, "grad_norm": 310.42144775390625, "learning_rate": 1.9695191844857743e-06, "loss": 28.9609, "step": 11244 }, { "epoch": 0.10644541418578014, "grad_norm": 358.73187255859375, "learning_rate": 1.9695116722598696e-06, "loss": 45.9375, "step": 11245 }, { "epoch": 0.10645488020749519, "grad_norm": 200.55050659179688, "learning_rate": 1.969504159122687e-06, "loss": 23.5625, "step": 11246 }, { "epoch": 0.10646434622921025, "grad_norm": 372.4938659667969, "learning_rate": 1.969496645074234e-06, "loss": 36.2656, "step": 11247 }, { "epoch": 0.10647381225092531, "grad_norm": 245.9890899658203, "learning_rate": 1.9694891301145173e-06, "loss": 18.7031, "step": 11248 }, { "epoch": 0.10648327827264036, "grad_norm": 349.2228698730469, "learning_rate": 1.969481614243543e-06, "loss": 26.0547, "step": 11249 }, { "epoch": 0.10649274429435542, "grad_norm": 289.35845947265625, "learning_rate": 1.96947409746132e-06, "loss": 17.9219, "step": 11250 }, { "epoch": 0.10650221031607046, "grad_norm": 185.69879150390625, "learning_rate": 1.969466579767854e-06, "loss": 16.8555, "step": 11251 }, { "epoch": 0.10651167633778552, "grad_norm": 280.4979248046875, "learning_rate": 1.9694590611631525e-06, "loss": 23.7734, "step": 11252 }, { "epoch": 0.10652114235950057, "grad_norm": 245.7454833984375, "learning_rate": 1.9694515416472228e-06, "loss": 23.3828, "step": 11253 }, { "epoch": 0.10653060838121563, "grad_norm": 1181.4847412109375, "learning_rate": 1.969444021220071e-06, "loss": 27.5, "step": 11254 }, { "epoch": 0.10654007440293067, "grad_norm": 319.011474609375, "learning_rate": 1.9694364998817055e-06, "loss": 25.6562, "step": 11255 }, { "epoch": 0.10654954042464573, "grad_norm": 984.9982299804688, "learning_rate": 1.9694289776321328e-06, "loss": 49.5664, "step": 11256 }, { "epoch": 0.1065590064463608, "grad_norm": 1552.0279541015625, "learning_rate": 1.9694214544713597e-06, "loss": 59.125, "step": 11257 }, { "epoch": 0.10656847246807584, "grad_norm": 1067.95947265625, "learning_rate": 1.969413930399394e-06, "loss": 73.5156, "step": 11258 }, { "epoch": 0.1065779384897909, "grad_norm": 313.3894958496094, "learning_rate": 1.9694064054162413e-06, "loss": 52.4375, "step": 11259 }, { "epoch": 0.10658740451150595, "grad_norm": 216.41946411132812, "learning_rate": 1.9693988795219104e-06, "loss": 10.8086, "step": 11260 }, { "epoch": 0.106596870533221, "grad_norm": 3.134725570678711, "learning_rate": 1.9693913527164075e-06, "loss": 0.9849, "step": 11261 }, { "epoch": 0.10660633655493605, "grad_norm": 401.1539306640625, "learning_rate": 1.9693838249997394e-06, "loss": 25.6094, "step": 11262 }, { "epoch": 0.10661580257665111, "grad_norm": 437.25848388671875, "learning_rate": 1.969376296371914e-06, "loss": 32.2891, "step": 11263 }, { "epoch": 0.10662526859836617, "grad_norm": 234.0981903076172, "learning_rate": 1.9693687668329376e-06, "loss": 21.0625, "step": 11264 }, { "epoch": 0.10663473462008122, "grad_norm": 251.52645874023438, "learning_rate": 1.969361236382818e-06, "loss": 23.375, "step": 11265 }, { "epoch": 0.10664420064179628, "grad_norm": 360.71771240234375, "learning_rate": 1.9693537050215615e-06, "loss": 25.6875, "step": 11266 }, { "epoch": 0.10665366666351132, "grad_norm": 391.6448059082031, "learning_rate": 1.969346172749176e-06, "loss": 19.3906, "step": 11267 }, { "epoch": 0.10666313268522638, "grad_norm": 479.71954345703125, "learning_rate": 1.969338639565668e-06, "loss": 36.0625, "step": 11268 }, { "epoch": 0.10667259870694143, "grad_norm": 193.28500366210938, "learning_rate": 1.9693311054710448e-06, "loss": 23.2969, "step": 11269 }, { "epoch": 0.10668206472865649, "grad_norm": 241.6361541748047, "learning_rate": 1.969323570465313e-06, "loss": 24.7773, "step": 11270 }, { "epoch": 0.10669153075037154, "grad_norm": 1389.0191650390625, "learning_rate": 1.9693160345484804e-06, "loss": 23.0078, "step": 11271 }, { "epoch": 0.1067009967720866, "grad_norm": 324.25177001953125, "learning_rate": 1.9693084977205537e-06, "loss": 22.5781, "step": 11272 }, { "epoch": 0.10671046279380166, "grad_norm": 408.4810791015625, "learning_rate": 1.9693009599815406e-06, "loss": 30.0469, "step": 11273 }, { "epoch": 0.1067199288155167, "grad_norm": 196.2861785888672, "learning_rate": 1.969293421331447e-06, "loss": 8.6406, "step": 11274 }, { "epoch": 0.10672939483723176, "grad_norm": 383.9305725097656, "learning_rate": 1.969285881770281e-06, "loss": 31.8164, "step": 11275 }, { "epoch": 0.10673886085894681, "grad_norm": 386.2887268066406, "learning_rate": 1.969278341298049e-06, "loss": 43.5312, "step": 11276 }, { "epoch": 0.10674832688066187, "grad_norm": 261.6397705078125, "learning_rate": 1.9692707999147587e-06, "loss": 22.7969, "step": 11277 }, { "epoch": 0.10675779290237691, "grad_norm": 337.3226623535156, "learning_rate": 1.969263257620417e-06, "loss": 21.2773, "step": 11278 }, { "epoch": 0.10676725892409197, "grad_norm": 433.3535461425781, "learning_rate": 1.9692557144150305e-06, "loss": 28.1406, "step": 11279 }, { "epoch": 0.10677672494580702, "grad_norm": 211.93223571777344, "learning_rate": 1.9692481702986064e-06, "loss": 15.793, "step": 11280 }, { "epoch": 0.10678619096752208, "grad_norm": 149.2986602783203, "learning_rate": 1.969240625271153e-06, "loss": 17.7656, "step": 11281 }, { "epoch": 0.10679565698923714, "grad_norm": 253.97288513183594, "learning_rate": 1.969233079332676e-06, "loss": 20.6562, "step": 11282 }, { "epoch": 0.10680512301095219, "grad_norm": 406.56121826171875, "learning_rate": 1.969225532483183e-06, "loss": 22.6797, "step": 11283 }, { "epoch": 0.10681458903266725, "grad_norm": 310.1609191894531, "learning_rate": 1.969217984722681e-06, "loss": 30.5391, "step": 11284 }, { "epoch": 0.10682405505438229, "grad_norm": 413.9848327636719, "learning_rate": 1.969210436051177e-06, "loss": 47.8438, "step": 11285 }, { "epoch": 0.10683352107609735, "grad_norm": 225.36187744140625, "learning_rate": 1.9692028864686784e-06, "loss": 18.5312, "step": 11286 }, { "epoch": 0.1068429870978124, "grad_norm": 533.88037109375, "learning_rate": 1.9691953359751923e-06, "loss": 44.5, "step": 11287 }, { "epoch": 0.10685245311952746, "grad_norm": 927.4794311523438, "learning_rate": 1.9691877845707256e-06, "loss": 67.2461, "step": 11288 }, { "epoch": 0.1068619191412425, "grad_norm": 397.68035888671875, "learning_rate": 1.969180232255285e-06, "loss": 22.9453, "step": 11289 }, { "epoch": 0.10687138516295756, "grad_norm": 294.1354064941406, "learning_rate": 1.9691726790288786e-06, "loss": 9.4609, "step": 11290 }, { "epoch": 0.10688085118467262, "grad_norm": 361.87591552734375, "learning_rate": 1.9691651248915126e-06, "loss": 25.6016, "step": 11291 }, { "epoch": 0.10689031720638767, "grad_norm": 214.62594604492188, "learning_rate": 1.9691575698431945e-06, "loss": 14.1484, "step": 11292 }, { "epoch": 0.10689978322810273, "grad_norm": 236.98716735839844, "learning_rate": 1.9691500138839314e-06, "loss": 20.2734, "step": 11293 }, { "epoch": 0.10690924924981778, "grad_norm": 282.70465087890625, "learning_rate": 1.96914245701373e-06, "loss": 20.9453, "step": 11294 }, { "epoch": 0.10691871527153284, "grad_norm": 264.4690246582031, "learning_rate": 1.969134899232598e-06, "loss": 21.6172, "step": 11295 }, { "epoch": 0.10692818129324788, "grad_norm": 534.4027709960938, "learning_rate": 1.9691273405405425e-06, "loss": 26.5312, "step": 11296 }, { "epoch": 0.10693764731496294, "grad_norm": 204.4641876220703, "learning_rate": 1.9691197809375703e-06, "loss": 20.1406, "step": 11297 }, { "epoch": 0.10694711333667799, "grad_norm": 189.16372680664062, "learning_rate": 1.969112220423688e-06, "loss": 20.3906, "step": 11298 }, { "epoch": 0.10695657935839305, "grad_norm": 202.27232360839844, "learning_rate": 1.969104658998904e-06, "loss": 15.125, "step": 11299 }, { "epoch": 0.10696604538010811, "grad_norm": 370.5359191894531, "learning_rate": 1.9690970966632242e-06, "loss": 38.4297, "step": 11300 }, { "epoch": 0.10697551140182315, "grad_norm": 3.106902599334717, "learning_rate": 1.969089533416656e-06, "loss": 0.8115, "step": 11301 }, { "epoch": 0.10698497742353821, "grad_norm": 581.0082397460938, "learning_rate": 1.969081969259207e-06, "loss": 53.6484, "step": 11302 }, { "epoch": 0.10699444344525326, "grad_norm": 436.8468017578125, "learning_rate": 1.9690744041908838e-06, "loss": 49.4375, "step": 11303 }, { "epoch": 0.10700390946696832, "grad_norm": 180.16818237304688, "learning_rate": 1.969066838211694e-06, "loss": 19.3281, "step": 11304 }, { "epoch": 0.10701337548868337, "grad_norm": 634.3945922851562, "learning_rate": 1.9690592713216443e-06, "loss": 39.0938, "step": 11305 }, { "epoch": 0.10702284151039843, "grad_norm": 900.6057739257812, "learning_rate": 1.969051703520742e-06, "loss": 45.4609, "step": 11306 }, { "epoch": 0.10703230753211349, "grad_norm": 204.68109130859375, "learning_rate": 1.969044134808994e-06, "loss": 17.0625, "step": 11307 }, { "epoch": 0.10704177355382853, "grad_norm": 197.55027770996094, "learning_rate": 1.9690365651864075e-06, "loss": 16.7266, "step": 11308 }, { "epoch": 0.10705123957554359, "grad_norm": 760.6207275390625, "learning_rate": 1.9690289946529896e-06, "loss": 42.2969, "step": 11309 }, { "epoch": 0.10706070559725864, "grad_norm": 241.17259216308594, "learning_rate": 1.9690214232087475e-06, "loss": 25.7656, "step": 11310 }, { "epoch": 0.1070701716189737, "grad_norm": 902.7659301757812, "learning_rate": 1.9690138508536883e-06, "loss": 51.375, "step": 11311 }, { "epoch": 0.10707963764068874, "grad_norm": 385.6280212402344, "learning_rate": 1.9690062775878196e-06, "loss": 22.6562, "step": 11312 }, { "epoch": 0.1070891036624038, "grad_norm": 491.4438781738281, "learning_rate": 1.9689987034111474e-06, "loss": 23.6562, "step": 11313 }, { "epoch": 0.10709856968411885, "grad_norm": 488.6046142578125, "learning_rate": 1.96899112832368e-06, "loss": 11.0898, "step": 11314 }, { "epoch": 0.10710803570583391, "grad_norm": 158.42694091796875, "learning_rate": 1.968983552325424e-06, "loss": 18.75, "step": 11315 }, { "epoch": 0.10711750172754897, "grad_norm": 394.5203857421875, "learning_rate": 1.9689759754163856e-06, "loss": 45.125, "step": 11316 }, { "epoch": 0.10712696774926402, "grad_norm": 407.99981689453125, "learning_rate": 1.9689683975965735e-06, "loss": 23.9922, "step": 11317 }, { "epoch": 0.10713643377097908, "grad_norm": 725.556640625, "learning_rate": 1.9689608188659943e-06, "loss": 9.7109, "step": 11318 }, { "epoch": 0.10714589979269412, "grad_norm": 1000.3292236328125, "learning_rate": 1.9689532392246547e-06, "loss": 29.5859, "step": 11319 }, { "epoch": 0.10715536581440918, "grad_norm": 467.39947509765625, "learning_rate": 1.968945658672562e-06, "loss": 22.1836, "step": 11320 }, { "epoch": 0.10716483183612423, "grad_norm": 2.79194712638855, "learning_rate": 1.9689380772097235e-06, "loss": 0.8257, "step": 11321 }, { "epoch": 0.10717429785783929, "grad_norm": 389.38311767578125, "learning_rate": 1.9689304948361462e-06, "loss": 26.9531, "step": 11322 }, { "epoch": 0.10718376387955433, "grad_norm": 756.8054809570312, "learning_rate": 1.9689229115518374e-06, "loss": 21.6719, "step": 11323 }, { "epoch": 0.1071932299012694, "grad_norm": 378.751708984375, "learning_rate": 1.968915327356804e-06, "loss": 51.5547, "step": 11324 }, { "epoch": 0.10720269592298445, "grad_norm": 594.526611328125, "learning_rate": 1.968907742251053e-06, "loss": 51.2266, "step": 11325 }, { "epoch": 0.1072121619446995, "grad_norm": 277.5216064453125, "learning_rate": 1.968900156234592e-06, "loss": 26.9844, "step": 11326 }, { "epoch": 0.10722162796641456, "grad_norm": 329.41217041015625, "learning_rate": 1.968892569307428e-06, "loss": 31.4062, "step": 11327 }, { "epoch": 0.1072310939881296, "grad_norm": 339.146240234375, "learning_rate": 1.968884981469568e-06, "loss": 26.6406, "step": 11328 }, { "epoch": 0.10724056000984467, "grad_norm": 711.0836181640625, "learning_rate": 1.968877392721019e-06, "loss": 20.8594, "step": 11329 }, { "epoch": 0.10725002603155971, "grad_norm": 3.548253059387207, "learning_rate": 1.9688698030617887e-06, "loss": 0.9653, "step": 11330 }, { "epoch": 0.10725949205327477, "grad_norm": 567.5795288085938, "learning_rate": 1.968862212491883e-06, "loss": 38.3594, "step": 11331 }, { "epoch": 0.10726895807498982, "grad_norm": 1340.634521484375, "learning_rate": 1.9688546210113108e-06, "loss": 39.0078, "step": 11332 }, { "epoch": 0.10727842409670488, "grad_norm": 416.36309814453125, "learning_rate": 1.9688470286200777e-06, "loss": 22.9922, "step": 11333 }, { "epoch": 0.10728789011841994, "grad_norm": 595.3240966796875, "learning_rate": 1.9688394353181914e-06, "loss": 25.1484, "step": 11334 }, { "epoch": 0.10729735614013498, "grad_norm": 2013.709228515625, "learning_rate": 1.968831841105659e-06, "loss": 31.8125, "step": 11335 }, { "epoch": 0.10730682216185004, "grad_norm": 460.8668212890625, "learning_rate": 1.9688242459824878e-06, "loss": 22.9219, "step": 11336 }, { "epoch": 0.10731628818356509, "grad_norm": 299.69268798828125, "learning_rate": 1.968816649948685e-06, "loss": 10.5273, "step": 11337 }, { "epoch": 0.10732575420528015, "grad_norm": 1813.3956298828125, "learning_rate": 1.9688090530042574e-06, "loss": 10.3672, "step": 11338 }, { "epoch": 0.1073352202269952, "grad_norm": 241.71771240234375, "learning_rate": 1.9688014551492125e-06, "loss": 9.0547, "step": 11339 }, { "epoch": 0.10734468624871026, "grad_norm": 495.4393615722656, "learning_rate": 1.968793856383557e-06, "loss": 31.0781, "step": 11340 }, { "epoch": 0.1073541522704253, "grad_norm": 458.2211608886719, "learning_rate": 1.9687862567072986e-06, "loss": 38.0625, "step": 11341 }, { "epoch": 0.10736361829214036, "grad_norm": 580.509033203125, "learning_rate": 1.968778656120444e-06, "loss": 27.7812, "step": 11342 }, { "epoch": 0.10737308431385542, "grad_norm": 249.2209014892578, "learning_rate": 1.9687710546230007e-06, "loss": 19.6328, "step": 11343 }, { "epoch": 0.10738255033557047, "grad_norm": 359.7764892578125, "learning_rate": 1.9687634522149754e-06, "loss": 43.4688, "step": 11344 }, { "epoch": 0.10739201635728553, "grad_norm": 333.9904479980469, "learning_rate": 1.968755848896375e-06, "loss": 18.4766, "step": 11345 }, { "epoch": 0.10740148237900057, "grad_norm": 474.66229248046875, "learning_rate": 1.968748244667208e-06, "loss": 16.2734, "step": 11346 }, { "epoch": 0.10741094840071563, "grad_norm": 999.7936401367188, "learning_rate": 1.96874063952748e-06, "loss": 48.3203, "step": 11347 }, { "epoch": 0.10742041442243068, "grad_norm": 1148.3453369140625, "learning_rate": 1.9687330334771995e-06, "loss": 70.5469, "step": 11348 }, { "epoch": 0.10742988044414574, "grad_norm": 224.39486694335938, "learning_rate": 1.9687254265163725e-06, "loss": 12.9648, "step": 11349 }, { "epoch": 0.1074393464658608, "grad_norm": 334.9405822753906, "learning_rate": 1.9687178186450066e-06, "loss": 23.6875, "step": 11350 }, { "epoch": 0.10744881248757585, "grad_norm": 412.22113037109375, "learning_rate": 1.968710209863109e-06, "loss": 54.5781, "step": 11351 }, { "epoch": 0.1074582785092909, "grad_norm": 685.9068603515625, "learning_rate": 1.968702600170687e-06, "loss": 23.7266, "step": 11352 }, { "epoch": 0.10746774453100595, "grad_norm": 393.84375, "learning_rate": 1.9686949895677474e-06, "loss": 35.3516, "step": 11353 }, { "epoch": 0.10747721055272101, "grad_norm": 3.255270481109619, "learning_rate": 1.968687378054298e-06, "loss": 0.918, "step": 11354 }, { "epoch": 0.10748667657443606, "grad_norm": 473.4698181152344, "learning_rate": 1.9686797656303446e-06, "loss": 24.7656, "step": 11355 }, { "epoch": 0.10749614259615112, "grad_norm": 303.4213562011719, "learning_rate": 1.9686721522958954e-06, "loss": 19.4922, "step": 11356 }, { "epoch": 0.10750560861786616, "grad_norm": 416.7363586425781, "learning_rate": 1.9686645380509577e-06, "loss": 40.3125, "step": 11357 }, { "epoch": 0.10751507463958122, "grad_norm": 280.58758544921875, "learning_rate": 1.968656922895539e-06, "loss": 20.2891, "step": 11358 }, { "epoch": 0.10752454066129628, "grad_norm": 449.6608581542969, "learning_rate": 1.9686493068296446e-06, "loss": 49.8359, "step": 11359 }, { "epoch": 0.10753400668301133, "grad_norm": 675.7167358398438, "learning_rate": 1.9686416898532836e-06, "loss": 53.9375, "step": 11360 }, { "epoch": 0.10754347270472639, "grad_norm": 492.70855712890625, "learning_rate": 1.968634071966462e-06, "loss": 42.0938, "step": 11361 }, { "epoch": 0.10755293872644144, "grad_norm": 166.78985595703125, "learning_rate": 1.968626453169188e-06, "loss": 23.75, "step": 11362 }, { "epoch": 0.1075624047481565, "grad_norm": 228.62852478027344, "learning_rate": 1.9686188334614675e-06, "loss": 19.9844, "step": 11363 }, { "epoch": 0.10757187076987154, "grad_norm": 166.3992919921875, "learning_rate": 1.9686112128433086e-06, "loss": 13.8633, "step": 11364 }, { "epoch": 0.1075813367915866, "grad_norm": 398.0650939941406, "learning_rate": 1.968603591314718e-06, "loss": 36.4062, "step": 11365 }, { "epoch": 0.10759080281330165, "grad_norm": 389.8878479003906, "learning_rate": 1.9685959688757036e-06, "loss": 22.4922, "step": 11366 }, { "epoch": 0.10760026883501671, "grad_norm": 394.0623779296875, "learning_rate": 1.9685883455262714e-06, "loss": 25.5, "step": 11367 }, { "epoch": 0.10760973485673177, "grad_norm": 304.0694580078125, "learning_rate": 1.9685807212664295e-06, "loss": 20.1719, "step": 11368 }, { "epoch": 0.10761920087844681, "grad_norm": 179.82281494140625, "learning_rate": 1.9685730960961843e-06, "loss": 9.8477, "step": 11369 }, { "epoch": 0.10762866690016187, "grad_norm": 736.1580200195312, "learning_rate": 1.968565470015544e-06, "loss": 37.2383, "step": 11370 }, { "epoch": 0.10763813292187692, "grad_norm": 815.4160766601562, "learning_rate": 1.9685578430245146e-06, "loss": 36.1719, "step": 11371 }, { "epoch": 0.10764759894359198, "grad_norm": 222.85003662109375, "learning_rate": 1.968550215123104e-06, "loss": 25.1406, "step": 11372 }, { "epoch": 0.10765706496530703, "grad_norm": 348.6305236816406, "learning_rate": 1.9685425863113197e-06, "loss": 13.5039, "step": 11373 }, { "epoch": 0.10766653098702209, "grad_norm": 629.8975830078125, "learning_rate": 1.9685349565891678e-06, "loss": 51.375, "step": 11374 }, { "epoch": 0.10767599700873713, "grad_norm": 375.9354553222656, "learning_rate": 1.968527325956656e-06, "loss": 25.3438, "step": 11375 }, { "epoch": 0.10768546303045219, "grad_norm": 325.79132080078125, "learning_rate": 1.9685196944137916e-06, "loss": 34.2969, "step": 11376 }, { "epoch": 0.10769492905216725, "grad_norm": 234.2823028564453, "learning_rate": 1.9685120619605823e-06, "loss": 18.5625, "step": 11377 }, { "epoch": 0.1077043950738823, "grad_norm": 442.72015380859375, "learning_rate": 1.968504428597034e-06, "loss": 25.6328, "step": 11378 }, { "epoch": 0.10771386109559736, "grad_norm": 383.73187255859375, "learning_rate": 1.968496794323155e-06, "loss": 39.0391, "step": 11379 }, { "epoch": 0.1077233271173124, "grad_norm": 396.77838134765625, "learning_rate": 1.9684891591389516e-06, "loss": 42.875, "step": 11380 }, { "epoch": 0.10773279313902746, "grad_norm": 570.2440185546875, "learning_rate": 1.9684815230444314e-06, "loss": 32.3906, "step": 11381 }, { "epoch": 0.10774225916074251, "grad_norm": 500.7940368652344, "learning_rate": 1.968473886039602e-06, "loss": 41.6094, "step": 11382 }, { "epoch": 0.10775172518245757, "grad_norm": 769.3854370117188, "learning_rate": 1.9684662481244696e-06, "loss": 50.0703, "step": 11383 }, { "epoch": 0.10776119120417262, "grad_norm": 597.0076293945312, "learning_rate": 1.9684586092990424e-06, "loss": 40.5703, "step": 11384 }, { "epoch": 0.10777065722588768, "grad_norm": 169.088134765625, "learning_rate": 1.968450969563327e-06, "loss": 16.7266, "step": 11385 }, { "epoch": 0.10778012324760274, "grad_norm": 400.74468994140625, "learning_rate": 1.9684433289173305e-06, "loss": 24.2266, "step": 11386 }, { "epoch": 0.10778958926931778, "grad_norm": 489.7159423828125, "learning_rate": 1.9684356873610605e-06, "loss": 31.7188, "step": 11387 }, { "epoch": 0.10779905529103284, "grad_norm": 625.556396484375, "learning_rate": 1.9684280448945237e-06, "loss": 48.2188, "step": 11388 }, { "epoch": 0.10780852131274789, "grad_norm": 1159.969970703125, "learning_rate": 1.968420401517728e-06, "loss": 39.3438, "step": 11389 }, { "epoch": 0.10781798733446295, "grad_norm": 222.9630584716797, "learning_rate": 1.9684127572306798e-06, "loss": 18.1484, "step": 11390 }, { "epoch": 0.107827453356178, "grad_norm": 514.8803100585938, "learning_rate": 1.9684051120333866e-06, "loss": 34.0469, "step": 11391 }, { "epoch": 0.10783691937789305, "grad_norm": 235.42002868652344, "learning_rate": 1.9683974659258554e-06, "loss": 7.1328, "step": 11392 }, { "epoch": 0.10784638539960811, "grad_norm": 260.0542907714844, "learning_rate": 1.968389818908094e-06, "loss": 19.2031, "step": 11393 }, { "epoch": 0.10785585142132316, "grad_norm": 378.1548767089844, "learning_rate": 1.9683821709801086e-06, "loss": 38.2734, "step": 11394 }, { "epoch": 0.10786531744303822, "grad_norm": 893.2279052734375, "learning_rate": 1.9683745221419074e-06, "loss": 50.875, "step": 11395 }, { "epoch": 0.10787478346475327, "grad_norm": 382.3701477050781, "learning_rate": 1.9683668723934974e-06, "loss": 36.6953, "step": 11396 }, { "epoch": 0.10788424948646833, "grad_norm": 444.77960205078125, "learning_rate": 1.968359221734885e-06, "loss": 60.2031, "step": 11397 }, { "epoch": 0.10789371550818337, "grad_norm": 416.14447021484375, "learning_rate": 1.968351570166078e-06, "loss": 29.2812, "step": 11398 }, { "epoch": 0.10790318152989843, "grad_norm": 200.78590393066406, "learning_rate": 1.9683439176870837e-06, "loss": 15.6641, "step": 11399 }, { "epoch": 0.10791264755161348, "grad_norm": 258.99658203125, "learning_rate": 1.9683362642979092e-06, "loss": 19.0625, "step": 11400 }, { "epoch": 0.10792211357332854, "grad_norm": 2.8006341457366943, "learning_rate": 1.9683286099985614e-06, "loss": 0.7822, "step": 11401 }, { "epoch": 0.1079315795950436, "grad_norm": 348.53839111328125, "learning_rate": 1.9683209547890475e-06, "loss": 29.5156, "step": 11402 }, { "epoch": 0.10794104561675864, "grad_norm": 616.1009521484375, "learning_rate": 1.9683132986693754e-06, "loss": 19.2441, "step": 11403 }, { "epoch": 0.1079505116384737, "grad_norm": 321.23309326171875, "learning_rate": 1.9683056416395517e-06, "loss": 52.2344, "step": 11404 }, { "epoch": 0.10795997766018875, "grad_norm": 386.47265625, "learning_rate": 1.9682979836995834e-06, "loss": 48.8438, "step": 11405 }, { "epoch": 0.10796944368190381, "grad_norm": 3.4781322479248047, "learning_rate": 1.968290324849478e-06, "loss": 1.1235, "step": 11406 }, { "epoch": 0.10797890970361886, "grad_norm": 358.7845153808594, "learning_rate": 1.968282665089243e-06, "loss": 9.2031, "step": 11407 }, { "epoch": 0.10798837572533392, "grad_norm": 355.1993408203125, "learning_rate": 1.968275004418885e-06, "loss": 28.8594, "step": 11408 }, { "epoch": 0.10799784174704896, "grad_norm": 744.41064453125, "learning_rate": 1.9682673428384116e-06, "loss": 65.7031, "step": 11409 }, { "epoch": 0.10800730776876402, "grad_norm": 655.889892578125, "learning_rate": 1.9682596803478296e-06, "loss": 41.0547, "step": 11410 }, { "epoch": 0.10801677379047908, "grad_norm": 301.79888916015625, "learning_rate": 1.968252016947147e-06, "loss": 20.3359, "step": 11411 }, { "epoch": 0.10802623981219413, "grad_norm": 175.65245056152344, "learning_rate": 1.96824435263637e-06, "loss": 21.5703, "step": 11412 }, { "epoch": 0.10803570583390919, "grad_norm": 421.4400939941406, "learning_rate": 1.9682366874155068e-06, "loss": 31.0, "step": 11413 }, { "epoch": 0.10804517185562423, "grad_norm": 483.7490539550781, "learning_rate": 1.968229021284564e-06, "loss": 39.3125, "step": 11414 }, { "epoch": 0.1080546378773393, "grad_norm": 758.927001953125, "learning_rate": 1.9682213542435485e-06, "loss": 22.0781, "step": 11415 }, { "epoch": 0.10806410389905434, "grad_norm": 228.1251678466797, "learning_rate": 1.968213686292468e-06, "loss": 17.9688, "step": 11416 }, { "epoch": 0.1080735699207694, "grad_norm": 194.93600463867188, "learning_rate": 1.96820601743133e-06, "loss": 20.25, "step": 11417 }, { "epoch": 0.10808303594248445, "grad_norm": 964.3837280273438, "learning_rate": 1.968198347660141e-06, "loss": 27.3125, "step": 11418 }, { "epoch": 0.1080925019641995, "grad_norm": 991.326904296875, "learning_rate": 1.968190676978909e-06, "loss": 19.8984, "step": 11419 }, { "epoch": 0.10810196798591457, "grad_norm": 394.7591857910156, "learning_rate": 1.96818300538764e-06, "loss": 45.1719, "step": 11420 }, { "epoch": 0.10811143400762961, "grad_norm": 412.7296447753906, "learning_rate": 1.968175332886343e-06, "loss": 26.1094, "step": 11421 }, { "epoch": 0.10812090002934467, "grad_norm": 373.0243225097656, "learning_rate": 1.968167659475023e-06, "loss": 22.2383, "step": 11422 }, { "epoch": 0.10813036605105972, "grad_norm": 703.442626953125, "learning_rate": 1.968159985153689e-06, "loss": 41.1406, "step": 11423 }, { "epoch": 0.10813983207277478, "grad_norm": 513.9996948242188, "learning_rate": 1.9681523099223475e-06, "loss": 29.6094, "step": 11424 }, { "epoch": 0.10814929809448982, "grad_norm": 382.1614074707031, "learning_rate": 1.9681446337810057e-06, "loss": 29.3594, "step": 11425 }, { "epoch": 0.10815876411620488, "grad_norm": 229.53228759765625, "learning_rate": 1.968136956729671e-06, "loss": 22.0391, "step": 11426 }, { "epoch": 0.10816823013791993, "grad_norm": 399.0087585449219, "learning_rate": 1.9681292787683507e-06, "loss": 22.6172, "step": 11427 }, { "epoch": 0.10817769615963499, "grad_norm": 988.6678466796875, "learning_rate": 1.968121599897052e-06, "loss": 49.9219, "step": 11428 }, { "epoch": 0.10818716218135005, "grad_norm": 365.75970458984375, "learning_rate": 1.9681139201157817e-06, "loss": 24.1719, "step": 11429 }, { "epoch": 0.1081966282030651, "grad_norm": 267.1945495605469, "learning_rate": 1.9681062394245474e-06, "loss": 33.2969, "step": 11430 }, { "epoch": 0.10820609422478016, "grad_norm": 303.700927734375, "learning_rate": 1.9680985578233563e-06, "loss": 22.1094, "step": 11431 }, { "epoch": 0.1082155602464952, "grad_norm": 244.3128204345703, "learning_rate": 1.9680908753122154e-06, "loss": 25.3633, "step": 11432 }, { "epoch": 0.10822502626821026, "grad_norm": 849.0704956054688, "learning_rate": 1.968083191891132e-06, "loss": 28.5, "step": 11433 }, { "epoch": 0.10823449228992531, "grad_norm": 201.23194885253906, "learning_rate": 1.9680755075601133e-06, "loss": 19.2891, "step": 11434 }, { "epoch": 0.10824395831164037, "grad_norm": 756.1685791015625, "learning_rate": 1.9680678223191665e-06, "loss": 29.4766, "step": 11435 }, { "epoch": 0.10825342433335543, "grad_norm": 978.5477294921875, "learning_rate": 1.968060136168299e-06, "loss": 69.1641, "step": 11436 }, { "epoch": 0.10826289035507047, "grad_norm": 583.02392578125, "learning_rate": 1.968052449107518e-06, "loss": 40.2812, "step": 11437 }, { "epoch": 0.10827235637678553, "grad_norm": 215.78196716308594, "learning_rate": 1.968044761136831e-06, "loss": 14.5586, "step": 11438 }, { "epoch": 0.10828182239850058, "grad_norm": 174.60348510742188, "learning_rate": 1.9680370722562447e-06, "loss": 14.7891, "step": 11439 }, { "epoch": 0.10829128842021564, "grad_norm": 513.251708984375, "learning_rate": 1.9680293824657666e-06, "loss": 18.9141, "step": 11440 }, { "epoch": 0.10830075444193069, "grad_norm": 461.840087890625, "learning_rate": 1.9680216917654036e-06, "loss": 26.9609, "step": 11441 }, { "epoch": 0.10831022046364575, "grad_norm": 199.66143798828125, "learning_rate": 1.9680140001551633e-06, "loss": 20.3672, "step": 11442 }, { "epoch": 0.10831968648536079, "grad_norm": 250.4950408935547, "learning_rate": 1.968006307635053e-06, "loss": 15.9766, "step": 11443 }, { "epoch": 0.10832915250707585, "grad_norm": 969.6625366210938, "learning_rate": 1.9679986142050797e-06, "loss": 13.1836, "step": 11444 }, { "epoch": 0.10833861852879091, "grad_norm": 343.18890380859375, "learning_rate": 1.9679909198652503e-06, "loss": 23.0078, "step": 11445 }, { "epoch": 0.10834808455050596, "grad_norm": 338.71136474609375, "learning_rate": 1.967983224615573e-06, "loss": 20.1484, "step": 11446 }, { "epoch": 0.10835755057222102, "grad_norm": 245.13954162597656, "learning_rate": 1.9679755284560538e-06, "loss": 18.2812, "step": 11447 }, { "epoch": 0.10836701659393606, "grad_norm": 2.889066457748413, "learning_rate": 1.967967831386701e-06, "loss": 0.832, "step": 11448 }, { "epoch": 0.10837648261565112, "grad_norm": 927.7006225585938, "learning_rate": 1.9679601334075213e-06, "loss": 17.6172, "step": 11449 }, { "epoch": 0.10838594863736617, "grad_norm": 397.9100341796875, "learning_rate": 1.967952434518522e-06, "loss": 27.0234, "step": 11450 }, { "epoch": 0.10839541465908123, "grad_norm": 429.01123046875, "learning_rate": 1.9679447347197104e-06, "loss": 28.5, "step": 11451 }, { "epoch": 0.10840488068079628, "grad_norm": 865.3442993164062, "learning_rate": 1.9679370340110937e-06, "loss": 52.2891, "step": 11452 }, { "epoch": 0.10841434670251134, "grad_norm": 423.3039245605469, "learning_rate": 1.9679293323926793e-06, "loss": 29.6641, "step": 11453 }, { "epoch": 0.1084238127242264, "grad_norm": 336.04656982421875, "learning_rate": 1.967921629864474e-06, "loss": 21.8125, "step": 11454 }, { "epoch": 0.10843327874594144, "grad_norm": 147.23292541503906, "learning_rate": 1.9679139264264855e-06, "loss": 17.9219, "step": 11455 }, { "epoch": 0.1084427447676565, "grad_norm": 365.6387939453125, "learning_rate": 1.967906222078721e-06, "loss": 44.8594, "step": 11456 }, { "epoch": 0.10845221078937155, "grad_norm": 282.4096374511719, "learning_rate": 1.9678985168211874e-06, "loss": 19.7188, "step": 11457 }, { "epoch": 0.10846167681108661, "grad_norm": 962.9359130859375, "learning_rate": 1.9678908106538927e-06, "loss": 58.4688, "step": 11458 }, { "epoch": 0.10847114283280165, "grad_norm": 738.0048828125, "learning_rate": 1.967883103576843e-06, "loss": 47.8438, "step": 11459 }, { "epoch": 0.10848060885451671, "grad_norm": 232.5845489501953, "learning_rate": 1.967875395590046e-06, "loss": 16.7969, "step": 11460 }, { "epoch": 0.10849007487623176, "grad_norm": 314.80572509765625, "learning_rate": 1.9678676866935096e-06, "loss": 20.1484, "step": 11461 }, { "epoch": 0.10849954089794682, "grad_norm": 183.26531982421875, "learning_rate": 1.9678599768872402e-06, "loss": 26.1016, "step": 11462 }, { "epoch": 0.10850900691966188, "grad_norm": 296.9437255859375, "learning_rate": 1.9678522661712456e-06, "loss": 22.3203, "step": 11463 }, { "epoch": 0.10851847294137693, "grad_norm": 309.3370666503906, "learning_rate": 1.967844554545533e-06, "loss": 32.5703, "step": 11464 }, { "epoch": 0.10852793896309199, "grad_norm": 421.884765625, "learning_rate": 1.9678368420101094e-06, "loss": 20.2812, "step": 11465 }, { "epoch": 0.10853740498480703, "grad_norm": 298.2139587402344, "learning_rate": 1.967829128564982e-06, "loss": 19.5, "step": 11466 }, { "epoch": 0.10854687100652209, "grad_norm": 392.583740234375, "learning_rate": 1.9678214142101578e-06, "loss": 30.1406, "step": 11467 }, { "epoch": 0.10855633702823714, "grad_norm": 380.10015869140625, "learning_rate": 1.967813698945645e-06, "loss": 17.7344, "step": 11468 }, { "epoch": 0.1085658030499522, "grad_norm": 522.3056640625, "learning_rate": 1.96780598277145e-06, "loss": 46.6406, "step": 11469 }, { "epoch": 0.10857526907166724, "grad_norm": 474.6283264160156, "learning_rate": 1.9677982656875803e-06, "loss": 19.4688, "step": 11470 }, { "epoch": 0.1085847350933823, "grad_norm": 3.5505824089050293, "learning_rate": 1.9677905476940433e-06, "loss": 0.9346, "step": 11471 }, { "epoch": 0.10859420111509736, "grad_norm": 232.61639404296875, "learning_rate": 1.9677828287908463e-06, "loss": 19.0, "step": 11472 }, { "epoch": 0.10860366713681241, "grad_norm": 567.9075927734375, "learning_rate": 1.9677751089779965e-06, "loss": 63.1562, "step": 11473 }, { "epoch": 0.10861313315852747, "grad_norm": 574.2100830078125, "learning_rate": 1.9677673882555006e-06, "loss": 30.8672, "step": 11474 }, { "epoch": 0.10862259918024252, "grad_norm": 280.9361877441406, "learning_rate": 1.9677596666233663e-06, "loss": 26.625, "step": 11475 }, { "epoch": 0.10863206520195758, "grad_norm": 222.94708251953125, "learning_rate": 1.9677519440816012e-06, "loss": 21.9688, "step": 11476 }, { "epoch": 0.10864153122367262, "grad_norm": 409.7029113769531, "learning_rate": 1.967744220630212e-06, "loss": 24.4688, "step": 11477 }, { "epoch": 0.10865099724538768, "grad_norm": 784.4891357421875, "learning_rate": 1.9677364962692066e-06, "loss": 23.3438, "step": 11478 }, { "epoch": 0.10866046326710274, "grad_norm": 346.03448486328125, "learning_rate": 1.9677287709985915e-06, "loss": 26.7656, "step": 11479 }, { "epoch": 0.10866992928881779, "grad_norm": 368.8300476074219, "learning_rate": 1.9677210448183744e-06, "loss": 29.9219, "step": 11480 }, { "epoch": 0.10867939531053285, "grad_norm": 294.06048583984375, "learning_rate": 1.9677133177285624e-06, "loss": 7.1152, "step": 11481 }, { "epoch": 0.1086888613322479, "grad_norm": 251.40577697753906, "learning_rate": 1.967705589729163e-06, "loss": 21.8594, "step": 11482 }, { "epoch": 0.10869832735396295, "grad_norm": 3.6285340785980225, "learning_rate": 1.967697860820183e-06, "loss": 0.998, "step": 11483 }, { "epoch": 0.108707793375678, "grad_norm": 249.27459716796875, "learning_rate": 1.9676901310016302e-06, "loss": 23.0703, "step": 11484 }, { "epoch": 0.10871725939739306, "grad_norm": 151.68264770507812, "learning_rate": 1.9676824002735115e-06, "loss": 12.0469, "step": 11485 }, { "epoch": 0.1087267254191081, "grad_norm": 544.0054321289062, "learning_rate": 1.9676746686358344e-06, "loss": 26.8906, "step": 11486 }, { "epoch": 0.10873619144082317, "grad_norm": 423.39715576171875, "learning_rate": 1.967666936088606e-06, "loss": 37.1016, "step": 11487 }, { "epoch": 0.10874565746253823, "grad_norm": 703.2177124023438, "learning_rate": 1.967659202631834e-06, "loss": 57.4844, "step": 11488 }, { "epoch": 0.10875512348425327, "grad_norm": 699.1116333007812, "learning_rate": 1.9676514682655244e-06, "loss": 50.1719, "step": 11489 }, { "epoch": 0.10876458950596833, "grad_norm": 544.3113403320312, "learning_rate": 1.967643732989686e-06, "loss": 52.0312, "step": 11490 }, { "epoch": 0.10877405552768338, "grad_norm": 562.1798706054688, "learning_rate": 1.9676359968043253e-06, "loss": 48.3438, "step": 11491 }, { "epoch": 0.10878352154939844, "grad_norm": 241.47555541992188, "learning_rate": 1.96762825970945e-06, "loss": 11.9219, "step": 11492 }, { "epoch": 0.10879298757111348, "grad_norm": 270.0235290527344, "learning_rate": 1.9676205217050666e-06, "loss": 18.7734, "step": 11493 }, { "epoch": 0.10880245359282854, "grad_norm": 294.74530029296875, "learning_rate": 1.9676127827911834e-06, "loss": 31.7578, "step": 11494 }, { "epoch": 0.10881191961454359, "grad_norm": 3.1433558464050293, "learning_rate": 1.967605042967807e-06, "loss": 0.9985, "step": 11495 }, { "epoch": 0.10882138563625865, "grad_norm": 335.44659423828125, "learning_rate": 1.9675973022349447e-06, "loss": 30.7969, "step": 11496 }, { "epoch": 0.10883085165797371, "grad_norm": 256.3725280761719, "learning_rate": 1.9675895605926036e-06, "loss": 20.0391, "step": 11497 }, { "epoch": 0.10884031767968876, "grad_norm": 298.67584228515625, "learning_rate": 1.9675818180407913e-06, "loss": 34.0156, "step": 11498 }, { "epoch": 0.10884978370140382, "grad_norm": 206.30792236328125, "learning_rate": 1.967574074579515e-06, "loss": 17.3828, "step": 11499 }, { "epoch": 0.10885924972311886, "grad_norm": 660.9520874023438, "learning_rate": 1.9675663302087827e-06, "loss": 42.5469, "step": 11500 }, { "epoch": 0.10886871574483392, "grad_norm": 292.3359069824219, "learning_rate": 1.9675585849286007e-06, "loss": 27.5938, "step": 11501 }, { "epoch": 0.10887818176654897, "grad_norm": 184.54737854003906, "learning_rate": 1.9675508387389763e-06, "loss": 20.5234, "step": 11502 }, { "epoch": 0.10888764778826403, "grad_norm": 433.817626953125, "learning_rate": 1.9675430916399172e-06, "loss": 25.1719, "step": 11503 }, { "epoch": 0.10889711380997907, "grad_norm": 225.734619140625, "learning_rate": 1.9675353436314307e-06, "loss": 16.0469, "step": 11504 }, { "epoch": 0.10890657983169413, "grad_norm": 388.60565185546875, "learning_rate": 1.9675275947135233e-06, "loss": 18.5312, "step": 11505 }, { "epoch": 0.1089160458534092, "grad_norm": 549.512939453125, "learning_rate": 1.9675198448862033e-06, "loss": 43.0703, "step": 11506 }, { "epoch": 0.10892551187512424, "grad_norm": 647.7848510742188, "learning_rate": 1.967512094149478e-06, "loss": 40.4688, "step": 11507 }, { "epoch": 0.1089349778968393, "grad_norm": 304.9736328125, "learning_rate": 1.9675043425033537e-06, "loss": 33.9922, "step": 11508 }, { "epoch": 0.10894444391855435, "grad_norm": 220.94847106933594, "learning_rate": 1.9674965899478384e-06, "loss": 17.8203, "step": 11509 }, { "epoch": 0.1089539099402694, "grad_norm": 363.2182922363281, "learning_rate": 1.9674888364829396e-06, "loss": 35.6406, "step": 11510 }, { "epoch": 0.10896337596198445, "grad_norm": 484.2433166503906, "learning_rate": 1.9674810821086637e-06, "loss": 37.0156, "step": 11511 }, { "epoch": 0.10897284198369951, "grad_norm": 279.92529296875, "learning_rate": 1.967473326825019e-06, "loss": 25.7344, "step": 11512 }, { "epoch": 0.10898230800541456, "grad_norm": 262.6466064453125, "learning_rate": 1.967465570632012e-06, "loss": 12.3203, "step": 11513 }, { "epoch": 0.10899177402712962, "grad_norm": 3.218191146850586, "learning_rate": 1.96745781352965e-06, "loss": 1.0122, "step": 11514 }, { "epoch": 0.10900124004884468, "grad_norm": 490.5176696777344, "learning_rate": 1.967450055517941e-06, "loss": 24.3711, "step": 11515 }, { "epoch": 0.10901070607055972, "grad_norm": 376.39208984375, "learning_rate": 1.967442296596892e-06, "loss": 29.2578, "step": 11516 }, { "epoch": 0.10902017209227478, "grad_norm": 538.0573120117188, "learning_rate": 1.96743453676651e-06, "loss": 54.6953, "step": 11517 }, { "epoch": 0.10902963811398983, "grad_norm": 378.45849609375, "learning_rate": 1.9674267760268024e-06, "loss": 20.8047, "step": 11518 }, { "epoch": 0.10903910413570489, "grad_norm": 273.3712158203125, "learning_rate": 1.967419014377777e-06, "loss": 44.5391, "step": 11519 }, { "epoch": 0.10904857015741994, "grad_norm": 313.4364318847656, "learning_rate": 1.9674112518194403e-06, "loss": 21.8828, "step": 11520 }, { "epoch": 0.109058036179135, "grad_norm": 623.07421875, "learning_rate": 1.9674034883518e-06, "loss": 30.0156, "step": 11521 }, { "epoch": 0.10906750220085006, "grad_norm": 472.95703125, "learning_rate": 1.9673957239748628e-06, "loss": 50.875, "step": 11522 }, { "epoch": 0.1090769682225651, "grad_norm": 222.1414031982422, "learning_rate": 1.967387958688637e-06, "loss": 17.4609, "step": 11523 }, { "epoch": 0.10908643424428016, "grad_norm": 479.81671142578125, "learning_rate": 1.9673801924931296e-06, "loss": 46.375, "step": 11524 }, { "epoch": 0.10909590026599521, "grad_norm": 184.447021484375, "learning_rate": 1.967372425388348e-06, "loss": 16.6172, "step": 11525 }, { "epoch": 0.10910536628771027, "grad_norm": 231.922119140625, "learning_rate": 1.967364657374299e-06, "loss": 18.9375, "step": 11526 }, { "epoch": 0.10911483230942531, "grad_norm": 945.27099609375, "learning_rate": 1.96735688845099e-06, "loss": 52.2188, "step": 11527 }, { "epoch": 0.10912429833114037, "grad_norm": 473.9387512207031, "learning_rate": 1.9673491186184284e-06, "loss": 35.1641, "step": 11528 }, { "epoch": 0.10913376435285542, "grad_norm": 285.6877746582031, "learning_rate": 1.967341347876622e-06, "loss": 26.6875, "step": 11529 }, { "epoch": 0.10914323037457048, "grad_norm": 327.85968017578125, "learning_rate": 1.967333576225577e-06, "loss": 23.8594, "step": 11530 }, { "epoch": 0.10915269639628554, "grad_norm": 242.3740997314453, "learning_rate": 1.967325803665302e-06, "loss": 25.8711, "step": 11531 }, { "epoch": 0.10916216241800059, "grad_norm": 4.1103034019470215, "learning_rate": 1.9673180301958033e-06, "loss": 1.0596, "step": 11532 }, { "epoch": 0.10917162843971565, "grad_norm": 255.63111877441406, "learning_rate": 1.9673102558170885e-06, "loss": 28.2188, "step": 11533 }, { "epoch": 0.10918109446143069, "grad_norm": 378.79034423828125, "learning_rate": 1.9673024805291655e-06, "loss": 32.6406, "step": 11534 }, { "epoch": 0.10919056048314575, "grad_norm": 644.8316650390625, "learning_rate": 1.9672947043320408e-06, "loss": 25.0625, "step": 11535 }, { "epoch": 0.1092000265048608, "grad_norm": 995.300537109375, "learning_rate": 1.967286927225722e-06, "loss": 56.0352, "step": 11536 }, { "epoch": 0.10920949252657586, "grad_norm": 568.790283203125, "learning_rate": 1.967279149210216e-06, "loss": 24.5, "step": 11537 }, { "epoch": 0.1092189585482909, "grad_norm": 176.45436096191406, "learning_rate": 1.9672713702855313e-06, "loss": 21.8906, "step": 11538 }, { "epoch": 0.10922842457000596, "grad_norm": 564.360107421875, "learning_rate": 1.967263590451674e-06, "loss": 32.1016, "step": 11539 }, { "epoch": 0.10923789059172102, "grad_norm": 243.02090454101562, "learning_rate": 1.967255809708652e-06, "loss": 7.9375, "step": 11540 }, { "epoch": 0.10924735661343607, "grad_norm": 510.9360656738281, "learning_rate": 1.9672480280564723e-06, "loss": 55.0625, "step": 11541 }, { "epoch": 0.10925682263515113, "grad_norm": 194.0226287841797, "learning_rate": 1.967240245495142e-06, "loss": 21.4844, "step": 11542 }, { "epoch": 0.10926628865686618, "grad_norm": 910.73486328125, "learning_rate": 1.9672324620246696e-06, "loss": 20.9297, "step": 11543 }, { "epoch": 0.10927575467858124, "grad_norm": 3.4038467407226562, "learning_rate": 1.9672246776450614e-06, "loss": 0.8677, "step": 11544 }, { "epoch": 0.10928522070029628, "grad_norm": 227.6520538330078, "learning_rate": 1.967216892356324e-06, "loss": 14.1719, "step": 11545 }, { "epoch": 0.10929468672201134, "grad_norm": 412.49053955078125, "learning_rate": 1.967209106158467e-06, "loss": 37.8125, "step": 11546 }, { "epoch": 0.10930415274372639, "grad_norm": 264.4400329589844, "learning_rate": 1.9672013190514956e-06, "loss": 21.0469, "step": 11547 }, { "epoch": 0.10931361876544145, "grad_norm": 375.57049560546875, "learning_rate": 1.967193531035418e-06, "loss": 13.5781, "step": 11548 }, { "epoch": 0.10932308478715651, "grad_norm": 598.000732421875, "learning_rate": 1.9671857421102415e-06, "loss": 39.3281, "step": 11549 }, { "epoch": 0.10933255080887155, "grad_norm": 280.05438232421875, "learning_rate": 1.9671779522759736e-06, "loss": 28.3594, "step": 11550 }, { "epoch": 0.10934201683058661, "grad_norm": 3.0023956298828125, "learning_rate": 1.9671701615326207e-06, "loss": 0.8809, "step": 11551 }, { "epoch": 0.10935148285230166, "grad_norm": 577.3534545898438, "learning_rate": 1.967162369880191e-06, "loss": 47.7344, "step": 11552 }, { "epoch": 0.10936094887401672, "grad_norm": 289.90289306640625, "learning_rate": 1.9671545773186918e-06, "loss": 32.2656, "step": 11553 }, { "epoch": 0.10937041489573177, "grad_norm": 709.934326171875, "learning_rate": 1.96714678384813e-06, "loss": 33.5, "step": 11554 }, { "epoch": 0.10937988091744683, "grad_norm": 413.7619934082031, "learning_rate": 1.9671389894685135e-06, "loss": 50.4062, "step": 11555 }, { "epoch": 0.10938934693916187, "grad_norm": 600.8081665039062, "learning_rate": 1.967131194179849e-06, "loss": 28.7734, "step": 11556 }, { "epoch": 0.10939881296087693, "grad_norm": 882.328857421875, "learning_rate": 1.967123397982144e-06, "loss": 50.1094, "step": 11557 }, { "epoch": 0.10940827898259199, "grad_norm": 2.7335166931152344, "learning_rate": 1.967115600875406e-06, "loss": 0.812, "step": 11558 }, { "epoch": 0.10941774500430704, "grad_norm": 289.65081787109375, "learning_rate": 1.967107802859642e-06, "loss": 19.8203, "step": 11559 }, { "epoch": 0.1094272110260221, "grad_norm": 262.42901611328125, "learning_rate": 1.9671000039348604e-06, "loss": 22.5156, "step": 11560 }, { "epoch": 0.10943667704773714, "grad_norm": 508.3982238769531, "learning_rate": 1.9670922041010666e-06, "loss": 42.3281, "step": 11561 }, { "epoch": 0.1094461430694522, "grad_norm": 431.0394287109375, "learning_rate": 1.96708440335827e-06, "loss": 45.0312, "step": 11562 }, { "epoch": 0.10945560909116725, "grad_norm": 325.6947937011719, "learning_rate": 1.967076601706476e-06, "loss": 37.7656, "step": 11563 }, { "epoch": 0.10946507511288231, "grad_norm": 477.89453125, "learning_rate": 1.9670687991456937e-06, "loss": 10.1406, "step": 11564 }, { "epoch": 0.10947454113459737, "grad_norm": 248.9674072265625, "learning_rate": 1.9670609956759296e-06, "loss": 20.2812, "step": 11565 }, { "epoch": 0.10948400715631242, "grad_norm": 236.36842346191406, "learning_rate": 1.9670531912971907e-06, "loss": 9.1641, "step": 11566 }, { "epoch": 0.10949347317802748, "grad_norm": 799.955078125, "learning_rate": 1.967045386009485e-06, "loss": 62.5156, "step": 11567 }, { "epoch": 0.10950293919974252, "grad_norm": 180.75726318359375, "learning_rate": 1.9670375798128196e-06, "loss": 31.4219, "step": 11568 }, { "epoch": 0.10951240522145758, "grad_norm": 571.602294921875, "learning_rate": 1.9670297727072018e-06, "loss": 22.8672, "step": 11569 }, { "epoch": 0.10952187124317263, "grad_norm": 559.058349609375, "learning_rate": 1.9670219646926383e-06, "loss": 32.1719, "step": 11570 }, { "epoch": 0.10953133726488769, "grad_norm": 459.8942565917969, "learning_rate": 1.967014155769138e-06, "loss": 23.5195, "step": 11571 }, { "epoch": 0.10954080328660273, "grad_norm": 354.49169921875, "learning_rate": 1.9670063459367066e-06, "loss": 21.25, "step": 11572 }, { "epoch": 0.1095502693083178, "grad_norm": 558.8107299804688, "learning_rate": 1.9669985351953523e-06, "loss": 33.1562, "step": 11573 }, { "epoch": 0.10955973533003285, "grad_norm": 508.0396423339844, "learning_rate": 1.9669907235450825e-06, "loss": 26.0312, "step": 11574 }, { "epoch": 0.1095692013517479, "grad_norm": 500.3486328125, "learning_rate": 1.966982910985904e-06, "loss": 20.6562, "step": 11575 }, { "epoch": 0.10957866737346296, "grad_norm": 770.6831665039062, "learning_rate": 1.966975097517825e-06, "loss": 36.2578, "step": 11576 }, { "epoch": 0.109588133395178, "grad_norm": 325.6020812988281, "learning_rate": 1.966967283140852e-06, "loss": 18.5156, "step": 11577 }, { "epoch": 0.10959759941689307, "grad_norm": 184.6699676513672, "learning_rate": 1.9669594678549925e-06, "loss": 17.1875, "step": 11578 }, { "epoch": 0.10960706543860811, "grad_norm": 272.4907531738281, "learning_rate": 1.966951651660254e-06, "loss": 33.6406, "step": 11579 }, { "epoch": 0.10961653146032317, "grad_norm": 489.2724609375, "learning_rate": 1.966943834556644e-06, "loss": 40.2969, "step": 11580 }, { "epoch": 0.10962599748203822, "grad_norm": 832.9443969726562, "learning_rate": 1.9669360165441695e-06, "loss": 24.3438, "step": 11581 }, { "epoch": 0.10963546350375328, "grad_norm": 313.80914306640625, "learning_rate": 1.9669281976228384e-06, "loss": 16.6484, "step": 11582 }, { "epoch": 0.10964492952546834, "grad_norm": 2.9804930686950684, "learning_rate": 1.9669203777926575e-06, "loss": 0.9102, "step": 11583 }, { "epoch": 0.10965439554718338, "grad_norm": 622.3955078125, "learning_rate": 1.9669125570536344e-06, "loss": 39.5898, "step": 11584 }, { "epoch": 0.10966386156889844, "grad_norm": 328.3230285644531, "learning_rate": 1.9669047354057763e-06, "loss": 41.9688, "step": 11585 }, { "epoch": 0.10967332759061349, "grad_norm": 447.8307189941406, "learning_rate": 1.966896912849091e-06, "loss": 20.5, "step": 11586 }, { "epoch": 0.10968279361232855, "grad_norm": 268.0477294921875, "learning_rate": 1.966889089383585e-06, "loss": 27.4844, "step": 11587 }, { "epoch": 0.1096922596340436, "grad_norm": 459.071533203125, "learning_rate": 1.9668812650092664e-06, "loss": 40.4453, "step": 11588 }, { "epoch": 0.10970172565575866, "grad_norm": 226.7638397216797, "learning_rate": 1.9668734397261424e-06, "loss": 18.4453, "step": 11589 }, { "epoch": 0.1097111916774737, "grad_norm": 1165.430908203125, "learning_rate": 1.96686561353422e-06, "loss": 30.3281, "step": 11590 }, { "epoch": 0.10972065769918876, "grad_norm": 460.531005859375, "learning_rate": 1.966857786433507e-06, "loss": 30.0, "step": 11591 }, { "epoch": 0.10973012372090382, "grad_norm": 565.0679931640625, "learning_rate": 1.9668499584240104e-06, "loss": 35.5312, "step": 11592 }, { "epoch": 0.10973958974261887, "grad_norm": 226.12013244628906, "learning_rate": 1.9668421295057377e-06, "loss": 17.4023, "step": 11593 }, { "epoch": 0.10974905576433393, "grad_norm": 180.57003784179688, "learning_rate": 1.966834299678697e-06, "loss": 9.3379, "step": 11594 }, { "epoch": 0.10975852178604897, "grad_norm": 272.1738586425781, "learning_rate": 1.966826468942894e-06, "loss": 16.9766, "step": 11595 }, { "epoch": 0.10976798780776403, "grad_norm": 418.64141845703125, "learning_rate": 1.9668186372983374e-06, "loss": 42.4688, "step": 11596 }, { "epoch": 0.10977745382947908, "grad_norm": 220.49183654785156, "learning_rate": 1.9668108047450343e-06, "loss": 24.7891, "step": 11597 }, { "epoch": 0.10978691985119414, "grad_norm": 266.99713134765625, "learning_rate": 1.9668029712829923e-06, "loss": 24.2969, "step": 11598 }, { "epoch": 0.10979638587290919, "grad_norm": 426.0027160644531, "learning_rate": 1.9667951369122177e-06, "loss": 26.3906, "step": 11599 }, { "epoch": 0.10980585189462425, "grad_norm": 1140.570068359375, "learning_rate": 1.9667873016327187e-06, "loss": 63.7891, "step": 11600 }, { "epoch": 0.1098153179163393, "grad_norm": 456.0509948730469, "learning_rate": 1.966779465444503e-06, "loss": 37.3281, "step": 11601 }, { "epoch": 0.10982478393805435, "grad_norm": 277.34869384765625, "learning_rate": 1.966771628347577e-06, "loss": 25.2266, "step": 11602 }, { "epoch": 0.10983424995976941, "grad_norm": 913.8724975585938, "learning_rate": 1.9667637903419486e-06, "loss": 65.4141, "step": 11603 }, { "epoch": 0.10984371598148446, "grad_norm": 349.3119201660156, "learning_rate": 1.9667559514276254e-06, "loss": 21.4375, "step": 11604 }, { "epoch": 0.10985318200319952, "grad_norm": 359.8923034667969, "learning_rate": 1.966748111604614e-06, "loss": 32.9844, "step": 11605 }, { "epoch": 0.10986264802491456, "grad_norm": 549.0721435546875, "learning_rate": 1.966740270872923e-06, "loss": 35.6602, "step": 11606 }, { "epoch": 0.10987211404662962, "grad_norm": 442.5870056152344, "learning_rate": 1.9667324292325588e-06, "loss": 42.082, "step": 11607 }, { "epoch": 0.10988158006834468, "grad_norm": 287.224365234375, "learning_rate": 1.9667245866835287e-06, "loss": 13.5312, "step": 11608 }, { "epoch": 0.10989104609005973, "grad_norm": 3.158830165863037, "learning_rate": 1.9667167432258406e-06, "loss": 0.9302, "step": 11609 }, { "epoch": 0.10990051211177479, "grad_norm": 237.39846801757812, "learning_rate": 1.9667088988595017e-06, "loss": 22.0469, "step": 11610 }, { "epoch": 0.10990997813348984, "grad_norm": 403.6527404785156, "learning_rate": 1.966701053584519e-06, "loss": 34.7344, "step": 11611 }, { "epoch": 0.1099194441552049, "grad_norm": 305.6593017578125, "learning_rate": 1.9666932074009005e-06, "loss": 17.2227, "step": 11612 }, { "epoch": 0.10992891017691994, "grad_norm": 3.8422977924346924, "learning_rate": 1.966685360308653e-06, "loss": 1.0122, "step": 11613 }, { "epoch": 0.109938376198635, "grad_norm": 2.7512478828430176, "learning_rate": 1.966677512307784e-06, "loss": 0.8711, "step": 11614 }, { "epoch": 0.10994784222035005, "grad_norm": 161.09190368652344, "learning_rate": 1.966669663398302e-06, "loss": 18.8047, "step": 11615 }, { "epoch": 0.10995730824206511, "grad_norm": 484.0468444824219, "learning_rate": 1.9666618135802125e-06, "loss": 18.332, "step": 11616 }, { "epoch": 0.10996677426378017, "grad_norm": 1319.0252685546875, "learning_rate": 1.966653962853524e-06, "loss": 72.4219, "step": 11617 }, { "epoch": 0.10997624028549521, "grad_norm": 360.25128173828125, "learning_rate": 1.9666461112182436e-06, "loss": 42.125, "step": 11618 }, { "epoch": 0.10998570630721027, "grad_norm": 442.1338195800781, "learning_rate": 1.966638258674379e-06, "loss": 46.7461, "step": 11619 }, { "epoch": 0.10999517232892532, "grad_norm": 254.5100555419922, "learning_rate": 1.966630405221937e-06, "loss": 16.2266, "step": 11620 }, { "epoch": 0.11000463835064038, "grad_norm": 157.24710083007812, "learning_rate": 1.9666225508609258e-06, "loss": 18.7734, "step": 11621 }, { "epoch": 0.11001410437235543, "grad_norm": 550.8811645507812, "learning_rate": 1.966614695591352e-06, "loss": 46.832, "step": 11622 }, { "epoch": 0.11002357039407049, "grad_norm": 978.1143188476562, "learning_rate": 1.9666068394132228e-06, "loss": 37.7344, "step": 11623 }, { "epoch": 0.11003303641578553, "grad_norm": 206.0538787841797, "learning_rate": 1.9665989823265467e-06, "loss": 17.2422, "step": 11624 }, { "epoch": 0.11004250243750059, "grad_norm": 306.9471130371094, "learning_rate": 1.9665911243313304e-06, "loss": 18.9375, "step": 11625 }, { "epoch": 0.11005196845921565, "grad_norm": 289.1575012207031, "learning_rate": 1.966583265427581e-06, "loss": 44.6719, "step": 11626 }, { "epoch": 0.1100614344809307, "grad_norm": 222.7789306640625, "learning_rate": 1.9665754056153063e-06, "loss": 20.1172, "step": 11627 }, { "epoch": 0.11007090050264576, "grad_norm": 751.2871704101562, "learning_rate": 1.9665675448945137e-06, "loss": 36.5, "step": 11628 }, { "epoch": 0.1100803665243608, "grad_norm": 378.0991516113281, "learning_rate": 1.96655968326521e-06, "loss": 30.0234, "step": 11629 }, { "epoch": 0.11008983254607586, "grad_norm": 197.96597290039062, "learning_rate": 1.966551820727404e-06, "loss": 21.1719, "step": 11630 }, { "epoch": 0.11009929856779091, "grad_norm": 371.2069091796875, "learning_rate": 1.9665439572811017e-06, "loss": 26.2734, "step": 11631 }, { "epoch": 0.11010876458950597, "grad_norm": 3.2116434574127197, "learning_rate": 1.9665360929263108e-06, "loss": 1.0586, "step": 11632 }, { "epoch": 0.11011823061122102, "grad_norm": 362.7815246582031, "learning_rate": 1.966528227663039e-06, "loss": 21.7578, "step": 11633 }, { "epoch": 0.11012769663293608, "grad_norm": 279.2822265625, "learning_rate": 1.9665203614912937e-06, "loss": 13.0078, "step": 11634 }, { "epoch": 0.11013716265465114, "grad_norm": 347.5174560546875, "learning_rate": 1.966512494411082e-06, "loss": 10.7305, "step": 11635 }, { "epoch": 0.11014662867636618, "grad_norm": 3.0758490562438965, "learning_rate": 1.9665046264224113e-06, "loss": 0.874, "step": 11636 }, { "epoch": 0.11015609469808124, "grad_norm": 230.75973510742188, "learning_rate": 1.966496757525289e-06, "loss": 17.3516, "step": 11637 }, { "epoch": 0.11016556071979629, "grad_norm": 664.6534423828125, "learning_rate": 1.966488887719723e-06, "loss": 22.4102, "step": 11638 }, { "epoch": 0.11017502674151135, "grad_norm": 763.2094116210938, "learning_rate": 1.9664810170057202e-06, "loss": 20.0469, "step": 11639 }, { "epoch": 0.1101844927632264, "grad_norm": 587.9544067382812, "learning_rate": 1.966473145383288e-06, "loss": 57.6562, "step": 11640 }, { "epoch": 0.11019395878494145, "grad_norm": 430.2040100097656, "learning_rate": 1.966465272852434e-06, "loss": 18.8203, "step": 11641 }, { "epoch": 0.1102034248066565, "grad_norm": 183.57037353515625, "learning_rate": 1.9664573994131656e-06, "loss": 18.1992, "step": 11642 }, { "epoch": 0.11021289082837156, "grad_norm": 347.7760009765625, "learning_rate": 1.96644952506549e-06, "loss": 17.5469, "step": 11643 }, { "epoch": 0.11022235685008662, "grad_norm": 371.8285827636719, "learning_rate": 1.9664416498094147e-06, "loss": 25.0625, "step": 11644 }, { "epoch": 0.11023182287180167, "grad_norm": 299.53643798828125, "learning_rate": 1.966433773644947e-06, "loss": 19.0391, "step": 11645 }, { "epoch": 0.11024128889351673, "grad_norm": 562.6380004882812, "learning_rate": 1.966425896572095e-06, "loss": 45.9531, "step": 11646 }, { "epoch": 0.11025075491523177, "grad_norm": 1181.1348876953125, "learning_rate": 1.9664180185908646e-06, "loss": 36.7578, "step": 11647 }, { "epoch": 0.11026022093694683, "grad_norm": 261.9545593261719, "learning_rate": 1.966410139701265e-06, "loss": 27.9531, "step": 11648 }, { "epoch": 0.11026968695866188, "grad_norm": 3.9275825023651123, "learning_rate": 1.9664022599033024e-06, "loss": 0.9023, "step": 11649 }, { "epoch": 0.11027915298037694, "grad_norm": 269.3408508300781, "learning_rate": 1.9663943791969843e-06, "loss": 17.1484, "step": 11650 }, { "epoch": 0.110288619002092, "grad_norm": 238.88340759277344, "learning_rate": 1.9663864975823186e-06, "loss": 18.0938, "step": 11651 }, { "epoch": 0.11029808502380704, "grad_norm": 1023.1051025390625, "learning_rate": 1.9663786150593126e-06, "loss": 65.2578, "step": 11652 }, { "epoch": 0.1103075510455221, "grad_norm": 356.8802490234375, "learning_rate": 1.966370731627973e-06, "loss": 24.7188, "step": 11653 }, { "epoch": 0.11031701706723715, "grad_norm": 447.6301574707031, "learning_rate": 1.9663628472883084e-06, "loss": 14.7031, "step": 11654 }, { "epoch": 0.11032648308895221, "grad_norm": 422.50262451171875, "learning_rate": 1.9663549620403254e-06, "loss": 43.5, "step": 11655 }, { "epoch": 0.11033594911066726, "grad_norm": 806.9295043945312, "learning_rate": 1.9663470758840317e-06, "loss": 61.0859, "step": 11656 }, { "epoch": 0.11034541513238232, "grad_norm": 227.2339630126953, "learning_rate": 1.9663391888194345e-06, "loss": 19.9141, "step": 11657 }, { "epoch": 0.11035488115409736, "grad_norm": 295.7070007324219, "learning_rate": 1.966331300846541e-06, "loss": 27.1016, "step": 11658 }, { "epoch": 0.11036434717581242, "grad_norm": 188.67742919921875, "learning_rate": 1.9663234119653597e-06, "loss": 11.2461, "step": 11659 }, { "epoch": 0.11037381319752748, "grad_norm": 245.34625244140625, "learning_rate": 1.966315522175897e-06, "loss": 21.6172, "step": 11660 }, { "epoch": 0.11038327921924253, "grad_norm": 338.973388671875, "learning_rate": 1.96630763147816e-06, "loss": 19.5469, "step": 11661 }, { "epoch": 0.11039274524095759, "grad_norm": 500.7450256347656, "learning_rate": 1.966299739872157e-06, "loss": 45.9609, "step": 11662 }, { "epoch": 0.11040221126267263, "grad_norm": 1381.1904296875, "learning_rate": 1.9662918473578954e-06, "loss": 54.1484, "step": 11663 }, { "epoch": 0.1104116772843877, "grad_norm": 239.47686767578125, "learning_rate": 1.966283953935382e-06, "loss": 20.2109, "step": 11664 }, { "epoch": 0.11042114330610274, "grad_norm": 395.47686767578125, "learning_rate": 1.966276059604625e-06, "loss": 50.6094, "step": 11665 }, { "epoch": 0.1104306093278178, "grad_norm": 546.701904296875, "learning_rate": 1.9662681643656305e-06, "loss": 41.3203, "step": 11666 }, { "epoch": 0.11044007534953285, "grad_norm": 3.654832124710083, "learning_rate": 1.9662602682184074e-06, "loss": 1.0054, "step": 11667 }, { "epoch": 0.1104495413712479, "grad_norm": 687.7979125976562, "learning_rate": 1.9662523711629626e-06, "loss": 44.5625, "step": 11668 }, { "epoch": 0.11045900739296297, "grad_norm": 301.586181640625, "learning_rate": 1.9662444731993032e-06, "loss": 31.2656, "step": 11669 }, { "epoch": 0.11046847341467801, "grad_norm": 291.43878173828125, "learning_rate": 1.966236574327437e-06, "loss": 17.6016, "step": 11670 }, { "epoch": 0.11047793943639307, "grad_norm": 206.71287536621094, "learning_rate": 1.9662286745473713e-06, "loss": 24.5664, "step": 11671 }, { "epoch": 0.11048740545810812, "grad_norm": 2.7949883937835693, "learning_rate": 1.966220773859113e-06, "loss": 0.845, "step": 11672 }, { "epoch": 0.11049687147982318, "grad_norm": 466.47650146484375, "learning_rate": 1.9662128722626704e-06, "loss": 48.4375, "step": 11673 }, { "epoch": 0.11050633750153822, "grad_norm": 438.54937744140625, "learning_rate": 1.966204969758051e-06, "loss": 9.4336, "step": 11674 }, { "epoch": 0.11051580352325328, "grad_norm": 2.5155436992645264, "learning_rate": 1.966197066345261e-06, "loss": 0.7583, "step": 11675 }, { "epoch": 0.11052526954496833, "grad_norm": 238.4827423095703, "learning_rate": 1.9661891620243088e-06, "loss": 22.7539, "step": 11676 }, { "epoch": 0.11053473556668339, "grad_norm": 648.9135131835938, "learning_rate": 1.9661812567952023e-06, "loss": 37.2344, "step": 11677 }, { "epoch": 0.11054420158839845, "grad_norm": 1042.5692138671875, "learning_rate": 1.9661733506579473e-06, "loss": 58.5312, "step": 11678 }, { "epoch": 0.1105536676101135, "grad_norm": 438.1525573730469, "learning_rate": 1.966165443612553e-06, "loss": 27.7344, "step": 11679 }, { "epoch": 0.11056313363182856, "grad_norm": 398.8329772949219, "learning_rate": 1.9661575356590256e-06, "loss": 22.6328, "step": 11680 }, { "epoch": 0.1105725996535436, "grad_norm": 414.911376953125, "learning_rate": 1.966149626797373e-06, "loss": 43.8594, "step": 11681 }, { "epoch": 0.11058206567525866, "grad_norm": 4.330409049987793, "learning_rate": 1.966141717027603e-06, "loss": 1.0498, "step": 11682 }, { "epoch": 0.11059153169697371, "grad_norm": 381.53631591796875, "learning_rate": 1.9661338063497223e-06, "loss": 23.957, "step": 11683 }, { "epoch": 0.11060099771868877, "grad_norm": 355.8726806640625, "learning_rate": 1.9661258947637385e-06, "loss": 37.4922, "step": 11684 }, { "epoch": 0.11061046374040381, "grad_norm": 3.0289359092712402, "learning_rate": 1.966117982269659e-06, "loss": 0.9995, "step": 11685 }, { "epoch": 0.11061992976211887, "grad_norm": 261.149658203125, "learning_rate": 1.9661100688674924e-06, "loss": 8.25, "step": 11686 }, { "epoch": 0.11062939578383393, "grad_norm": 395.0982971191406, "learning_rate": 1.9661021545572444e-06, "loss": 27.1562, "step": 11687 }, { "epoch": 0.11063886180554898, "grad_norm": 297.8149719238281, "learning_rate": 1.966094239338924e-06, "loss": 30.7031, "step": 11688 }, { "epoch": 0.11064832782726404, "grad_norm": 480.4171142578125, "learning_rate": 1.966086323212537e-06, "loss": 44.0938, "step": 11689 }, { "epoch": 0.11065779384897909, "grad_norm": 2.9270427227020264, "learning_rate": 1.966078406178092e-06, "loss": 0.8354, "step": 11690 }, { "epoch": 0.11066725987069415, "grad_norm": 152.99765014648438, "learning_rate": 1.966070488235596e-06, "loss": 15.9688, "step": 11691 }, { "epoch": 0.11067672589240919, "grad_norm": 446.4729919433594, "learning_rate": 1.966062569385057e-06, "loss": 31.1484, "step": 11692 }, { "epoch": 0.11068619191412425, "grad_norm": 252.77015686035156, "learning_rate": 1.9660546496264815e-06, "loss": 19.7422, "step": 11693 }, { "epoch": 0.11069565793583931, "grad_norm": 160.29949951171875, "learning_rate": 1.966046728959878e-06, "loss": 18.4766, "step": 11694 }, { "epoch": 0.11070512395755436, "grad_norm": 195.86155700683594, "learning_rate": 1.966038807385253e-06, "loss": 20.1953, "step": 11695 }, { "epoch": 0.11071458997926942, "grad_norm": 286.2187194824219, "learning_rate": 1.966030884902615e-06, "loss": 20.3984, "step": 11696 }, { "epoch": 0.11072405600098446, "grad_norm": 363.47283935546875, "learning_rate": 1.96602296151197e-06, "loss": 18.6953, "step": 11697 }, { "epoch": 0.11073352202269952, "grad_norm": 753.0455932617188, "learning_rate": 1.9660150372133266e-06, "loss": 47.5625, "step": 11698 }, { "epoch": 0.11074298804441457, "grad_norm": 242.1217498779297, "learning_rate": 1.9660071120066923e-06, "loss": 19.8828, "step": 11699 }, { "epoch": 0.11075245406612963, "grad_norm": 186.0167236328125, "learning_rate": 1.9659991858920737e-06, "loss": 17.7109, "step": 11700 }, { "epoch": 0.11076192008784468, "grad_norm": 208.97320556640625, "learning_rate": 1.9659912588694786e-06, "loss": 16.8828, "step": 11701 }, { "epoch": 0.11077138610955974, "grad_norm": 275.5387268066406, "learning_rate": 1.965983330938915e-06, "loss": 20.8906, "step": 11702 }, { "epoch": 0.1107808521312748, "grad_norm": 3.3300933837890625, "learning_rate": 1.9659754021003897e-06, "loss": 0.9321, "step": 11703 }, { "epoch": 0.11079031815298984, "grad_norm": 274.82476806640625, "learning_rate": 1.9659674723539103e-06, "loss": 21.8359, "step": 11704 }, { "epoch": 0.1107997841747049, "grad_norm": 430.6251220703125, "learning_rate": 1.9659595416994845e-06, "loss": 25.9453, "step": 11705 }, { "epoch": 0.11080925019641995, "grad_norm": 3.1388654708862305, "learning_rate": 1.9659516101371193e-06, "loss": 0.9863, "step": 11706 }, { "epoch": 0.11081871621813501, "grad_norm": 332.71490478515625, "learning_rate": 1.9659436776668225e-06, "loss": 22.4766, "step": 11707 }, { "epoch": 0.11082818223985005, "grad_norm": 3.371743679046631, "learning_rate": 1.9659357442886017e-06, "loss": 0.9219, "step": 11708 }, { "epoch": 0.11083764826156511, "grad_norm": 353.05029296875, "learning_rate": 1.965927810002464e-06, "loss": 40.1406, "step": 11709 }, { "epoch": 0.11084711428328016, "grad_norm": 355.4607849121094, "learning_rate": 1.9659198748084167e-06, "loss": 30.2891, "step": 11710 }, { "epoch": 0.11085658030499522, "grad_norm": 618.358642578125, "learning_rate": 1.965911938706468e-06, "loss": 32.8672, "step": 11711 }, { "epoch": 0.11086604632671028, "grad_norm": 527.87841796875, "learning_rate": 1.9659040016966246e-06, "loss": 38.2891, "step": 11712 }, { "epoch": 0.11087551234842533, "grad_norm": 3.5069689750671387, "learning_rate": 1.9658960637788946e-06, "loss": 1.0132, "step": 11713 }, { "epoch": 0.11088497837014039, "grad_norm": 602.0235595703125, "learning_rate": 1.9658881249532846e-06, "loss": 19.6094, "step": 11714 }, { "epoch": 0.11089444439185543, "grad_norm": 512.498291015625, "learning_rate": 1.965880185219803e-06, "loss": 41.9062, "step": 11715 }, { "epoch": 0.11090391041357049, "grad_norm": 819.5086059570312, "learning_rate": 1.9658722445784572e-06, "loss": 52.1094, "step": 11716 }, { "epoch": 0.11091337643528554, "grad_norm": 3.581167221069336, "learning_rate": 1.965864303029254e-06, "loss": 0.8657, "step": 11717 }, { "epoch": 0.1109228424570006, "grad_norm": 194.72799682617188, "learning_rate": 1.9658563605722006e-06, "loss": 21.1172, "step": 11718 }, { "epoch": 0.11093230847871564, "grad_norm": 678.2125244140625, "learning_rate": 1.965848417207306e-06, "loss": 47.6094, "step": 11719 }, { "epoch": 0.1109417745004307, "grad_norm": 430.13421630859375, "learning_rate": 1.965840472934576e-06, "loss": 31.6172, "step": 11720 }, { "epoch": 0.11095124052214576, "grad_norm": 826.6094360351562, "learning_rate": 1.9658325277540192e-06, "loss": 31.9805, "step": 11721 }, { "epoch": 0.11096070654386081, "grad_norm": 2.929476499557495, "learning_rate": 1.9658245816656425e-06, "loss": 0.8794, "step": 11722 }, { "epoch": 0.11097017256557587, "grad_norm": 316.6615295410156, "learning_rate": 1.9658166346694536e-06, "loss": 24.0469, "step": 11723 }, { "epoch": 0.11097963858729092, "grad_norm": 3.102555990219116, "learning_rate": 1.9658086867654597e-06, "loss": 0.9526, "step": 11724 }, { "epoch": 0.11098910460900598, "grad_norm": 176.4745330810547, "learning_rate": 1.9658007379536684e-06, "loss": 17.9609, "step": 11725 }, { "epoch": 0.11099857063072102, "grad_norm": 550.6502685546875, "learning_rate": 1.9657927882340877e-06, "loss": 34.9297, "step": 11726 }, { "epoch": 0.11100803665243608, "grad_norm": 330.0228576660156, "learning_rate": 1.9657848376067244e-06, "loss": 20.5859, "step": 11727 }, { "epoch": 0.11101750267415113, "grad_norm": 427.6939697265625, "learning_rate": 1.9657768860715862e-06, "loss": 40.7578, "step": 11728 }, { "epoch": 0.11102696869586619, "grad_norm": 351.5674743652344, "learning_rate": 1.9657689336286803e-06, "loss": 22.6797, "step": 11729 }, { "epoch": 0.11103643471758125, "grad_norm": 297.2141418457031, "learning_rate": 1.9657609802780146e-06, "loss": 16.2109, "step": 11730 }, { "epoch": 0.1110459007392963, "grad_norm": 311.5366516113281, "learning_rate": 1.9657530260195965e-06, "loss": 17.9062, "step": 11731 }, { "epoch": 0.11105536676101135, "grad_norm": 335.42236328125, "learning_rate": 1.9657450708534334e-06, "loss": 25.8047, "step": 11732 }, { "epoch": 0.1110648327827264, "grad_norm": 242.67848205566406, "learning_rate": 1.9657371147795327e-06, "loss": 17.7656, "step": 11733 }, { "epoch": 0.11107429880444146, "grad_norm": 401.8386535644531, "learning_rate": 1.965729157797902e-06, "loss": 30.1094, "step": 11734 }, { "epoch": 0.1110837648261565, "grad_norm": 454.2254333496094, "learning_rate": 1.9657211999085487e-06, "loss": 47.2031, "step": 11735 }, { "epoch": 0.11109323084787157, "grad_norm": 703.9826049804688, "learning_rate": 1.9657132411114803e-06, "loss": 49.7344, "step": 11736 }, { "epoch": 0.11110269686958663, "grad_norm": 390.07159423828125, "learning_rate": 1.965705281406704e-06, "loss": 30.9375, "step": 11737 }, { "epoch": 0.11111216289130167, "grad_norm": 240.27542114257812, "learning_rate": 1.9656973207942277e-06, "loss": 19.6719, "step": 11738 }, { "epoch": 0.11112162891301673, "grad_norm": 651.0078125, "learning_rate": 1.965689359274059e-06, "loss": 27.9844, "step": 11739 }, { "epoch": 0.11113109493473178, "grad_norm": 532.5540771484375, "learning_rate": 1.965681396846205e-06, "loss": 27.3281, "step": 11740 }, { "epoch": 0.11114056095644684, "grad_norm": 296.7336120605469, "learning_rate": 1.965673433510673e-06, "loss": 30.3672, "step": 11741 }, { "epoch": 0.11115002697816188, "grad_norm": 587.2009887695312, "learning_rate": 1.965665469267471e-06, "loss": 55.1719, "step": 11742 }, { "epoch": 0.11115949299987694, "grad_norm": 720.6033325195312, "learning_rate": 1.9656575041166065e-06, "loss": 55.0469, "step": 11743 }, { "epoch": 0.11116895902159199, "grad_norm": 611.6846923828125, "learning_rate": 1.9656495380580863e-06, "loss": 53.7812, "step": 11744 }, { "epoch": 0.11117842504330705, "grad_norm": 386.1188659667969, "learning_rate": 1.9656415710919187e-06, "loss": 33.3125, "step": 11745 }, { "epoch": 0.11118789106502211, "grad_norm": 753.240966796875, "learning_rate": 1.965633603218111e-06, "loss": 38.8594, "step": 11746 }, { "epoch": 0.11119735708673716, "grad_norm": 377.1036071777344, "learning_rate": 1.9656256344366704e-06, "loss": 20.5547, "step": 11747 }, { "epoch": 0.11120682310845222, "grad_norm": 290.7760314941406, "learning_rate": 1.965617664747604e-06, "loss": 32.0781, "step": 11748 }, { "epoch": 0.11121628913016726, "grad_norm": 495.35089111328125, "learning_rate": 1.96560969415092e-06, "loss": 26.1953, "step": 11749 }, { "epoch": 0.11122575515188232, "grad_norm": 3.341252088546753, "learning_rate": 1.965601722646626e-06, "loss": 0.9492, "step": 11750 }, { "epoch": 0.11123522117359737, "grad_norm": 280.80511474609375, "learning_rate": 1.9655937502347296e-06, "loss": 25.7266, "step": 11751 }, { "epoch": 0.11124468719531243, "grad_norm": 636.5368041992188, "learning_rate": 1.9655857769152372e-06, "loss": 47.5781, "step": 11752 }, { "epoch": 0.11125415321702747, "grad_norm": 212.32244873046875, "learning_rate": 1.965577802688157e-06, "loss": 18.4453, "step": 11753 }, { "epoch": 0.11126361923874253, "grad_norm": 2.9727799892425537, "learning_rate": 1.9655698275534973e-06, "loss": 0.897, "step": 11754 }, { "epoch": 0.1112730852604576, "grad_norm": 207.33998107910156, "learning_rate": 1.965561851511264e-06, "loss": 9.6055, "step": 11755 }, { "epoch": 0.11128255128217264, "grad_norm": 287.4629821777344, "learning_rate": 1.9655538745614656e-06, "loss": 9.375, "step": 11756 }, { "epoch": 0.1112920173038877, "grad_norm": 528.9722900390625, "learning_rate": 1.9655458967041094e-06, "loss": 53.125, "step": 11757 }, { "epoch": 0.11130148332560275, "grad_norm": 360.6204833984375, "learning_rate": 1.9655379179392027e-06, "loss": 24.5859, "step": 11758 }, { "epoch": 0.1113109493473178, "grad_norm": 405.0380859375, "learning_rate": 1.965529938266753e-06, "loss": 10.4844, "step": 11759 }, { "epoch": 0.11132041536903285, "grad_norm": 659.9009399414062, "learning_rate": 1.9655219576867684e-06, "loss": 15.0195, "step": 11760 }, { "epoch": 0.11132988139074791, "grad_norm": 528.8104248046875, "learning_rate": 1.9655139761992556e-06, "loss": 48.7188, "step": 11761 }, { "epoch": 0.11133934741246296, "grad_norm": 258.91217041015625, "learning_rate": 1.9655059938042227e-06, "loss": 25.4375, "step": 11762 }, { "epoch": 0.11134881343417802, "grad_norm": 768.3623046875, "learning_rate": 1.965498010501677e-06, "loss": 20.0078, "step": 11763 }, { "epoch": 0.11135827945589308, "grad_norm": 159.1790008544922, "learning_rate": 1.965490026291626e-06, "loss": 24.7891, "step": 11764 }, { "epoch": 0.11136774547760812, "grad_norm": 428.470703125, "learning_rate": 1.965482041174077e-06, "loss": 22.3203, "step": 11765 }, { "epoch": 0.11137721149932318, "grad_norm": 221.975341796875, "learning_rate": 1.965474055149038e-06, "loss": 23.4531, "step": 11766 }, { "epoch": 0.11138667752103823, "grad_norm": 523.70263671875, "learning_rate": 1.9654660682165162e-06, "loss": 36.918, "step": 11767 }, { "epoch": 0.11139614354275329, "grad_norm": 558.5667724609375, "learning_rate": 1.9654580803765185e-06, "loss": 21.3047, "step": 11768 }, { "epoch": 0.11140560956446834, "grad_norm": 569.7611083984375, "learning_rate": 1.9654500916290537e-06, "loss": 28.3203, "step": 11769 }, { "epoch": 0.1114150755861834, "grad_norm": 292.2543029785156, "learning_rate": 1.965442101974128e-06, "loss": 13.1289, "step": 11770 }, { "epoch": 0.11142454160789844, "grad_norm": 294.1295166015625, "learning_rate": 1.96543411141175e-06, "loss": 22.7539, "step": 11771 }, { "epoch": 0.1114340076296135, "grad_norm": 666.3610229492188, "learning_rate": 1.9654261199419267e-06, "loss": 34.6914, "step": 11772 }, { "epoch": 0.11144347365132856, "grad_norm": 257.47320556640625, "learning_rate": 1.9654181275646657e-06, "loss": 31.8281, "step": 11773 }, { "epoch": 0.11145293967304361, "grad_norm": 612.99755859375, "learning_rate": 1.965410134279974e-06, "loss": 58.1875, "step": 11774 }, { "epoch": 0.11146240569475867, "grad_norm": 487.3499450683594, "learning_rate": 1.96540214008786e-06, "loss": 48.5469, "step": 11775 }, { "epoch": 0.11147187171647371, "grad_norm": 362.91583251953125, "learning_rate": 1.965394144988331e-06, "loss": 18.7266, "step": 11776 }, { "epoch": 0.11148133773818877, "grad_norm": 186.30996704101562, "learning_rate": 1.965386148981394e-06, "loss": 23.0625, "step": 11777 }, { "epoch": 0.11149080375990382, "grad_norm": 269.5071105957031, "learning_rate": 1.9653781520670564e-06, "loss": 23.6875, "step": 11778 }, { "epoch": 0.11150026978161888, "grad_norm": 900.319091796875, "learning_rate": 1.965370154245327e-06, "loss": 45.4219, "step": 11779 }, { "epoch": 0.11150973580333394, "grad_norm": 416.7370910644531, "learning_rate": 1.965362155516212e-06, "loss": 23.8516, "step": 11780 }, { "epoch": 0.11151920182504899, "grad_norm": 486.7370910644531, "learning_rate": 1.965354155879719e-06, "loss": 57.6875, "step": 11781 }, { "epoch": 0.11152866784676405, "grad_norm": 3.192990779876709, "learning_rate": 1.9653461553358565e-06, "loss": 0.9414, "step": 11782 }, { "epoch": 0.11153813386847909, "grad_norm": 442.8413391113281, "learning_rate": 1.9653381538846313e-06, "loss": 15.5352, "step": 11783 }, { "epoch": 0.11154759989019415, "grad_norm": 333.4385681152344, "learning_rate": 1.965330151526051e-06, "loss": 8.5781, "step": 11784 }, { "epoch": 0.1115570659119092, "grad_norm": 2.7087290287017822, "learning_rate": 1.965322148260123e-06, "loss": 0.8677, "step": 11785 }, { "epoch": 0.11156653193362426, "grad_norm": 2.9596996307373047, "learning_rate": 1.965314144086855e-06, "loss": 0.9326, "step": 11786 }, { "epoch": 0.1115759979553393, "grad_norm": 266.14312744140625, "learning_rate": 1.9653061390062545e-06, "loss": 36.7656, "step": 11787 }, { "epoch": 0.11158546397705436, "grad_norm": 512.4434814453125, "learning_rate": 1.9652981330183293e-06, "loss": 39.1172, "step": 11788 }, { "epoch": 0.11159492999876942, "grad_norm": 372.62200927734375, "learning_rate": 1.965290126123086e-06, "loss": 30.6016, "step": 11789 }, { "epoch": 0.11160439602048447, "grad_norm": 369.6979675292969, "learning_rate": 1.9652821183205336e-06, "loss": 17.7656, "step": 11790 }, { "epoch": 0.11161386204219953, "grad_norm": 301.2774963378906, "learning_rate": 1.9652741096106786e-06, "loss": 18.2305, "step": 11791 }, { "epoch": 0.11162332806391458, "grad_norm": 291.8656311035156, "learning_rate": 1.965266099993528e-06, "loss": 23.2266, "step": 11792 }, { "epoch": 0.11163279408562964, "grad_norm": 291.867919921875, "learning_rate": 1.965258089469091e-06, "loss": 21.3633, "step": 11793 }, { "epoch": 0.11164226010734468, "grad_norm": 484.25115966796875, "learning_rate": 1.9652500780373737e-06, "loss": 30.9258, "step": 11794 }, { "epoch": 0.11165172612905974, "grad_norm": 354.8265075683594, "learning_rate": 1.9652420656983844e-06, "loss": 54.3125, "step": 11795 }, { "epoch": 0.11166119215077479, "grad_norm": 269.8039245605469, "learning_rate": 1.96523405245213e-06, "loss": 22.9336, "step": 11796 }, { "epoch": 0.11167065817248985, "grad_norm": 741.570556640625, "learning_rate": 1.965226038298619e-06, "loss": 48.0156, "step": 11797 }, { "epoch": 0.11168012419420491, "grad_norm": 3.0399882793426514, "learning_rate": 1.9652180232378575e-06, "loss": 0.9702, "step": 11798 }, { "epoch": 0.11168959021591995, "grad_norm": 321.8238525390625, "learning_rate": 1.9652100072698544e-06, "loss": 22.3672, "step": 11799 }, { "epoch": 0.11169905623763501, "grad_norm": 477.90716552734375, "learning_rate": 1.9652019903946166e-06, "loss": 27.3359, "step": 11800 }, { "epoch": 0.11170852225935006, "grad_norm": 301.3017272949219, "learning_rate": 1.9651939726121516e-06, "loss": 25.875, "step": 11801 }, { "epoch": 0.11171798828106512, "grad_norm": 373.2320251464844, "learning_rate": 1.965185953922467e-06, "loss": 35.7344, "step": 11802 }, { "epoch": 0.11172745430278017, "grad_norm": 428.96234130859375, "learning_rate": 1.9651779343255703e-06, "loss": 51.5781, "step": 11803 }, { "epoch": 0.11173692032449523, "grad_norm": 434.5393981933594, "learning_rate": 1.9651699138214693e-06, "loss": 39.2734, "step": 11804 }, { "epoch": 0.11174638634621027, "grad_norm": 399.5402526855469, "learning_rate": 1.9651618924101716e-06, "loss": 39.1094, "step": 11805 }, { "epoch": 0.11175585236792533, "grad_norm": 266.142333984375, "learning_rate": 1.965153870091684e-06, "loss": 10.5078, "step": 11806 }, { "epoch": 0.11176531838964039, "grad_norm": 739.8113403320312, "learning_rate": 1.965145846866015e-06, "loss": 38.1953, "step": 11807 }, { "epoch": 0.11177478441135544, "grad_norm": 246.14907836914062, "learning_rate": 1.9651378227331716e-06, "loss": 15.375, "step": 11808 }, { "epoch": 0.1117842504330705, "grad_norm": 324.5140380859375, "learning_rate": 1.9651297976931614e-06, "loss": 33.2656, "step": 11809 }, { "epoch": 0.11179371645478554, "grad_norm": 603.7135009765625, "learning_rate": 1.965121771745992e-06, "loss": 19.3164, "step": 11810 }, { "epoch": 0.1118031824765006, "grad_norm": 2.937551736831665, "learning_rate": 1.965113744891671e-06, "loss": 0.7651, "step": 11811 }, { "epoch": 0.11181264849821565, "grad_norm": 304.43438720703125, "learning_rate": 1.965105717130206e-06, "loss": 22.75, "step": 11812 }, { "epoch": 0.11182211451993071, "grad_norm": 703.4104614257812, "learning_rate": 1.965097688461604e-06, "loss": 25.5078, "step": 11813 }, { "epoch": 0.11183158054164576, "grad_norm": 360.3617858886719, "learning_rate": 1.965089658885873e-06, "loss": 20.4414, "step": 11814 }, { "epoch": 0.11184104656336082, "grad_norm": 326.96905517578125, "learning_rate": 1.9650816284030207e-06, "loss": 33.9375, "step": 11815 }, { "epoch": 0.11185051258507588, "grad_norm": 392.4375305175781, "learning_rate": 1.9650735970130543e-06, "loss": 34.4062, "step": 11816 }, { "epoch": 0.11185997860679092, "grad_norm": 369.2729797363281, "learning_rate": 1.9650655647159813e-06, "loss": 47.4062, "step": 11817 }, { "epoch": 0.11186944462850598, "grad_norm": 183.10122680664062, "learning_rate": 1.9650575315118097e-06, "loss": 17.7969, "step": 11818 }, { "epoch": 0.11187891065022103, "grad_norm": 266.5460510253906, "learning_rate": 1.9650494974005468e-06, "loss": 20.7188, "step": 11819 }, { "epoch": 0.11188837667193609, "grad_norm": 554.1602172851562, "learning_rate": 1.9650414623822004e-06, "loss": 43.4375, "step": 11820 }, { "epoch": 0.11189784269365113, "grad_norm": 3.0168724060058594, "learning_rate": 1.9650334264567775e-06, "loss": 0.9741, "step": 11821 }, { "epoch": 0.1119073087153662, "grad_norm": 252.2909393310547, "learning_rate": 1.965025389624286e-06, "loss": 25.4453, "step": 11822 }, { "epoch": 0.11191677473708125, "grad_norm": 195.7599334716797, "learning_rate": 1.9650173518847333e-06, "loss": 17.4141, "step": 11823 }, { "epoch": 0.1119262407587963, "grad_norm": 324.9488220214844, "learning_rate": 1.9650093132381273e-06, "loss": 49.9062, "step": 11824 }, { "epoch": 0.11193570678051136, "grad_norm": 359.6677551269531, "learning_rate": 1.9650012736844756e-06, "loss": 37.2969, "step": 11825 }, { "epoch": 0.1119451728022264, "grad_norm": 378.8276672363281, "learning_rate": 1.964993233223785e-06, "loss": 20.5156, "step": 11826 }, { "epoch": 0.11195463882394147, "grad_norm": 364.17291259765625, "learning_rate": 1.9649851918560636e-06, "loss": 22.8281, "step": 11827 }, { "epoch": 0.11196410484565651, "grad_norm": 551.3377075195312, "learning_rate": 1.964977149581319e-06, "loss": 36.7812, "step": 11828 }, { "epoch": 0.11197357086737157, "grad_norm": 432.240234375, "learning_rate": 1.9649691063995583e-06, "loss": 27.75, "step": 11829 }, { "epoch": 0.11198303688908662, "grad_norm": 728.6136474609375, "learning_rate": 1.9649610623107898e-06, "loss": 53.4062, "step": 11830 }, { "epoch": 0.11199250291080168, "grad_norm": 531.7673950195312, "learning_rate": 1.9649530173150204e-06, "loss": 29.3906, "step": 11831 }, { "epoch": 0.11200196893251674, "grad_norm": 446.7733154296875, "learning_rate": 1.9649449714122583e-06, "loss": 58.4531, "step": 11832 }, { "epoch": 0.11201143495423178, "grad_norm": 802.1394653320312, "learning_rate": 1.9649369246025108e-06, "loss": 19.2188, "step": 11833 }, { "epoch": 0.11202090097594684, "grad_norm": 3.3575124740600586, "learning_rate": 1.964928876885785e-06, "loss": 0.8633, "step": 11834 }, { "epoch": 0.11203036699766189, "grad_norm": 262.9703674316406, "learning_rate": 1.9649208282620887e-06, "loss": 20.8516, "step": 11835 }, { "epoch": 0.11203983301937695, "grad_norm": 183.20506286621094, "learning_rate": 1.96491277873143e-06, "loss": 14.1211, "step": 11836 }, { "epoch": 0.112049299041092, "grad_norm": 3.117825984954834, "learning_rate": 1.9649047282938156e-06, "loss": 0.9126, "step": 11837 }, { "epoch": 0.11205876506280706, "grad_norm": 1039.4307861328125, "learning_rate": 1.964896676949254e-06, "loss": 50.5, "step": 11838 }, { "epoch": 0.1120682310845221, "grad_norm": 486.6468505859375, "learning_rate": 1.964888624697752e-06, "loss": 53.6562, "step": 11839 }, { "epoch": 0.11207769710623716, "grad_norm": 279.1471252441406, "learning_rate": 1.9648805715393177e-06, "loss": 22.7578, "step": 11840 }, { "epoch": 0.11208716312795222, "grad_norm": 560.871337890625, "learning_rate": 1.9648725174739583e-06, "loss": 16.8477, "step": 11841 }, { "epoch": 0.11209662914966727, "grad_norm": 427.2511901855469, "learning_rate": 1.9648644625016816e-06, "loss": 27.1484, "step": 11842 }, { "epoch": 0.11210609517138233, "grad_norm": 510.57733154296875, "learning_rate": 1.9648564066224947e-06, "loss": 47.1719, "step": 11843 }, { "epoch": 0.11211556119309737, "grad_norm": 562.2008666992188, "learning_rate": 1.964848349836406e-06, "loss": 55.6406, "step": 11844 }, { "epoch": 0.11212502721481243, "grad_norm": 843.8475952148438, "learning_rate": 1.9648402921434224e-06, "loss": 64.5156, "step": 11845 }, { "epoch": 0.11213449323652748, "grad_norm": 3.1747097969055176, "learning_rate": 1.964832233543552e-06, "loss": 0.9487, "step": 11846 }, { "epoch": 0.11214395925824254, "grad_norm": 279.4598388671875, "learning_rate": 1.9648241740368015e-06, "loss": 7.1758, "step": 11847 }, { "epoch": 0.11215342527995759, "grad_norm": 591.0211791992188, "learning_rate": 1.964816113623179e-06, "loss": 47.2031, "step": 11848 }, { "epoch": 0.11216289130167265, "grad_norm": 208.06138610839844, "learning_rate": 1.964808052302693e-06, "loss": 26.2031, "step": 11849 }, { "epoch": 0.1121723573233877, "grad_norm": 165.39552307128906, "learning_rate": 1.9647999900753495e-06, "loss": 18.6953, "step": 11850 }, { "epoch": 0.11218182334510275, "grad_norm": 574.6751098632812, "learning_rate": 1.964791926941157e-06, "loss": 41.6875, "step": 11851 }, { "epoch": 0.11219128936681781, "grad_norm": 285.316650390625, "learning_rate": 1.9647838629001224e-06, "loss": 18.8047, "step": 11852 }, { "epoch": 0.11220075538853286, "grad_norm": 434.1623840332031, "learning_rate": 1.9647757979522543e-06, "loss": 21.5234, "step": 11853 }, { "epoch": 0.11221022141024792, "grad_norm": 1851.37744140625, "learning_rate": 1.9647677320975596e-06, "loss": 63.375, "step": 11854 }, { "epoch": 0.11221968743196296, "grad_norm": 591.4910278320312, "learning_rate": 1.964759665336046e-06, "loss": 43.2188, "step": 11855 }, { "epoch": 0.11222915345367802, "grad_norm": 140.3282470703125, "learning_rate": 1.964751597667721e-06, "loss": 19.7109, "step": 11856 }, { "epoch": 0.11223861947539307, "grad_norm": 755.3479614257812, "learning_rate": 1.9647435290925924e-06, "loss": 36.9922, "step": 11857 }, { "epoch": 0.11224808549710813, "grad_norm": 3.2923851013183594, "learning_rate": 1.964735459610667e-06, "loss": 0.9375, "step": 11858 }, { "epoch": 0.11225755151882319, "grad_norm": 458.12451171875, "learning_rate": 1.9647273892219537e-06, "loss": 28.5, "step": 11859 }, { "epoch": 0.11226701754053824, "grad_norm": 207.51177978515625, "learning_rate": 1.9647193179264593e-06, "loss": 12.75, "step": 11860 }, { "epoch": 0.1122764835622533, "grad_norm": 3.5491461753845215, "learning_rate": 1.9647112457241916e-06, "loss": 0.9761, "step": 11861 }, { "epoch": 0.11228594958396834, "grad_norm": 255.55099487304688, "learning_rate": 1.9647031726151578e-06, "loss": 22.5156, "step": 11862 }, { "epoch": 0.1122954156056834, "grad_norm": 886.084228515625, "learning_rate": 1.964695098599366e-06, "loss": 33.25, "step": 11863 }, { "epoch": 0.11230488162739845, "grad_norm": 465.6686706542969, "learning_rate": 1.9646870236768234e-06, "loss": 35.625, "step": 11864 }, { "epoch": 0.11231434764911351, "grad_norm": 527.2566528320312, "learning_rate": 1.9646789478475378e-06, "loss": 50.4453, "step": 11865 }, { "epoch": 0.11232381367082855, "grad_norm": 229.45413208007812, "learning_rate": 1.964670871111517e-06, "loss": 16.9609, "step": 11866 }, { "epoch": 0.11233327969254361, "grad_norm": 845.857421875, "learning_rate": 1.964662793468768e-06, "loss": 21.1953, "step": 11867 }, { "epoch": 0.11234274571425867, "grad_norm": 290.71746826171875, "learning_rate": 1.9646547149192987e-06, "loss": 19.2969, "step": 11868 }, { "epoch": 0.11235221173597372, "grad_norm": 270.8349304199219, "learning_rate": 1.9646466354631166e-06, "loss": 24.3359, "step": 11869 }, { "epoch": 0.11236167775768878, "grad_norm": 476.2601318359375, "learning_rate": 1.96463855510023e-06, "loss": 39.0625, "step": 11870 }, { "epoch": 0.11237114377940383, "grad_norm": 328.3622131347656, "learning_rate": 1.9646304738306455e-06, "loss": 35.3125, "step": 11871 }, { "epoch": 0.11238060980111889, "grad_norm": 3.325679302215576, "learning_rate": 1.9646223916543713e-06, "loss": 1.0088, "step": 11872 }, { "epoch": 0.11239007582283393, "grad_norm": 822.779296875, "learning_rate": 1.964614308571415e-06, "loss": 46.9062, "step": 11873 }, { "epoch": 0.11239954184454899, "grad_norm": 231.6111602783203, "learning_rate": 1.9646062245817832e-06, "loss": 20.8164, "step": 11874 }, { "epoch": 0.11240900786626405, "grad_norm": 601.4114990234375, "learning_rate": 1.964598139685485e-06, "loss": 64.7344, "step": 11875 }, { "epoch": 0.1124184738879791, "grad_norm": 3.3167943954467773, "learning_rate": 1.964590053882527e-06, "loss": 0.9282, "step": 11876 }, { "epoch": 0.11242793990969416, "grad_norm": 3.037031888961792, "learning_rate": 1.9645819671729172e-06, "loss": 0.9048, "step": 11877 }, { "epoch": 0.1124374059314092, "grad_norm": 175.35647583007812, "learning_rate": 1.9645738795566633e-06, "loss": 24.1484, "step": 11878 }, { "epoch": 0.11244687195312426, "grad_norm": 267.2883605957031, "learning_rate": 1.9645657910337724e-06, "loss": 18.0664, "step": 11879 }, { "epoch": 0.11245633797483931, "grad_norm": 270.5318908691406, "learning_rate": 1.9645577016042525e-06, "loss": 15.3906, "step": 11880 }, { "epoch": 0.11246580399655437, "grad_norm": 647.7953491210938, "learning_rate": 1.964549611268111e-06, "loss": 55.3438, "step": 11881 }, { "epoch": 0.11247527001826942, "grad_norm": 3.0943639278411865, "learning_rate": 1.9645415200253558e-06, "loss": 1.0674, "step": 11882 }, { "epoch": 0.11248473603998448, "grad_norm": 127.84879302978516, "learning_rate": 1.9645334278759945e-06, "loss": 14.8984, "step": 11883 }, { "epoch": 0.11249420206169954, "grad_norm": 627.2319946289062, "learning_rate": 1.964525334820034e-06, "loss": 49.6719, "step": 11884 }, { "epoch": 0.11250366808341458, "grad_norm": 764.001220703125, "learning_rate": 1.9645172408574826e-06, "loss": 42.3125, "step": 11885 }, { "epoch": 0.11251313410512964, "grad_norm": 238.67355346679688, "learning_rate": 1.964509145988348e-06, "loss": 22.5234, "step": 11886 }, { "epoch": 0.11252260012684469, "grad_norm": 413.6554260253906, "learning_rate": 1.9645010502126372e-06, "loss": 41.1719, "step": 11887 }, { "epoch": 0.11253206614855975, "grad_norm": 889.3638305664062, "learning_rate": 1.9644929535303584e-06, "loss": 30.4844, "step": 11888 }, { "epoch": 0.1125415321702748, "grad_norm": 221.21202087402344, "learning_rate": 1.964484855941519e-06, "loss": 19.875, "step": 11889 }, { "epoch": 0.11255099819198985, "grad_norm": 533.1671752929688, "learning_rate": 1.9644767574461266e-06, "loss": 19.6953, "step": 11890 }, { "epoch": 0.1125604642137049, "grad_norm": 239.32391357421875, "learning_rate": 1.964468658044189e-06, "loss": 17.9453, "step": 11891 }, { "epoch": 0.11256993023541996, "grad_norm": 352.6235046386719, "learning_rate": 1.964460557735713e-06, "loss": 26.7852, "step": 11892 }, { "epoch": 0.11257939625713502, "grad_norm": 546.8124389648438, "learning_rate": 1.964452456520707e-06, "loss": 45.8906, "step": 11893 }, { "epoch": 0.11258886227885007, "grad_norm": 436.5029602050781, "learning_rate": 1.9644443543991786e-06, "loss": 45.0469, "step": 11894 }, { "epoch": 0.11259832830056513, "grad_norm": 202.69366455078125, "learning_rate": 1.964436251371135e-06, "loss": 16.1094, "step": 11895 }, { "epoch": 0.11260779432228017, "grad_norm": 593.6011962890625, "learning_rate": 1.9644281474365844e-06, "loss": 29.793, "step": 11896 }, { "epoch": 0.11261726034399523, "grad_norm": 398.5366516113281, "learning_rate": 1.9644200425955343e-06, "loss": 36.2266, "step": 11897 }, { "epoch": 0.11262672636571028, "grad_norm": 1065.91650390625, "learning_rate": 1.9644119368479912e-06, "loss": 35.8594, "step": 11898 }, { "epoch": 0.11263619238742534, "grad_norm": 216.24664306640625, "learning_rate": 1.9644038301939645e-06, "loss": 18.0234, "step": 11899 }, { "epoch": 0.11264565840914038, "grad_norm": 481.7897033691406, "learning_rate": 1.9643957226334605e-06, "loss": 18.3867, "step": 11900 }, { "epoch": 0.11265512443085544, "grad_norm": 402.0020751953125, "learning_rate": 1.9643876141664872e-06, "loss": 47.9531, "step": 11901 }, { "epoch": 0.1126645904525705, "grad_norm": 182.9598388671875, "learning_rate": 1.9643795047930528e-06, "loss": 21.125, "step": 11902 }, { "epoch": 0.11267405647428555, "grad_norm": 484.8282165527344, "learning_rate": 1.964371394513164e-06, "loss": 48.1719, "step": 11903 }, { "epoch": 0.11268352249600061, "grad_norm": 767.2601928710938, "learning_rate": 1.9643632833268286e-06, "loss": 46.5195, "step": 11904 }, { "epoch": 0.11269298851771566, "grad_norm": 496.21356201171875, "learning_rate": 1.964355171234055e-06, "loss": 38.0156, "step": 11905 }, { "epoch": 0.11270245453943072, "grad_norm": 360.0345153808594, "learning_rate": 1.96434705823485e-06, "loss": 21.6328, "step": 11906 }, { "epoch": 0.11271192056114576, "grad_norm": 841.19873046875, "learning_rate": 1.964338944329221e-06, "loss": 40.8516, "step": 11907 }, { "epoch": 0.11272138658286082, "grad_norm": 739.2568359375, "learning_rate": 1.964330829517177e-06, "loss": 40.0781, "step": 11908 }, { "epoch": 0.11273085260457587, "grad_norm": 3.9100003242492676, "learning_rate": 1.9643227137987242e-06, "loss": 0.9985, "step": 11909 }, { "epoch": 0.11274031862629093, "grad_norm": 538.384521484375, "learning_rate": 1.964314597173871e-06, "loss": 29.3281, "step": 11910 }, { "epoch": 0.11274978464800599, "grad_norm": 2.951982021331787, "learning_rate": 1.9643064796426247e-06, "loss": 0.9468, "step": 11911 }, { "epoch": 0.11275925066972103, "grad_norm": 330.00006103515625, "learning_rate": 1.9642983612049933e-06, "loss": 25.1523, "step": 11912 }, { "epoch": 0.1127687166914361, "grad_norm": 462.856689453125, "learning_rate": 1.964290241860984e-06, "loss": 38.0781, "step": 11913 }, { "epoch": 0.11277818271315114, "grad_norm": 218.53709411621094, "learning_rate": 1.9642821216106043e-06, "loss": 23.7031, "step": 11914 }, { "epoch": 0.1127876487348662, "grad_norm": 1141.9334716796875, "learning_rate": 1.964274000453863e-06, "loss": 45.9766, "step": 11915 }, { "epoch": 0.11279711475658125, "grad_norm": 302.6029052734375, "learning_rate": 1.964265878390766e-06, "loss": 30.0391, "step": 11916 }, { "epoch": 0.1128065807782963, "grad_norm": 917.7401123046875, "learning_rate": 1.964257755421322e-06, "loss": 27.7734, "step": 11917 }, { "epoch": 0.11281604680001137, "grad_norm": 161.87667846679688, "learning_rate": 1.9642496315455387e-06, "loss": 18.5859, "step": 11918 }, { "epoch": 0.11282551282172641, "grad_norm": 342.9834899902344, "learning_rate": 1.964241506763423e-06, "loss": 40.0312, "step": 11919 }, { "epoch": 0.11283497884344147, "grad_norm": 623.6222534179688, "learning_rate": 1.9642333810749836e-06, "loss": 37.4609, "step": 11920 }, { "epoch": 0.11284444486515652, "grad_norm": 395.8309020996094, "learning_rate": 1.964225254480227e-06, "loss": 34.0078, "step": 11921 }, { "epoch": 0.11285391088687158, "grad_norm": 719.568603515625, "learning_rate": 1.964217126979162e-06, "loss": 38.0938, "step": 11922 }, { "epoch": 0.11286337690858662, "grad_norm": 416.4065246582031, "learning_rate": 1.964208998571795e-06, "loss": 41.8438, "step": 11923 }, { "epoch": 0.11287284293030168, "grad_norm": 552.0810546875, "learning_rate": 1.964200869258135e-06, "loss": 39.4375, "step": 11924 }, { "epoch": 0.11288230895201673, "grad_norm": 4.05979585647583, "learning_rate": 1.964192739038188e-06, "loss": 1.0059, "step": 11925 }, { "epoch": 0.11289177497373179, "grad_norm": 2.875101089477539, "learning_rate": 1.9641846079119635e-06, "loss": 0.9043, "step": 11926 }, { "epoch": 0.11290124099544685, "grad_norm": 931.3070678710938, "learning_rate": 1.9641764758794676e-06, "loss": 46.2031, "step": 11927 }, { "epoch": 0.1129107070171619, "grad_norm": 272.2159729003906, "learning_rate": 1.9641683429407083e-06, "loss": 19.2812, "step": 11928 }, { "epoch": 0.11292017303887696, "grad_norm": 307.9062805175781, "learning_rate": 1.9641602090956937e-06, "loss": 33.7812, "step": 11929 }, { "epoch": 0.112929639060592, "grad_norm": 2.820709705352783, "learning_rate": 1.9641520743444317e-06, "loss": 0.8848, "step": 11930 }, { "epoch": 0.11293910508230706, "grad_norm": 194.04608154296875, "learning_rate": 1.964143938686929e-06, "loss": 15.8047, "step": 11931 }, { "epoch": 0.11294857110402211, "grad_norm": 2.7401630878448486, "learning_rate": 1.9641358021231938e-06, "loss": 0.8071, "step": 11932 }, { "epoch": 0.11295803712573717, "grad_norm": 262.0780029296875, "learning_rate": 1.9641276646532334e-06, "loss": 21.8125, "step": 11933 }, { "epoch": 0.11296750314745221, "grad_norm": 440.6603088378906, "learning_rate": 1.9641195262770563e-06, "loss": 61.875, "step": 11934 }, { "epoch": 0.11297696916916727, "grad_norm": 242.6040496826172, "learning_rate": 1.964111386994669e-06, "loss": 27.6094, "step": 11935 }, { "epoch": 0.11298643519088233, "grad_norm": 959.8777465820312, "learning_rate": 1.9641032468060803e-06, "loss": 46.5469, "step": 11936 }, { "epoch": 0.11299590121259738, "grad_norm": 197.1245574951172, "learning_rate": 1.964095105711297e-06, "loss": 22.6016, "step": 11937 }, { "epoch": 0.11300536723431244, "grad_norm": 661.4392700195312, "learning_rate": 1.964086963710327e-06, "loss": 76.5938, "step": 11938 }, { "epoch": 0.11301483325602749, "grad_norm": 981.5134887695312, "learning_rate": 1.964078820803178e-06, "loss": 21.1914, "step": 11939 }, { "epoch": 0.11302429927774255, "grad_norm": 219.07411193847656, "learning_rate": 1.9640706769898574e-06, "loss": 21.7969, "step": 11940 }, { "epoch": 0.11303376529945759, "grad_norm": 493.24407958984375, "learning_rate": 1.9640625322703733e-06, "loss": 26.0391, "step": 11941 }, { "epoch": 0.11304323132117265, "grad_norm": 406.1639099121094, "learning_rate": 1.9640543866447334e-06, "loss": 43.3828, "step": 11942 }, { "epoch": 0.1130526973428877, "grad_norm": 347.8591613769531, "learning_rate": 1.964046240112945e-06, "loss": 30.8828, "step": 11943 }, { "epoch": 0.11306216336460276, "grad_norm": 189.30755615234375, "learning_rate": 1.9640380926750152e-06, "loss": 18.3828, "step": 11944 }, { "epoch": 0.11307162938631782, "grad_norm": 360.943359375, "learning_rate": 1.964029944330953e-06, "loss": 13.5645, "step": 11945 }, { "epoch": 0.11308109540803286, "grad_norm": 439.37384033203125, "learning_rate": 1.9640217950807647e-06, "loss": 38.7344, "step": 11946 }, { "epoch": 0.11309056142974792, "grad_norm": 306.2808532714844, "learning_rate": 1.9640136449244595e-06, "loss": 48.3438, "step": 11947 }, { "epoch": 0.11310002745146297, "grad_norm": 289.4149169921875, "learning_rate": 1.9640054938620433e-06, "loss": 18.8516, "step": 11948 }, { "epoch": 0.11310949347317803, "grad_norm": 271.3688049316406, "learning_rate": 1.9639973418935255e-06, "loss": 18.7383, "step": 11949 }, { "epoch": 0.11311895949489308, "grad_norm": 2.99210262298584, "learning_rate": 1.9639891890189124e-06, "loss": 0.9092, "step": 11950 }, { "epoch": 0.11312842551660814, "grad_norm": 300.530029296875, "learning_rate": 1.9639810352382124e-06, "loss": 21.1797, "step": 11951 }, { "epoch": 0.11313789153832318, "grad_norm": 765.3668823242188, "learning_rate": 1.9639728805514325e-06, "loss": 49.8828, "step": 11952 }, { "epoch": 0.11314735756003824, "grad_norm": 208.53614807128906, "learning_rate": 1.963964724958581e-06, "loss": 16.6094, "step": 11953 }, { "epoch": 0.1131568235817533, "grad_norm": 885.7110595703125, "learning_rate": 1.9639565684596655e-06, "loss": 15.0586, "step": 11954 }, { "epoch": 0.11316628960346835, "grad_norm": 387.4890441894531, "learning_rate": 1.9639484110546937e-06, "loss": 43.5312, "step": 11955 }, { "epoch": 0.11317575562518341, "grad_norm": 483.27313232421875, "learning_rate": 1.963940252743673e-06, "loss": 40.6016, "step": 11956 }, { "epoch": 0.11318522164689845, "grad_norm": 818.569091796875, "learning_rate": 1.963932093526611e-06, "loss": 49.8125, "step": 11957 }, { "epoch": 0.11319468766861351, "grad_norm": 269.51361083984375, "learning_rate": 1.9639239334035157e-06, "loss": 34.6562, "step": 11958 }, { "epoch": 0.11320415369032856, "grad_norm": 644.0307006835938, "learning_rate": 1.9639157723743945e-06, "loss": 51.0625, "step": 11959 }, { "epoch": 0.11321361971204362, "grad_norm": 442.998779296875, "learning_rate": 1.963907610439255e-06, "loss": 48.25, "step": 11960 }, { "epoch": 0.11322308573375868, "grad_norm": 308.0646057128906, "learning_rate": 1.9638994475981054e-06, "loss": 19.8047, "step": 11961 }, { "epoch": 0.11323255175547373, "grad_norm": 179.66009521484375, "learning_rate": 1.963891283850953e-06, "loss": 8.3184, "step": 11962 }, { "epoch": 0.11324201777718879, "grad_norm": 639.522705078125, "learning_rate": 1.9638831191978053e-06, "loss": 15.875, "step": 11963 }, { "epoch": 0.11325148379890383, "grad_norm": 367.2482604980469, "learning_rate": 1.96387495363867e-06, "loss": 13.8203, "step": 11964 }, { "epoch": 0.11326094982061889, "grad_norm": 325.9070739746094, "learning_rate": 1.9638667871735556e-06, "loss": 41.9219, "step": 11965 }, { "epoch": 0.11327041584233394, "grad_norm": 747.8284912109375, "learning_rate": 1.9638586198024683e-06, "loss": 51.9062, "step": 11966 }, { "epoch": 0.113279881864049, "grad_norm": 287.3350524902344, "learning_rate": 1.9638504515254174e-06, "loss": 17.3281, "step": 11967 }, { "epoch": 0.11328934788576404, "grad_norm": 3.2877156734466553, "learning_rate": 1.9638422823424093e-06, "loss": 0.8279, "step": 11968 }, { "epoch": 0.1132988139074791, "grad_norm": 295.4991149902344, "learning_rate": 1.9638341122534524e-06, "loss": 17.4688, "step": 11969 }, { "epoch": 0.11330827992919416, "grad_norm": 294.7041931152344, "learning_rate": 1.963825941258554e-06, "loss": 21.0703, "step": 11970 }, { "epoch": 0.11331774595090921, "grad_norm": 251.45005798339844, "learning_rate": 1.963817769357722e-06, "loss": 30.75, "step": 11971 }, { "epoch": 0.11332721197262427, "grad_norm": 239.039306640625, "learning_rate": 1.963809596550964e-06, "loss": 20.4297, "step": 11972 }, { "epoch": 0.11333667799433932, "grad_norm": 341.2623291015625, "learning_rate": 1.9638014228382874e-06, "loss": 36.9062, "step": 11973 }, { "epoch": 0.11334614401605438, "grad_norm": 1380.5069580078125, "learning_rate": 1.9637932482197002e-06, "loss": 25.1406, "step": 11974 }, { "epoch": 0.11335561003776942, "grad_norm": 644.3963623046875, "learning_rate": 1.96378507269521e-06, "loss": 55.0312, "step": 11975 }, { "epoch": 0.11336507605948448, "grad_norm": 371.4204406738281, "learning_rate": 1.963776896264825e-06, "loss": 45.0938, "step": 11976 }, { "epoch": 0.11337454208119953, "grad_norm": 330.2667236328125, "learning_rate": 1.9637687189285522e-06, "loss": 21.2031, "step": 11977 }, { "epoch": 0.11338400810291459, "grad_norm": 3.3172640800476074, "learning_rate": 1.9637605406863997e-06, "loss": 0.9326, "step": 11978 }, { "epoch": 0.11339347412462965, "grad_norm": 321.4062805175781, "learning_rate": 1.9637523615383746e-06, "loss": 19.5234, "step": 11979 }, { "epoch": 0.1134029401463447, "grad_norm": 398.2757873535156, "learning_rate": 1.963744181484485e-06, "loss": 19.1953, "step": 11980 }, { "epoch": 0.11341240616805975, "grad_norm": 311.7292785644531, "learning_rate": 1.9637360005247387e-06, "loss": 21.1328, "step": 11981 }, { "epoch": 0.1134218721897748, "grad_norm": 507.2645568847656, "learning_rate": 1.963727818659143e-06, "loss": 22.9688, "step": 11982 }, { "epoch": 0.11343133821148986, "grad_norm": 178.75479125976562, "learning_rate": 1.9637196358877063e-06, "loss": 15.0352, "step": 11983 }, { "epoch": 0.1134408042332049, "grad_norm": 229.23953247070312, "learning_rate": 1.9637114522104356e-06, "loss": 15.7188, "step": 11984 }, { "epoch": 0.11345027025491997, "grad_norm": 245.79791259765625, "learning_rate": 1.963703267627339e-06, "loss": 21.2969, "step": 11985 }, { "epoch": 0.11345973627663501, "grad_norm": 187.72689819335938, "learning_rate": 1.963695082138424e-06, "loss": 17.8203, "step": 11986 }, { "epoch": 0.11346920229835007, "grad_norm": 899.3807373046875, "learning_rate": 1.9636868957436983e-06, "loss": 46.0391, "step": 11987 }, { "epoch": 0.11347866832006513, "grad_norm": 389.66705322265625, "learning_rate": 1.963678708443169e-06, "loss": 10.4805, "step": 11988 }, { "epoch": 0.11348813434178018, "grad_norm": 212.05381774902344, "learning_rate": 1.963670520236845e-06, "loss": 9.9805, "step": 11989 }, { "epoch": 0.11349760036349524, "grad_norm": 231.63568115234375, "learning_rate": 1.9636623311247334e-06, "loss": 21.5586, "step": 11990 }, { "epoch": 0.11350706638521028, "grad_norm": 335.967529296875, "learning_rate": 1.9636541411068417e-06, "loss": 41.8906, "step": 11991 }, { "epoch": 0.11351653240692534, "grad_norm": 3.038712501525879, "learning_rate": 1.963645950183178e-06, "loss": 0.8408, "step": 11992 }, { "epoch": 0.11352599842864039, "grad_norm": 226.14663696289062, "learning_rate": 1.96363775835375e-06, "loss": 14.0, "step": 11993 }, { "epoch": 0.11353546445035545, "grad_norm": 491.6762390136719, "learning_rate": 1.9636295656185646e-06, "loss": 34.4531, "step": 11994 }, { "epoch": 0.1135449304720705, "grad_norm": 302.8894958496094, "learning_rate": 1.9636213719776304e-06, "loss": 17.7578, "step": 11995 }, { "epoch": 0.11355439649378556, "grad_norm": 462.54620361328125, "learning_rate": 1.9636131774309547e-06, "loss": 13.0273, "step": 11996 }, { "epoch": 0.11356386251550062, "grad_norm": 554.3950805664062, "learning_rate": 1.9636049819785457e-06, "loss": 60.9062, "step": 11997 }, { "epoch": 0.11357332853721566, "grad_norm": 524.7840576171875, "learning_rate": 1.9635967856204104e-06, "loss": 33.0625, "step": 11998 }, { "epoch": 0.11358279455893072, "grad_norm": 631.2916870117188, "learning_rate": 1.9635885883565566e-06, "loss": 54.3203, "step": 11999 }, { "epoch": 0.11359226058064577, "grad_norm": 165.8914794921875, "learning_rate": 1.963580390186992e-06, "loss": 20.8984, "step": 12000 }, { "epoch": 0.11360172660236083, "grad_norm": 307.2727355957031, "learning_rate": 1.9635721911117255e-06, "loss": 21.4062, "step": 12001 }, { "epoch": 0.11361119262407587, "grad_norm": 238.94908142089844, "learning_rate": 1.963563991130763e-06, "loss": 17.2969, "step": 12002 }, { "epoch": 0.11362065864579093, "grad_norm": 260.98370361328125, "learning_rate": 1.9635557902441133e-06, "loss": 20.0547, "step": 12003 }, { "epoch": 0.113630124667506, "grad_norm": 281.5532531738281, "learning_rate": 1.963547588451784e-06, "loss": 23.1172, "step": 12004 }, { "epoch": 0.11363959068922104, "grad_norm": 255.92117309570312, "learning_rate": 1.9635393857537826e-06, "loss": 18.1328, "step": 12005 }, { "epoch": 0.1136490567109361, "grad_norm": 3.3863608837127686, "learning_rate": 1.9635311821501164e-06, "loss": 1.0786, "step": 12006 }, { "epoch": 0.11365852273265115, "grad_norm": 2.821307897567749, "learning_rate": 1.963522977640794e-06, "loss": 0.7754, "step": 12007 }, { "epoch": 0.1136679887543662, "grad_norm": 416.58697509765625, "learning_rate": 1.963514772225823e-06, "loss": 14.3359, "step": 12008 }, { "epoch": 0.11367745477608125, "grad_norm": 523.8429565429688, "learning_rate": 1.96350656590521e-06, "loss": 20.7188, "step": 12009 }, { "epoch": 0.11368692079779631, "grad_norm": 465.7044982910156, "learning_rate": 1.963498358678964e-06, "loss": 40.9062, "step": 12010 }, { "epoch": 0.11369638681951136, "grad_norm": 236.51358032226562, "learning_rate": 1.963490150547092e-06, "loss": 18.7773, "step": 12011 }, { "epoch": 0.11370585284122642, "grad_norm": 178.36740112304688, "learning_rate": 1.963481941509602e-06, "loss": 19.8281, "step": 12012 }, { "epoch": 0.11371531886294148, "grad_norm": 3.340667247772217, "learning_rate": 1.9634737315665016e-06, "loss": 1.0933, "step": 12013 }, { "epoch": 0.11372478488465652, "grad_norm": 265.5243835449219, "learning_rate": 1.963465520717799e-06, "loss": 10.4336, "step": 12014 }, { "epoch": 0.11373425090637158, "grad_norm": 252.8907928466797, "learning_rate": 1.963457308963501e-06, "loss": 22.375, "step": 12015 }, { "epoch": 0.11374371692808663, "grad_norm": 267.5673522949219, "learning_rate": 1.963449096303616e-06, "loss": 17.9141, "step": 12016 }, { "epoch": 0.11375318294980169, "grad_norm": 301.9613342285156, "learning_rate": 1.9634408827381515e-06, "loss": 44.4688, "step": 12017 }, { "epoch": 0.11376264897151674, "grad_norm": 3.0691921710968018, "learning_rate": 1.9634326682671153e-06, "loss": 0.9629, "step": 12018 }, { "epoch": 0.1137721149932318, "grad_norm": 269.1675109863281, "learning_rate": 1.9634244528905153e-06, "loss": 24.4141, "step": 12019 }, { "epoch": 0.11378158101494684, "grad_norm": 3.11049222946167, "learning_rate": 1.9634162366083583e-06, "loss": 0.9253, "step": 12020 }, { "epoch": 0.1137910470366619, "grad_norm": 335.5738830566406, "learning_rate": 1.963408019420653e-06, "loss": 29.5, "step": 12021 }, { "epoch": 0.11380051305837696, "grad_norm": 410.2436828613281, "learning_rate": 1.963399801327407e-06, "loss": 31.875, "step": 12022 }, { "epoch": 0.11380997908009201, "grad_norm": 3.105776786804199, "learning_rate": 1.963391582328628e-06, "loss": 1.0317, "step": 12023 }, { "epoch": 0.11381944510180707, "grad_norm": 291.3841552734375, "learning_rate": 1.9633833624243234e-06, "loss": 28.3867, "step": 12024 }, { "epoch": 0.11382891112352211, "grad_norm": 368.7658996582031, "learning_rate": 1.963375141614501e-06, "loss": 25.2109, "step": 12025 }, { "epoch": 0.11383837714523717, "grad_norm": 347.1217956542969, "learning_rate": 1.9633669198991688e-06, "loss": 25.6875, "step": 12026 }, { "epoch": 0.11384784316695222, "grad_norm": 602.4761962890625, "learning_rate": 1.9633586972783343e-06, "loss": 39.5, "step": 12027 }, { "epoch": 0.11385730918866728, "grad_norm": 233.7889404296875, "learning_rate": 1.963350473752005e-06, "loss": 21.0703, "step": 12028 }, { "epoch": 0.11386677521038233, "grad_norm": 475.7135314941406, "learning_rate": 1.9633422493201896e-06, "loss": 42.2188, "step": 12029 }, { "epoch": 0.11387624123209739, "grad_norm": 3.4550209045410156, "learning_rate": 1.9633340239828947e-06, "loss": 0.8687, "step": 12030 }, { "epoch": 0.11388570725381245, "grad_norm": 492.0217590332031, "learning_rate": 1.9633257977401287e-06, "loss": 13.3164, "step": 12031 }, { "epoch": 0.11389517327552749, "grad_norm": 272.2711181640625, "learning_rate": 1.963317570591899e-06, "loss": 30.5, "step": 12032 }, { "epoch": 0.11390463929724255, "grad_norm": 213.5362548828125, "learning_rate": 1.9633093425382133e-06, "loss": 27.9062, "step": 12033 }, { "epoch": 0.1139141053189576, "grad_norm": 703.5894775390625, "learning_rate": 1.9633011135790796e-06, "loss": 48.6719, "step": 12034 }, { "epoch": 0.11392357134067266, "grad_norm": 1098.709228515625, "learning_rate": 1.9632928837145054e-06, "loss": 48.5312, "step": 12035 }, { "epoch": 0.1139330373623877, "grad_norm": 260.1205749511719, "learning_rate": 1.963284652944499e-06, "loss": 19.5078, "step": 12036 }, { "epoch": 0.11394250338410276, "grad_norm": 301.3098449707031, "learning_rate": 1.963276421269067e-06, "loss": 31.2969, "step": 12037 }, { "epoch": 0.11395196940581781, "grad_norm": 873.833984375, "learning_rate": 1.9632681886882186e-06, "loss": 45.1953, "step": 12038 }, { "epoch": 0.11396143542753287, "grad_norm": 219.25933837890625, "learning_rate": 1.9632599552019606e-06, "loss": 17.875, "step": 12039 }, { "epoch": 0.11397090144924793, "grad_norm": 492.94964599609375, "learning_rate": 1.9632517208103e-06, "loss": 43.7969, "step": 12040 }, { "epoch": 0.11398036747096298, "grad_norm": 291.4289855957031, "learning_rate": 1.9632434855132467e-06, "loss": 25.7656, "step": 12041 }, { "epoch": 0.11398983349267804, "grad_norm": 415.943359375, "learning_rate": 1.963235249310807e-06, "loss": 50.9062, "step": 12042 }, { "epoch": 0.11399929951439308, "grad_norm": 3.094031810760498, "learning_rate": 1.963227012202988e-06, "loss": 0.8647, "step": 12043 }, { "epoch": 0.11400876553610814, "grad_norm": 364.7358703613281, "learning_rate": 1.9632187741897987e-06, "loss": 25.9609, "step": 12044 }, { "epoch": 0.11401823155782319, "grad_norm": 2207.76416015625, "learning_rate": 1.9632105352712465e-06, "loss": 50.9141, "step": 12045 }, { "epoch": 0.11402769757953825, "grad_norm": 1373.6331787109375, "learning_rate": 1.963202295447339e-06, "loss": 17.4844, "step": 12046 }, { "epoch": 0.11403716360125331, "grad_norm": 252.42738342285156, "learning_rate": 1.963194054718084e-06, "loss": 16.8242, "step": 12047 }, { "epoch": 0.11404662962296835, "grad_norm": 394.9511413574219, "learning_rate": 1.9631858130834893e-06, "loss": 23.3672, "step": 12048 }, { "epoch": 0.11405609564468341, "grad_norm": 617.8291625976562, "learning_rate": 1.9631775705435625e-06, "loss": 25.4922, "step": 12049 }, { "epoch": 0.11406556166639846, "grad_norm": 853.3154907226562, "learning_rate": 1.9631693270983112e-06, "loss": 27.0312, "step": 12050 }, { "epoch": 0.11407502768811352, "grad_norm": 337.7823181152344, "learning_rate": 1.963161082747744e-06, "loss": 40.9531, "step": 12051 }, { "epoch": 0.11408449370982857, "grad_norm": 731.5675659179688, "learning_rate": 1.963152837491868e-06, "loss": 43.6094, "step": 12052 }, { "epoch": 0.11409395973154363, "grad_norm": 179.36810302734375, "learning_rate": 1.9631445913306905e-06, "loss": 9.8398, "step": 12053 }, { "epoch": 0.11410342575325867, "grad_norm": 386.2151184082031, "learning_rate": 1.9631363442642197e-06, "loss": 24.5, "step": 12054 }, { "epoch": 0.11411289177497373, "grad_norm": 486.63775634765625, "learning_rate": 1.9631280962924637e-06, "loss": 14.625, "step": 12055 }, { "epoch": 0.11412235779668879, "grad_norm": 167.93017578125, "learning_rate": 1.96311984741543e-06, "loss": 23.0898, "step": 12056 }, { "epoch": 0.11413182381840384, "grad_norm": 304.01715087890625, "learning_rate": 1.963111597633126e-06, "loss": 19.9297, "step": 12057 }, { "epoch": 0.1141412898401189, "grad_norm": 214.80670166015625, "learning_rate": 1.96310334694556e-06, "loss": 18.9062, "step": 12058 }, { "epoch": 0.11415075586183394, "grad_norm": 220.2945556640625, "learning_rate": 1.9630950953527392e-06, "loss": 20.5234, "step": 12059 }, { "epoch": 0.114160221883549, "grad_norm": 592.93798828125, "learning_rate": 1.963086842854672e-06, "loss": 38.3906, "step": 12060 }, { "epoch": 0.11416968790526405, "grad_norm": 3.267629861831665, "learning_rate": 1.963078589451366e-06, "loss": 0.9287, "step": 12061 }, { "epoch": 0.11417915392697911, "grad_norm": 257.01629638671875, "learning_rate": 1.9630703351428288e-06, "loss": 15.5, "step": 12062 }, { "epoch": 0.11418861994869416, "grad_norm": 613.304443359375, "learning_rate": 1.9630620799290677e-06, "loss": 35.0781, "step": 12063 }, { "epoch": 0.11419808597040922, "grad_norm": 430.0739440917969, "learning_rate": 1.963053823810091e-06, "loss": 44.1562, "step": 12064 }, { "epoch": 0.11420755199212428, "grad_norm": 554.4951782226562, "learning_rate": 1.9630455667859064e-06, "loss": 53.3516, "step": 12065 }, { "epoch": 0.11421701801383932, "grad_norm": 772.4570922851562, "learning_rate": 1.9630373088565213e-06, "loss": 33.3438, "step": 12066 }, { "epoch": 0.11422648403555438, "grad_norm": 420.44586181640625, "learning_rate": 1.9630290500219443e-06, "loss": 27.5234, "step": 12067 }, { "epoch": 0.11423595005726943, "grad_norm": 560.6339111328125, "learning_rate": 1.9630207902821826e-06, "loss": 17.3281, "step": 12068 }, { "epoch": 0.11424541607898449, "grad_norm": 187.31687927246094, "learning_rate": 1.9630125296372438e-06, "loss": 19.8828, "step": 12069 }, { "epoch": 0.11425488210069953, "grad_norm": 320.87164306640625, "learning_rate": 1.963004268087136e-06, "loss": 48.5, "step": 12070 }, { "epoch": 0.1142643481224146, "grad_norm": 321.5313720703125, "learning_rate": 1.9629960056318666e-06, "loss": 32.1562, "step": 12071 }, { "epoch": 0.11427381414412964, "grad_norm": 398.90252685546875, "learning_rate": 1.962987742271444e-06, "loss": 61.0, "step": 12072 }, { "epoch": 0.1142832801658447, "grad_norm": 566.2442626953125, "learning_rate": 1.962979478005875e-06, "loss": 39.5156, "step": 12073 }, { "epoch": 0.11429274618755976, "grad_norm": 1031.2513427734375, "learning_rate": 1.9629712128351684e-06, "loss": 20.2422, "step": 12074 }, { "epoch": 0.1143022122092748, "grad_norm": 396.77020263671875, "learning_rate": 1.9629629467593313e-06, "loss": 43.0156, "step": 12075 }, { "epoch": 0.11431167823098987, "grad_norm": 572.1468505859375, "learning_rate": 1.9629546797783717e-06, "loss": 36.2812, "step": 12076 }, { "epoch": 0.11432114425270491, "grad_norm": 484.68695068359375, "learning_rate": 1.9629464118922973e-06, "loss": 40.9688, "step": 12077 }, { "epoch": 0.11433061027441997, "grad_norm": 375.64642333984375, "learning_rate": 1.962938143101116e-06, "loss": 35.7188, "step": 12078 }, { "epoch": 0.11434007629613502, "grad_norm": 814.9924926757812, "learning_rate": 1.9629298734048357e-06, "loss": 57.3281, "step": 12079 }, { "epoch": 0.11434954231785008, "grad_norm": 586.5043334960938, "learning_rate": 1.9629216028034638e-06, "loss": 60.1719, "step": 12080 }, { "epoch": 0.11435900833956512, "grad_norm": 883.0030517578125, "learning_rate": 1.962913331297008e-06, "loss": 43.0391, "step": 12081 }, { "epoch": 0.11436847436128018, "grad_norm": 150.07948303222656, "learning_rate": 1.9629050588854766e-06, "loss": 17.1094, "step": 12082 }, { "epoch": 0.11437794038299524, "grad_norm": 208.9055938720703, "learning_rate": 1.962896785568877e-06, "loss": 24.8359, "step": 12083 }, { "epoch": 0.11438740640471029, "grad_norm": 228.0472412109375, "learning_rate": 1.962888511347217e-06, "loss": 23.3984, "step": 12084 }, { "epoch": 0.11439687242642535, "grad_norm": 774.8074951171875, "learning_rate": 1.9628802362205045e-06, "loss": 46.0234, "step": 12085 }, { "epoch": 0.1144063384481404, "grad_norm": 162.78488159179688, "learning_rate": 1.962871960188747e-06, "loss": 21.2344, "step": 12086 }, { "epoch": 0.11441580446985546, "grad_norm": 600.254150390625, "learning_rate": 1.962863683251953e-06, "loss": 31.5234, "step": 12087 }, { "epoch": 0.1144252704915705, "grad_norm": 276.975341796875, "learning_rate": 1.9628554054101293e-06, "loss": 25.3203, "step": 12088 }, { "epoch": 0.11443473651328556, "grad_norm": 863.9351196289062, "learning_rate": 1.962847126663284e-06, "loss": 40.7031, "step": 12089 }, { "epoch": 0.11444420253500062, "grad_norm": 230.25442504882812, "learning_rate": 1.9628388470114257e-06, "loss": 20.3594, "step": 12090 }, { "epoch": 0.11445366855671567, "grad_norm": 317.7967224121094, "learning_rate": 1.9628305664545613e-06, "loss": 14.9844, "step": 12091 }, { "epoch": 0.11446313457843073, "grad_norm": 3.2705509662628174, "learning_rate": 1.962822284992699e-06, "loss": 0.918, "step": 12092 }, { "epoch": 0.11447260060014577, "grad_norm": 191.15463256835938, "learning_rate": 1.962814002625846e-06, "loss": 21.1875, "step": 12093 }, { "epoch": 0.11448206662186083, "grad_norm": 336.5748291015625, "learning_rate": 1.9628057193540104e-06, "loss": 30.5234, "step": 12094 }, { "epoch": 0.11449153264357588, "grad_norm": 411.33221435546875, "learning_rate": 1.9627974351772e-06, "loss": 39.1406, "step": 12095 }, { "epoch": 0.11450099866529094, "grad_norm": 870.3285522460938, "learning_rate": 1.962789150095423e-06, "loss": 51.1172, "step": 12096 }, { "epoch": 0.11451046468700599, "grad_norm": 240.18226623535156, "learning_rate": 1.9627808641086867e-06, "loss": 28.7578, "step": 12097 }, { "epoch": 0.11451993070872105, "grad_norm": 846.9229125976562, "learning_rate": 1.962772577216999e-06, "loss": 44.4375, "step": 12098 }, { "epoch": 0.1145293967304361, "grad_norm": 403.7541809082031, "learning_rate": 1.9627642894203677e-06, "loss": 38.7031, "step": 12099 }, { "epoch": 0.11453886275215115, "grad_norm": 561.1138305664062, "learning_rate": 1.9627560007188008e-06, "loss": 8.8047, "step": 12100 }, { "epoch": 0.11454832877386621, "grad_norm": 722.9204711914062, "learning_rate": 1.962747711112306e-06, "loss": 29.2812, "step": 12101 }, { "epoch": 0.11455779479558126, "grad_norm": 335.15142822265625, "learning_rate": 1.9627394206008904e-06, "loss": 40.5938, "step": 12102 }, { "epoch": 0.11456726081729632, "grad_norm": 293.01318359375, "learning_rate": 1.9627311291845626e-06, "loss": 22.5586, "step": 12103 }, { "epoch": 0.11457672683901136, "grad_norm": 414.8636169433594, "learning_rate": 1.96272283686333e-06, "loss": 20.2422, "step": 12104 }, { "epoch": 0.11458619286072642, "grad_norm": 433.44744873046875, "learning_rate": 1.962714543637201e-06, "loss": 38.4453, "step": 12105 }, { "epoch": 0.11459565888244147, "grad_norm": 3.233431577682495, "learning_rate": 1.9627062495061826e-06, "loss": 0.9001, "step": 12106 }, { "epoch": 0.11460512490415653, "grad_norm": 198.54722595214844, "learning_rate": 1.962697954470283e-06, "loss": 16.6094, "step": 12107 }, { "epoch": 0.11461459092587159, "grad_norm": 206.85162353515625, "learning_rate": 1.96268965852951e-06, "loss": 17.4258, "step": 12108 }, { "epoch": 0.11462405694758664, "grad_norm": 735.930419921875, "learning_rate": 1.9626813616838714e-06, "loss": 36.3984, "step": 12109 }, { "epoch": 0.1146335229693017, "grad_norm": 836.337158203125, "learning_rate": 1.9626730639333753e-06, "loss": 17.3984, "step": 12110 }, { "epoch": 0.11464298899101674, "grad_norm": 404.1311340332031, "learning_rate": 1.9626647652780287e-06, "loss": 56.9531, "step": 12111 }, { "epoch": 0.1146524550127318, "grad_norm": 226.58010864257812, "learning_rate": 1.9626564657178403e-06, "loss": 22.4609, "step": 12112 }, { "epoch": 0.11466192103444685, "grad_norm": 511.6348571777344, "learning_rate": 1.9626481652528167e-06, "loss": 50.9844, "step": 12113 }, { "epoch": 0.11467138705616191, "grad_norm": 3.15378737449646, "learning_rate": 1.962639863882967e-06, "loss": 0.9165, "step": 12114 }, { "epoch": 0.11468085307787695, "grad_norm": 251.5749969482422, "learning_rate": 1.962631561608298e-06, "loss": 17.5859, "step": 12115 }, { "epoch": 0.11469031909959201, "grad_norm": 682.1864624023438, "learning_rate": 1.9626232584288184e-06, "loss": 46.125, "step": 12116 }, { "epoch": 0.11469978512130707, "grad_norm": 473.5752868652344, "learning_rate": 1.9626149543445356e-06, "loss": 34.7188, "step": 12117 }, { "epoch": 0.11470925114302212, "grad_norm": 515.7091064453125, "learning_rate": 1.962606649355457e-06, "loss": 41.9062, "step": 12118 }, { "epoch": 0.11471871716473718, "grad_norm": 469.5472106933594, "learning_rate": 1.962598343461591e-06, "loss": 46.625, "step": 12119 }, { "epoch": 0.11472818318645223, "grad_norm": 727.4461059570312, "learning_rate": 1.962590036662945e-06, "loss": 53.6406, "step": 12120 }, { "epoch": 0.11473764920816729, "grad_norm": 244.74041748046875, "learning_rate": 1.9625817289595272e-06, "loss": 19.8672, "step": 12121 }, { "epoch": 0.11474711522988233, "grad_norm": 213.21151733398438, "learning_rate": 1.9625734203513452e-06, "loss": 25.9531, "step": 12122 }, { "epoch": 0.11475658125159739, "grad_norm": 219.12930297851562, "learning_rate": 1.9625651108384067e-06, "loss": 9.1328, "step": 12123 }, { "epoch": 0.11476604727331244, "grad_norm": 555.3278198242188, "learning_rate": 1.9625568004207196e-06, "loss": 46.0547, "step": 12124 }, { "epoch": 0.1147755132950275, "grad_norm": 326.5265808105469, "learning_rate": 1.962548489098292e-06, "loss": 32.8438, "step": 12125 }, { "epoch": 0.11478497931674256, "grad_norm": 255.4576416015625, "learning_rate": 1.9625401768711314e-06, "loss": 14.4023, "step": 12126 }, { "epoch": 0.1147944453384576, "grad_norm": 566.1110229492188, "learning_rate": 1.9625318637392454e-06, "loss": 26.7266, "step": 12127 }, { "epoch": 0.11480391136017266, "grad_norm": 351.2911682128906, "learning_rate": 1.962523549702642e-06, "loss": 21.5156, "step": 12128 }, { "epoch": 0.11481337738188771, "grad_norm": 315.3689270019531, "learning_rate": 1.962515234761329e-06, "loss": 26.4609, "step": 12129 }, { "epoch": 0.11482284340360277, "grad_norm": 687.9088134765625, "learning_rate": 1.962506918915314e-06, "loss": 40.1875, "step": 12130 }, { "epoch": 0.11483230942531782, "grad_norm": 242.49520874023438, "learning_rate": 1.962498602164606e-06, "loss": 21.8984, "step": 12131 }, { "epoch": 0.11484177544703288, "grad_norm": 2.913031816482544, "learning_rate": 1.9624902845092114e-06, "loss": 0.9771, "step": 12132 }, { "epoch": 0.11485124146874794, "grad_norm": 1345.6357421875, "learning_rate": 1.962481965949139e-06, "loss": 24.168, "step": 12133 }, { "epoch": 0.11486070749046298, "grad_norm": 632.3096923828125, "learning_rate": 1.9624736464843954e-06, "loss": 48.25, "step": 12134 }, { "epoch": 0.11487017351217804, "grad_norm": 417.4035949707031, "learning_rate": 1.96246532611499e-06, "loss": 44.875, "step": 12135 }, { "epoch": 0.11487963953389309, "grad_norm": 483.35125732421875, "learning_rate": 1.9624570048409294e-06, "loss": 36.5, "step": 12136 }, { "epoch": 0.11488910555560815, "grad_norm": 206.81112670898438, "learning_rate": 1.9624486826622216e-06, "loss": 17.6406, "step": 12137 }, { "epoch": 0.1148985715773232, "grad_norm": 346.3813781738281, "learning_rate": 1.962440359578875e-06, "loss": 20.9258, "step": 12138 }, { "epoch": 0.11490803759903825, "grad_norm": 903.63134765625, "learning_rate": 1.962432035590897e-06, "loss": 79.4141, "step": 12139 }, { "epoch": 0.1149175036207533, "grad_norm": 307.0149841308594, "learning_rate": 1.9624237106982955e-06, "loss": 21.6875, "step": 12140 }, { "epoch": 0.11492696964246836, "grad_norm": 210.011474609375, "learning_rate": 1.9624153849010783e-06, "loss": 21.3906, "step": 12141 }, { "epoch": 0.11493643566418342, "grad_norm": 3.392737865447998, "learning_rate": 1.962407058199253e-06, "loss": 1.0942, "step": 12142 }, { "epoch": 0.11494590168589847, "grad_norm": 291.4000549316406, "learning_rate": 1.962398730592828e-06, "loss": 26.5234, "step": 12143 }, { "epoch": 0.11495536770761353, "grad_norm": 395.94708251953125, "learning_rate": 1.962390402081811e-06, "loss": 37.5625, "step": 12144 }, { "epoch": 0.11496483372932857, "grad_norm": 485.7785949707031, "learning_rate": 1.9623820726662094e-06, "loss": 52.6719, "step": 12145 }, { "epoch": 0.11497429975104363, "grad_norm": 377.10809326171875, "learning_rate": 1.962373742346031e-06, "loss": 28.2891, "step": 12146 }, { "epoch": 0.11498376577275868, "grad_norm": 232.33392333984375, "learning_rate": 1.962365411121284e-06, "loss": 22.6641, "step": 12147 }, { "epoch": 0.11499323179447374, "grad_norm": 369.1182861328125, "learning_rate": 1.9623570789919764e-06, "loss": 36.7422, "step": 12148 }, { "epoch": 0.11500269781618878, "grad_norm": 551.7356567382812, "learning_rate": 1.9623487459581158e-06, "loss": 33.3828, "step": 12149 }, { "epoch": 0.11501216383790384, "grad_norm": 731.4528198242188, "learning_rate": 1.9623404120197095e-06, "loss": 74.4688, "step": 12150 }, { "epoch": 0.1150216298596189, "grad_norm": 425.5931091308594, "learning_rate": 1.962332077176766e-06, "loss": 33.5859, "step": 12151 }, { "epoch": 0.11503109588133395, "grad_norm": 550.836669921875, "learning_rate": 1.962323741429293e-06, "loss": 51.8125, "step": 12152 }, { "epoch": 0.11504056190304901, "grad_norm": 363.13775634765625, "learning_rate": 1.9623154047772986e-06, "loss": 33.0625, "step": 12153 }, { "epoch": 0.11505002792476406, "grad_norm": 737.3048095703125, "learning_rate": 1.96230706722079e-06, "loss": 38.2969, "step": 12154 }, { "epoch": 0.11505949394647912, "grad_norm": 921.6763305664062, "learning_rate": 1.9622987287597753e-06, "loss": 69.7188, "step": 12155 }, { "epoch": 0.11506895996819416, "grad_norm": 572.6326293945312, "learning_rate": 1.9622903893942625e-06, "loss": 55.3906, "step": 12156 }, { "epoch": 0.11507842598990922, "grad_norm": 3.0013480186462402, "learning_rate": 1.962282049124259e-06, "loss": 0.8535, "step": 12157 }, { "epoch": 0.11508789201162427, "grad_norm": 332.5553283691406, "learning_rate": 1.9622737079497737e-06, "loss": 50.3047, "step": 12158 }, { "epoch": 0.11509735803333933, "grad_norm": 270.02545166015625, "learning_rate": 1.962265365870813e-06, "loss": 19.3125, "step": 12159 }, { "epoch": 0.11510682405505439, "grad_norm": 198.59228515625, "learning_rate": 1.962257022887386e-06, "loss": 23.5312, "step": 12160 }, { "epoch": 0.11511629007676943, "grad_norm": 274.89154052734375, "learning_rate": 1.9622486789994995e-06, "loss": 17.8281, "step": 12161 }, { "epoch": 0.1151257560984845, "grad_norm": 600.837158203125, "learning_rate": 1.9622403342071623e-06, "loss": 43.4531, "step": 12162 }, { "epoch": 0.11513522212019954, "grad_norm": 204.60365295410156, "learning_rate": 1.9622319885103814e-06, "loss": 15.8984, "step": 12163 }, { "epoch": 0.1151446881419146, "grad_norm": 338.4901123046875, "learning_rate": 1.9622236419091655e-06, "loss": 20.0703, "step": 12164 }, { "epoch": 0.11515415416362965, "grad_norm": 340.3957214355469, "learning_rate": 1.9622152944035217e-06, "loss": 20.5938, "step": 12165 }, { "epoch": 0.1151636201853447, "grad_norm": 496.2470703125, "learning_rate": 1.962206945993458e-06, "loss": 22.5859, "step": 12166 }, { "epoch": 0.11517308620705975, "grad_norm": 176.51889038085938, "learning_rate": 1.9621985966789825e-06, "loss": 21.9688, "step": 12167 }, { "epoch": 0.11518255222877481, "grad_norm": 235.48876953125, "learning_rate": 1.962190246460103e-06, "loss": 20.6562, "step": 12168 }, { "epoch": 0.11519201825048987, "grad_norm": 260.2770080566406, "learning_rate": 1.962181895336827e-06, "loss": 17.9531, "step": 12169 }, { "epoch": 0.11520148427220492, "grad_norm": 459.59600830078125, "learning_rate": 1.9621735433091627e-06, "loss": 31.8281, "step": 12170 }, { "epoch": 0.11521095029391998, "grad_norm": 3.08390212059021, "learning_rate": 1.962165190377118e-06, "loss": 0.8511, "step": 12171 }, { "epoch": 0.11522041631563502, "grad_norm": 270.1244812011719, "learning_rate": 1.9621568365407003e-06, "loss": 21.5078, "step": 12172 }, { "epoch": 0.11522988233735008, "grad_norm": 446.3852844238281, "learning_rate": 1.9621484817999182e-06, "loss": 20.2578, "step": 12173 }, { "epoch": 0.11523934835906513, "grad_norm": 598.8528442382812, "learning_rate": 1.962140126154779e-06, "loss": 16.7656, "step": 12174 }, { "epoch": 0.11524881438078019, "grad_norm": 316.7774353027344, "learning_rate": 1.9621317696052903e-06, "loss": 16.8281, "step": 12175 }, { "epoch": 0.11525828040249525, "grad_norm": 155.33457946777344, "learning_rate": 1.962123412151461e-06, "loss": 18.9453, "step": 12176 }, { "epoch": 0.1152677464242103, "grad_norm": 477.1852722167969, "learning_rate": 1.9621150537932977e-06, "loss": 25.8281, "step": 12177 }, { "epoch": 0.11527721244592536, "grad_norm": 304.2066650390625, "learning_rate": 1.962106694530809e-06, "loss": 28.3906, "step": 12178 }, { "epoch": 0.1152866784676404, "grad_norm": 199.75355529785156, "learning_rate": 1.9620983343640026e-06, "loss": 14.2109, "step": 12179 }, { "epoch": 0.11529614448935546, "grad_norm": 183.22239685058594, "learning_rate": 1.9620899732928864e-06, "loss": 14.0625, "step": 12180 }, { "epoch": 0.11530561051107051, "grad_norm": 331.8613586425781, "learning_rate": 1.9620816113174685e-06, "loss": 19.1797, "step": 12181 }, { "epoch": 0.11531507653278557, "grad_norm": 564.6083374023438, "learning_rate": 1.962073248437756e-06, "loss": 44.3594, "step": 12182 }, { "epoch": 0.11532454255450061, "grad_norm": 429.9804382324219, "learning_rate": 1.9620648846537577e-06, "loss": 18.3984, "step": 12183 }, { "epoch": 0.11533400857621567, "grad_norm": 159.52195739746094, "learning_rate": 1.9620565199654805e-06, "loss": 20.5078, "step": 12184 }, { "epoch": 0.11534347459793073, "grad_norm": 297.75299072265625, "learning_rate": 1.962048154372933e-06, "loss": 19.5625, "step": 12185 }, { "epoch": 0.11535294061964578, "grad_norm": 419.9986877441406, "learning_rate": 1.9620397878761226e-06, "loss": 27.8984, "step": 12186 }, { "epoch": 0.11536240664136084, "grad_norm": 202.77101135253906, "learning_rate": 1.9620314204750577e-06, "loss": 19.3672, "step": 12187 }, { "epoch": 0.11537187266307589, "grad_norm": 538.3274536132812, "learning_rate": 1.9620230521697456e-06, "loss": 39.5234, "step": 12188 }, { "epoch": 0.11538133868479095, "grad_norm": 3.5200321674346924, "learning_rate": 1.9620146829601946e-06, "loss": 0.8359, "step": 12189 }, { "epoch": 0.11539080470650599, "grad_norm": 261.3374328613281, "learning_rate": 1.9620063128464125e-06, "loss": 15.4375, "step": 12190 }, { "epoch": 0.11540027072822105, "grad_norm": 255.47482299804688, "learning_rate": 1.9619979418284068e-06, "loss": 23.4141, "step": 12191 }, { "epoch": 0.1154097367499361, "grad_norm": 391.22808837890625, "learning_rate": 1.9619895699061857e-06, "loss": 36.6875, "step": 12192 }, { "epoch": 0.11541920277165116, "grad_norm": 245.4217529296875, "learning_rate": 1.961981197079757e-06, "loss": 18.125, "step": 12193 }, { "epoch": 0.11542866879336622, "grad_norm": 530.6443481445312, "learning_rate": 1.9619728233491286e-06, "loss": 20.5547, "step": 12194 }, { "epoch": 0.11543813481508126, "grad_norm": 245.76358032226562, "learning_rate": 1.961964448714308e-06, "loss": 19.25, "step": 12195 }, { "epoch": 0.11544760083679632, "grad_norm": 3.511868476867676, "learning_rate": 1.961956073175304e-06, "loss": 1.0122, "step": 12196 }, { "epoch": 0.11545706685851137, "grad_norm": 1996.687255859375, "learning_rate": 1.961947696732123e-06, "loss": 33.2773, "step": 12197 }, { "epoch": 0.11546653288022643, "grad_norm": 216.2938690185547, "learning_rate": 1.9619393193847747e-06, "loss": 19.625, "step": 12198 }, { "epoch": 0.11547599890194148, "grad_norm": 234.44168090820312, "learning_rate": 1.9619309411332655e-06, "loss": 26.1406, "step": 12199 }, { "epoch": 0.11548546492365654, "grad_norm": 225.90411376953125, "learning_rate": 1.9619225619776037e-06, "loss": 15.1562, "step": 12200 }, { "epoch": 0.11549493094537158, "grad_norm": 162.80125427246094, "learning_rate": 1.9619141819177974e-06, "loss": 9.9219, "step": 12201 }, { "epoch": 0.11550439696708664, "grad_norm": 244.09152221679688, "learning_rate": 1.9619058009538545e-06, "loss": 27.5938, "step": 12202 }, { "epoch": 0.1155138629888017, "grad_norm": 163.40553283691406, "learning_rate": 1.9618974190857827e-06, "loss": 15.3438, "step": 12203 }, { "epoch": 0.11552332901051675, "grad_norm": 186.61459350585938, "learning_rate": 1.9618890363135896e-06, "loss": 26.5156, "step": 12204 }, { "epoch": 0.11553279503223181, "grad_norm": 283.16082763671875, "learning_rate": 1.961880652637284e-06, "loss": 29.3438, "step": 12205 }, { "epoch": 0.11554226105394685, "grad_norm": 264.1119079589844, "learning_rate": 1.9618722680568728e-06, "loss": 19.9375, "step": 12206 }, { "epoch": 0.11555172707566191, "grad_norm": 166.08387756347656, "learning_rate": 1.9618638825723637e-06, "loss": 24.5312, "step": 12207 }, { "epoch": 0.11556119309737696, "grad_norm": 373.44757080078125, "learning_rate": 1.961855496183766e-06, "loss": 34.6719, "step": 12208 }, { "epoch": 0.11557065911909202, "grad_norm": 491.7212829589844, "learning_rate": 1.961847108891086e-06, "loss": 34.6719, "step": 12209 }, { "epoch": 0.11558012514080707, "grad_norm": 237.3571319580078, "learning_rate": 1.9618387206943327e-06, "loss": 22.5625, "step": 12210 }, { "epoch": 0.11558959116252213, "grad_norm": 518.815673828125, "learning_rate": 1.9618303315935135e-06, "loss": 50.4219, "step": 12211 }, { "epoch": 0.11559905718423719, "grad_norm": 312.156005859375, "learning_rate": 1.9618219415886365e-06, "loss": 19.0156, "step": 12212 }, { "epoch": 0.11560852320595223, "grad_norm": 238.16180419921875, "learning_rate": 1.961813550679709e-06, "loss": 26.75, "step": 12213 }, { "epoch": 0.11561798922766729, "grad_norm": 286.8798828125, "learning_rate": 1.96180515886674e-06, "loss": 18.2031, "step": 12214 }, { "epoch": 0.11562745524938234, "grad_norm": 398.097900390625, "learning_rate": 1.9617967661497363e-06, "loss": 19.0312, "step": 12215 }, { "epoch": 0.1156369212710974, "grad_norm": 2262.36572265625, "learning_rate": 1.9617883725287063e-06, "loss": 48.8242, "step": 12216 }, { "epoch": 0.11564638729281244, "grad_norm": 273.3660888671875, "learning_rate": 1.9617799780036575e-06, "loss": 19.9453, "step": 12217 }, { "epoch": 0.1156558533145275, "grad_norm": 226.1705780029297, "learning_rate": 1.9617715825745984e-06, "loss": 19.5625, "step": 12218 }, { "epoch": 0.11566531933624256, "grad_norm": 2.943511962890625, "learning_rate": 1.9617631862415366e-06, "loss": 0.8208, "step": 12219 }, { "epoch": 0.11567478535795761, "grad_norm": 552.800048828125, "learning_rate": 1.9617547890044798e-06, "loss": 53.3047, "step": 12220 }, { "epoch": 0.11568425137967267, "grad_norm": 718.3345336914062, "learning_rate": 1.9617463908634363e-06, "loss": 21.7422, "step": 12221 }, { "epoch": 0.11569371740138772, "grad_norm": 271.1273498535156, "learning_rate": 1.9617379918184135e-06, "loss": 18.9922, "step": 12222 }, { "epoch": 0.11570318342310278, "grad_norm": 293.2017822265625, "learning_rate": 1.9617295918694197e-06, "loss": 21.8516, "step": 12223 }, { "epoch": 0.11571264944481782, "grad_norm": 608.1657104492188, "learning_rate": 1.961721191016463e-06, "loss": 42.9844, "step": 12224 }, { "epoch": 0.11572211546653288, "grad_norm": 854.9348754882812, "learning_rate": 1.9617127892595503e-06, "loss": 44.2344, "step": 12225 }, { "epoch": 0.11573158148824793, "grad_norm": 190.7877197265625, "learning_rate": 1.9617043865986903e-06, "loss": 19.2031, "step": 12226 }, { "epoch": 0.11574104750996299, "grad_norm": 305.3858337402344, "learning_rate": 1.9616959830338908e-06, "loss": 19.8438, "step": 12227 }, { "epoch": 0.11575051353167805, "grad_norm": 713.9384765625, "learning_rate": 1.9616875785651597e-06, "loss": 41.0078, "step": 12228 }, { "epoch": 0.1157599795533931, "grad_norm": 248.88536071777344, "learning_rate": 1.9616791731925047e-06, "loss": 22.5977, "step": 12229 }, { "epoch": 0.11576944557510815, "grad_norm": 275.8935546875, "learning_rate": 1.9616707669159342e-06, "loss": 12.625, "step": 12230 }, { "epoch": 0.1157789115968232, "grad_norm": 282.9179382324219, "learning_rate": 1.9616623597354556e-06, "loss": 18.1953, "step": 12231 }, { "epoch": 0.11578837761853826, "grad_norm": 243.08316040039062, "learning_rate": 1.9616539516510767e-06, "loss": 16.0625, "step": 12232 }, { "epoch": 0.1157978436402533, "grad_norm": 358.9370422363281, "learning_rate": 1.961645542662806e-06, "loss": 35.4062, "step": 12233 }, { "epoch": 0.11580730966196837, "grad_norm": 409.30096435546875, "learning_rate": 1.9616371327706505e-06, "loss": 17.0234, "step": 12234 }, { "epoch": 0.11581677568368341, "grad_norm": 171.73394775390625, "learning_rate": 1.9616287219746193e-06, "loss": 24.8906, "step": 12235 }, { "epoch": 0.11582624170539847, "grad_norm": 762.1257934570312, "learning_rate": 1.9616203102747192e-06, "loss": 54.9531, "step": 12236 }, { "epoch": 0.11583570772711353, "grad_norm": 628.5601806640625, "learning_rate": 1.9616118976709588e-06, "loss": 50.1797, "step": 12237 }, { "epoch": 0.11584517374882858, "grad_norm": 283.4427795410156, "learning_rate": 1.9616034841633456e-06, "loss": 16.5781, "step": 12238 }, { "epoch": 0.11585463977054364, "grad_norm": 991.2252807617188, "learning_rate": 1.9615950697518876e-06, "loss": 60.6406, "step": 12239 }, { "epoch": 0.11586410579225868, "grad_norm": 240.96546936035156, "learning_rate": 1.9615866544365935e-06, "loss": 25.5469, "step": 12240 }, { "epoch": 0.11587357181397374, "grad_norm": 280.15625, "learning_rate": 1.96157823821747e-06, "loss": 24.0938, "step": 12241 }, { "epoch": 0.11588303783568879, "grad_norm": 602.9181518554688, "learning_rate": 1.961569821094525e-06, "loss": 61.0938, "step": 12242 }, { "epoch": 0.11589250385740385, "grad_norm": 164.56219482421875, "learning_rate": 1.9615614030677676e-06, "loss": 20.3906, "step": 12243 }, { "epoch": 0.1159019698791189, "grad_norm": 524.3125610351562, "learning_rate": 1.9615529841372046e-06, "loss": 26.3594, "step": 12244 }, { "epoch": 0.11591143590083396, "grad_norm": 495.5349426269531, "learning_rate": 1.961544564302845e-06, "loss": 53.0, "step": 12245 }, { "epoch": 0.11592090192254902, "grad_norm": 553.4345703125, "learning_rate": 1.9615361435646954e-06, "loss": 44.1562, "step": 12246 }, { "epoch": 0.11593036794426406, "grad_norm": 519.32080078125, "learning_rate": 1.9615277219227647e-06, "loss": 16.5234, "step": 12247 }, { "epoch": 0.11593983396597912, "grad_norm": 364.38525390625, "learning_rate": 1.9615192993770605e-06, "loss": 22.7734, "step": 12248 }, { "epoch": 0.11594929998769417, "grad_norm": 558.1373291015625, "learning_rate": 1.9615108759275904e-06, "loss": 34.0625, "step": 12249 }, { "epoch": 0.11595876600940923, "grad_norm": 734.47314453125, "learning_rate": 1.961502451574363e-06, "loss": 28.8047, "step": 12250 }, { "epoch": 0.11596823203112427, "grad_norm": 3.3440606594085693, "learning_rate": 1.9614940263173857e-06, "loss": 0.9299, "step": 12251 }, { "epoch": 0.11597769805283933, "grad_norm": 257.0387268066406, "learning_rate": 1.9614856001566664e-06, "loss": 32.5859, "step": 12252 }, { "epoch": 0.11598716407455438, "grad_norm": 450.5187072753906, "learning_rate": 1.9614771730922134e-06, "loss": 27.7266, "step": 12253 }, { "epoch": 0.11599663009626944, "grad_norm": 678.5233764648438, "learning_rate": 1.961468745124034e-06, "loss": 53.5078, "step": 12254 }, { "epoch": 0.1160060961179845, "grad_norm": 366.83526611328125, "learning_rate": 1.9614603162521374e-06, "loss": 18.3555, "step": 12255 }, { "epoch": 0.11601556213969955, "grad_norm": 378.1244812011719, "learning_rate": 1.96145188647653e-06, "loss": 23.7109, "step": 12256 }, { "epoch": 0.1160250281614146, "grad_norm": 223.63829040527344, "learning_rate": 1.9614434557972204e-06, "loss": 21.9141, "step": 12257 }, { "epoch": 0.11603449418312965, "grad_norm": 238.5475311279297, "learning_rate": 1.9614350242142167e-06, "loss": 20.3594, "step": 12258 }, { "epoch": 0.11604396020484471, "grad_norm": 606.815185546875, "learning_rate": 1.9614265917275267e-06, "loss": 46.4375, "step": 12259 }, { "epoch": 0.11605342622655976, "grad_norm": 734.6270141601562, "learning_rate": 1.9614181583371576e-06, "loss": 44.8594, "step": 12260 }, { "epoch": 0.11606289224827482, "grad_norm": 3.1053690910339355, "learning_rate": 1.9614097240431187e-06, "loss": 0.8496, "step": 12261 }, { "epoch": 0.11607235826998988, "grad_norm": 205.0736541748047, "learning_rate": 1.961401288845417e-06, "loss": 28.0469, "step": 12262 }, { "epoch": 0.11608182429170492, "grad_norm": 137.6976776123047, "learning_rate": 1.9613928527440607e-06, "loss": 19.1953, "step": 12263 }, { "epoch": 0.11609129031341998, "grad_norm": 576.6903686523438, "learning_rate": 1.9613844157390577e-06, "loss": 55.5156, "step": 12264 }, { "epoch": 0.11610075633513503, "grad_norm": 400.9349670410156, "learning_rate": 1.9613759778304154e-06, "loss": 41.3359, "step": 12265 }, { "epoch": 0.11611022235685009, "grad_norm": 253.93438720703125, "learning_rate": 1.9613675390181427e-06, "loss": 37.1641, "step": 12266 }, { "epoch": 0.11611968837856514, "grad_norm": 158.55166625976562, "learning_rate": 1.9613590993022467e-06, "loss": 17.8984, "step": 12267 }, { "epoch": 0.1161291544002802, "grad_norm": 159.2113800048828, "learning_rate": 1.961350658682736e-06, "loss": 21.6016, "step": 12268 }, { "epoch": 0.11613862042199524, "grad_norm": 229.28143310546875, "learning_rate": 1.961342217159618e-06, "loss": 21.7422, "step": 12269 }, { "epoch": 0.1161480864437103, "grad_norm": 472.16998291015625, "learning_rate": 1.9613337747329013e-06, "loss": 15.9453, "step": 12270 }, { "epoch": 0.11615755246542536, "grad_norm": 331.57318115234375, "learning_rate": 1.961325331402593e-06, "loss": 17.8203, "step": 12271 }, { "epoch": 0.11616701848714041, "grad_norm": 287.1399230957031, "learning_rate": 1.9613168871687016e-06, "loss": 17.1562, "step": 12272 }, { "epoch": 0.11617648450885547, "grad_norm": 532.5968017578125, "learning_rate": 1.9613084420312344e-06, "loss": 23.0469, "step": 12273 }, { "epoch": 0.11618595053057051, "grad_norm": 3.2366533279418945, "learning_rate": 1.9612999959902004e-06, "loss": 0.9258, "step": 12274 }, { "epoch": 0.11619541655228557, "grad_norm": 312.8558044433594, "learning_rate": 1.9612915490456067e-06, "loss": 20.0078, "step": 12275 }, { "epoch": 0.11620488257400062, "grad_norm": 495.7078552246094, "learning_rate": 1.9612831011974613e-06, "loss": 34.8906, "step": 12276 }, { "epoch": 0.11621434859571568, "grad_norm": 454.44927978515625, "learning_rate": 1.9612746524457724e-06, "loss": 38.2773, "step": 12277 }, { "epoch": 0.11622381461743073, "grad_norm": 406.00238037109375, "learning_rate": 1.961266202790548e-06, "loss": 21.9141, "step": 12278 }, { "epoch": 0.11623328063914579, "grad_norm": 664.888427734375, "learning_rate": 1.961257752231796e-06, "loss": 54.2656, "step": 12279 }, { "epoch": 0.11624274666086085, "grad_norm": 361.5773620605469, "learning_rate": 1.961249300769524e-06, "loss": 17.5703, "step": 12280 }, { "epoch": 0.11625221268257589, "grad_norm": 3.6167778968811035, "learning_rate": 1.9612408484037403e-06, "loss": 0.9795, "step": 12281 }, { "epoch": 0.11626167870429095, "grad_norm": 643.734130859375, "learning_rate": 1.9612323951344528e-06, "loss": 42.4688, "step": 12282 }, { "epoch": 0.116271144726006, "grad_norm": 249.290771484375, "learning_rate": 1.9612239409616695e-06, "loss": 19.0781, "step": 12283 }, { "epoch": 0.11628061074772106, "grad_norm": 243.54495239257812, "learning_rate": 1.9612154858853977e-06, "loss": 22.9688, "step": 12284 }, { "epoch": 0.1162900767694361, "grad_norm": 317.0309753417969, "learning_rate": 1.9612070299056463e-06, "loss": 19.4609, "step": 12285 }, { "epoch": 0.11629954279115116, "grad_norm": 160.337646484375, "learning_rate": 1.961198573022423e-06, "loss": 17.5859, "step": 12286 }, { "epoch": 0.11630900881286621, "grad_norm": 978.6708374023438, "learning_rate": 1.961190115235735e-06, "loss": 66.375, "step": 12287 }, { "epoch": 0.11631847483458127, "grad_norm": 350.0788879394531, "learning_rate": 1.9611816565455916e-06, "loss": 34.7969, "step": 12288 }, { "epoch": 0.11632794085629633, "grad_norm": 221.34735107421875, "learning_rate": 1.961173196951999e-06, "loss": 24.1641, "step": 12289 }, { "epoch": 0.11633740687801138, "grad_norm": 368.393798828125, "learning_rate": 1.9611647364549667e-06, "loss": 26.7656, "step": 12290 }, { "epoch": 0.11634687289972644, "grad_norm": 353.0790100097656, "learning_rate": 1.961156275054502e-06, "loss": 43.8438, "step": 12291 }, { "epoch": 0.11635633892144148, "grad_norm": 420.1966552734375, "learning_rate": 1.961147812750613e-06, "loss": 25.4688, "step": 12292 }, { "epoch": 0.11636580494315654, "grad_norm": 241.77587890625, "learning_rate": 1.9611393495433073e-06, "loss": 10.6055, "step": 12293 }, { "epoch": 0.11637527096487159, "grad_norm": 387.2259521484375, "learning_rate": 1.9611308854325936e-06, "loss": 31.6875, "step": 12294 }, { "epoch": 0.11638473698658665, "grad_norm": 306.1914367675781, "learning_rate": 1.961122420418479e-06, "loss": 30.2656, "step": 12295 }, { "epoch": 0.1163942030083017, "grad_norm": 264.3464660644531, "learning_rate": 1.961113954500972e-06, "loss": 22.4531, "step": 12296 }, { "epoch": 0.11640366903001675, "grad_norm": 296.158203125, "learning_rate": 1.9611054876800805e-06, "loss": 21.5312, "step": 12297 }, { "epoch": 0.11641313505173181, "grad_norm": 997.0905151367188, "learning_rate": 1.9610970199558124e-06, "loss": 58.6406, "step": 12298 }, { "epoch": 0.11642260107344686, "grad_norm": 314.2774963378906, "learning_rate": 1.9610885513281755e-06, "loss": 23.8281, "step": 12299 }, { "epoch": 0.11643206709516192, "grad_norm": 553.2461547851562, "learning_rate": 1.9610800817971777e-06, "loss": 38.3281, "step": 12300 }, { "epoch": 0.11644153311687697, "grad_norm": 219.14501953125, "learning_rate": 1.961071611362827e-06, "loss": 18.4219, "step": 12301 }, { "epoch": 0.11645099913859203, "grad_norm": 422.981689453125, "learning_rate": 1.961063140025132e-06, "loss": 16.5078, "step": 12302 }, { "epoch": 0.11646046516030707, "grad_norm": 373.5162658691406, "learning_rate": 1.9610546677841002e-06, "loss": 32.6406, "step": 12303 }, { "epoch": 0.11646993118202213, "grad_norm": 336.98138427734375, "learning_rate": 1.9610461946397394e-06, "loss": 15.4453, "step": 12304 }, { "epoch": 0.11647939720373719, "grad_norm": 430.7448425292969, "learning_rate": 1.961037720592058e-06, "loss": 38.8125, "step": 12305 }, { "epoch": 0.11648886322545224, "grad_norm": 360.6861877441406, "learning_rate": 1.961029245641063e-06, "loss": 9.3359, "step": 12306 }, { "epoch": 0.1164983292471673, "grad_norm": 689.6343994140625, "learning_rate": 1.961020769786763e-06, "loss": 41.0469, "step": 12307 }, { "epoch": 0.11650779526888234, "grad_norm": 442.7722473144531, "learning_rate": 1.9610122930291667e-06, "loss": 47.7969, "step": 12308 }, { "epoch": 0.1165172612905974, "grad_norm": 475.2471923828125, "learning_rate": 1.961003815368281e-06, "loss": 42.6562, "step": 12309 }, { "epoch": 0.11652672731231245, "grad_norm": 260.97808837890625, "learning_rate": 1.960995336804114e-06, "loss": 25.1484, "step": 12310 }, { "epoch": 0.11653619333402751, "grad_norm": 399.7123107910156, "learning_rate": 1.960986857336674e-06, "loss": 27.4062, "step": 12311 }, { "epoch": 0.11654565935574256, "grad_norm": 483.0610046386719, "learning_rate": 1.960978376965969e-06, "loss": 45.8906, "step": 12312 }, { "epoch": 0.11655512537745762, "grad_norm": 242.87887573242188, "learning_rate": 1.960969895692007e-06, "loss": 22.8828, "step": 12313 }, { "epoch": 0.11656459139917268, "grad_norm": 358.8490905761719, "learning_rate": 1.9609614135147958e-06, "loss": 34.2188, "step": 12314 }, { "epoch": 0.11657405742088772, "grad_norm": 704.482177734375, "learning_rate": 1.960952930434343e-06, "loss": 37.9688, "step": 12315 }, { "epoch": 0.11658352344260278, "grad_norm": 234.72039794921875, "learning_rate": 1.9609444464506573e-06, "loss": 19.2422, "step": 12316 }, { "epoch": 0.11659298946431783, "grad_norm": 3.4725899696350098, "learning_rate": 1.9609359615637462e-06, "loss": 0.9775, "step": 12317 }, { "epoch": 0.11660245548603289, "grad_norm": 426.37841796875, "learning_rate": 1.960927475773618e-06, "loss": 33.1094, "step": 12318 }, { "epoch": 0.11661192150774793, "grad_norm": 370.58197021484375, "learning_rate": 1.9609189890802804e-06, "loss": 25.3438, "step": 12319 }, { "epoch": 0.116621387529463, "grad_norm": 344.8568115234375, "learning_rate": 1.960910501483741e-06, "loss": 9.6523, "step": 12320 }, { "epoch": 0.11663085355117804, "grad_norm": 683.9117431640625, "learning_rate": 1.960902012984009e-06, "loss": 23.6484, "step": 12321 }, { "epoch": 0.1166403195728931, "grad_norm": 566.2504272460938, "learning_rate": 1.960893523581091e-06, "loss": 26.668, "step": 12322 }, { "epoch": 0.11664978559460816, "grad_norm": 721.2491455078125, "learning_rate": 1.9608850332749956e-06, "loss": 44.7969, "step": 12323 }, { "epoch": 0.1166592516163232, "grad_norm": 190.11752319335938, "learning_rate": 1.960876542065731e-06, "loss": 8.5742, "step": 12324 }, { "epoch": 0.11666871763803827, "grad_norm": 572.5359497070312, "learning_rate": 1.960868049953305e-06, "loss": 29.2578, "step": 12325 }, { "epoch": 0.11667818365975331, "grad_norm": 169.81248474121094, "learning_rate": 1.9608595569377256e-06, "loss": 17.9219, "step": 12326 }, { "epoch": 0.11668764968146837, "grad_norm": 266.4734191894531, "learning_rate": 1.960851063019001e-06, "loss": 17.2344, "step": 12327 }, { "epoch": 0.11669711570318342, "grad_norm": 315.4670715332031, "learning_rate": 1.960842568197138e-06, "loss": 36.5, "step": 12328 }, { "epoch": 0.11670658172489848, "grad_norm": 2.9358694553375244, "learning_rate": 1.960834072472146e-06, "loss": 0.9717, "step": 12329 }, { "epoch": 0.11671604774661352, "grad_norm": 323.96734619140625, "learning_rate": 1.960825575844033e-06, "loss": 24.7656, "step": 12330 }, { "epoch": 0.11672551376832858, "grad_norm": 759.3746337890625, "learning_rate": 1.9608170783128057e-06, "loss": 38.5938, "step": 12331 }, { "epoch": 0.11673497979004364, "grad_norm": 378.3561706542969, "learning_rate": 1.9608085798784734e-06, "loss": 11.418, "step": 12332 }, { "epoch": 0.11674444581175869, "grad_norm": 988.5194702148438, "learning_rate": 1.960800080541043e-06, "loss": 20.2383, "step": 12333 }, { "epoch": 0.11675391183347375, "grad_norm": 500.0921325683594, "learning_rate": 1.9607915803005234e-06, "loss": 45.8906, "step": 12334 }, { "epoch": 0.1167633778551888, "grad_norm": 488.25640869140625, "learning_rate": 1.960783079156922e-06, "loss": 32.7969, "step": 12335 }, { "epoch": 0.11677284387690386, "grad_norm": 376.09881591796875, "learning_rate": 1.960774577110247e-06, "loss": 33.5469, "step": 12336 }, { "epoch": 0.1167823098986189, "grad_norm": 478.92889404296875, "learning_rate": 1.9607660741605067e-06, "loss": 45.5312, "step": 12337 }, { "epoch": 0.11679177592033396, "grad_norm": 268.62835693359375, "learning_rate": 1.9607575703077084e-06, "loss": 9.0312, "step": 12338 }, { "epoch": 0.11680124194204901, "grad_norm": 230.94735717773438, "learning_rate": 1.960749065551861e-06, "loss": 22.4219, "step": 12339 }, { "epoch": 0.11681070796376407, "grad_norm": 219.03610229492188, "learning_rate": 1.9607405598929718e-06, "loss": 19.5547, "step": 12340 }, { "epoch": 0.11682017398547913, "grad_norm": 212.8760223388672, "learning_rate": 1.9607320533310487e-06, "loss": 22.9141, "step": 12341 }, { "epoch": 0.11682964000719417, "grad_norm": 294.4521789550781, "learning_rate": 1.9607235458661e-06, "loss": 12.8047, "step": 12342 }, { "epoch": 0.11683910602890923, "grad_norm": 407.64306640625, "learning_rate": 1.9607150374981337e-06, "loss": 21.8359, "step": 12343 }, { "epoch": 0.11684857205062428, "grad_norm": 281.12567138671875, "learning_rate": 1.960706528227158e-06, "loss": 18.8516, "step": 12344 }, { "epoch": 0.11685803807233934, "grad_norm": 642.4627685546875, "learning_rate": 1.9606980180531805e-06, "loss": 30.1094, "step": 12345 }, { "epoch": 0.11686750409405439, "grad_norm": 301.5412902832031, "learning_rate": 1.9606895069762093e-06, "loss": 19.1797, "step": 12346 }, { "epoch": 0.11687697011576945, "grad_norm": 446.1008605957031, "learning_rate": 1.9606809949962526e-06, "loss": 42.5938, "step": 12347 }, { "epoch": 0.1168864361374845, "grad_norm": 226.6786651611328, "learning_rate": 1.9606724821133182e-06, "loss": 22.4531, "step": 12348 }, { "epoch": 0.11689590215919955, "grad_norm": 183.2830810546875, "learning_rate": 1.960663968327414e-06, "loss": 12.3359, "step": 12349 }, { "epoch": 0.11690536818091461, "grad_norm": 166.5606231689453, "learning_rate": 1.9606554536385483e-06, "loss": 15.8516, "step": 12350 }, { "epoch": 0.11691483420262966, "grad_norm": 268.03277587890625, "learning_rate": 1.9606469380467293e-06, "loss": 34.9531, "step": 12351 }, { "epoch": 0.11692430022434472, "grad_norm": 226.70071411132812, "learning_rate": 1.960638421551964e-06, "loss": 17.6641, "step": 12352 }, { "epoch": 0.11693376624605976, "grad_norm": 374.13177490234375, "learning_rate": 1.9606299041542616e-06, "loss": 30.2656, "step": 12353 }, { "epoch": 0.11694323226777482, "grad_norm": 214.8070831298828, "learning_rate": 1.9606213858536295e-06, "loss": 19.2578, "step": 12354 }, { "epoch": 0.11695269828948987, "grad_norm": 1758.8787841796875, "learning_rate": 1.960612866650076e-06, "loss": 25.8477, "step": 12355 }, { "epoch": 0.11696216431120493, "grad_norm": 327.05633544921875, "learning_rate": 1.9606043465436086e-06, "loss": 22.0391, "step": 12356 }, { "epoch": 0.11697163033291999, "grad_norm": 1172.8447265625, "learning_rate": 1.9605958255342354e-06, "loss": 54.375, "step": 12357 }, { "epoch": 0.11698109635463504, "grad_norm": 451.2493896484375, "learning_rate": 1.960587303621965e-06, "loss": 17.5273, "step": 12358 }, { "epoch": 0.1169905623763501, "grad_norm": 404.15252685546875, "learning_rate": 1.960578780806805e-06, "loss": 50.8594, "step": 12359 }, { "epoch": 0.11700002839806514, "grad_norm": 586.1253662109375, "learning_rate": 1.960570257088763e-06, "loss": 43.6992, "step": 12360 }, { "epoch": 0.1170094944197802, "grad_norm": 234.57632446289062, "learning_rate": 1.9605617324678476e-06, "loss": 18.8281, "step": 12361 }, { "epoch": 0.11701896044149525, "grad_norm": 370.0729064941406, "learning_rate": 1.960553206944067e-06, "loss": 46.3633, "step": 12362 }, { "epoch": 0.11702842646321031, "grad_norm": 158.50160217285156, "learning_rate": 1.9605446805174287e-06, "loss": 13.7773, "step": 12363 }, { "epoch": 0.11703789248492535, "grad_norm": 433.57208251953125, "learning_rate": 1.960536153187941e-06, "loss": 24.3125, "step": 12364 }, { "epoch": 0.11704735850664041, "grad_norm": 318.03680419921875, "learning_rate": 1.9605276249556115e-06, "loss": 19.3594, "step": 12365 }, { "epoch": 0.11705682452835547, "grad_norm": 444.23065185546875, "learning_rate": 1.960519095820449e-06, "loss": 50.8906, "step": 12366 }, { "epoch": 0.11706629055007052, "grad_norm": 259.27960205078125, "learning_rate": 1.9605105657824605e-06, "loss": 23.9219, "step": 12367 }, { "epoch": 0.11707575657178558, "grad_norm": 625.4547119140625, "learning_rate": 1.960502034841655e-06, "loss": 40.8359, "step": 12368 }, { "epoch": 0.11708522259350063, "grad_norm": 494.7817687988281, "learning_rate": 1.9604935029980395e-06, "loss": 33.0234, "step": 12369 }, { "epoch": 0.11709468861521569, "grad_norm": 294.5086364746094, "learning_rate": 1.960484970251623e-06, "loss": 17.6562, "step": 12370 }, { "epoch": 0.11710415463693073, "grad_norm": 3.6723670959472656, "learning_rate": 1.960476436602413e-06, "loss": 0.9531, "step": 12371 }, { "epoch": 0.11711362065864579, "grad_norm": 1389.734130859375, "learning_rate": 1.960467902050418e-06, "loss": 61.9062, "step": 12372 }, { "epoch": 0.11712308668036084, "grad_norm": 235.50791931152344, "learning_rate": 1.960459366595645e-06, "loss": 28.8047, "step": 12373 }, { "epoch": 0.1171325527020759, "grad_norm": 1134.1956787109375, "learning_rate": 1.960450830238103e-06, "loss": 45.7031, "step": 12374 }, { "epoch": 0.11714201872379096, "grad_norm": 240.77517700195312, "learning_rate": 1.9604422929777996e-06, "loss": 14.2852, "step": 12375 }, { "epoch": 0.117151484745506, "grad_norm": 385.39898681640625, "learning_rate": 1.9604337548147434e-06, "loss": 42.7422, "step": 12376 }, { "epoch": 0.11716095076722106, "grad_norm": 188.64707946777344, "learning_rate": 1.9604252157489414e-06, "loss": 23.2031, "step": 12377 }, { "epoch": 0.11717041678893611, "grad_norm": 183.63308715820312, "learning_rate": 1.9604166757804023e-06, "loss": 16.9531, "step": 12378 }, { "epoch": 0.11717988281065117, "grad_norm": 494.67193603515625, "learning_rate": 1.960408134909134e-06, "loss": 47.1016, "step": 12379 }, { "epoch": 0.11718934883236622, "grad_norm": 306.45196533203125, "learning_rate": 1.9603995931351445e-06, "loss": 17.1094, "step": 12380 }, { "epoch": 0.11719881485408128, "grad_norm": 635.1378784179688, "learning_rate": 1.960391050458442e-06, "loss": 47.6094, "step": 12381 }, { "epoch": 0.11720828087579632, "grad_norm": 513.6050415039062, "learning_rate": 1.960382506879035e-06, "loss": 37.2188, "step": 12382 }, { "epoch": 0.11721774689751138, "grad_norm": 495.2779541015625, "learning_rate": 1.96037396239693e-06, "loss": 41.418, "step": 12383 }, { "epoch": 0.11722721291922644, "grad_norm": 176.64419555664062, "learning_rate": 1.960365417012136e-06, "loss": 18.2188, "step": 12384 }, { "epoch": 0.11723667894094149, "grad_norm": 279.817138671875, "learning_rate": 1.960356870724661e-06, "loss": 11.1992, "step": 12385 }, { "epoch": 0.11724614496265655, "grad_norm": 233.22576904296875, "learning_rate": 1.9603483235345135e-06, "loss": 16.5391, "step": 12386 }, { "epoch": 0.1172556109843716, "grad_norm": 842.42333984375, "learning_rate": 1.9603397754417007e-06, "loss": 25.0078, "step": 12387 }, { "epoch": 0.11726507700608665, "grad_norm": 321.392822265625, "learning_rate": 1.9603312264462314e-06, "loss": 24.7578, "step": 12388 }, { "epoch": 0.1172745430278017, "grad_norm": 315.1275329589844, "learning_rate": 1.9603226765481127e-06, "loss": 19.3047, "step": 12389 }, { "epoch": 0.11728400904951676, "grad_norm": 731.4098510742188, "learning_rate": 1.9603141257473533e-06, "loss": 52.3906, "step": 12390 }, { "epoch": 0.11729347507123182, "grad_norm": 163.94154357910156, "learning_rate": 1.9603055740439612e-06, "loss": 20.75, "step": 12391 }, { "epoch": 0.11730294109294687, "grad_norm": 586.4993286132812, "learning_rate": 1.960297021437944e-06, "loss": 38.6328, "step": 12392 }, { "epoch": 0.11731240711466193, "grad_norm": 447.51849365234375, "learning_rate": 1.9602884679293106e-06, "loss": 39.5312, "step": 12393 }, { "epoch": 0.11732187313637697, "grad_norm": 233.42306518554688, "learning_rate": 1.960279913518068e-06, "loss": 11.9688, "step": 12394 }, { "epoch": 0.11733133915809203, "grad_norm": 487.4021301269531, "learning_rate": 1.9602713582042253e-06, "loss": 27.4492, "step": 12395 }, { "epoch": 0.11734080517980708, "grad_norm": 376.8360595703125, "learning_rate": 1.9602628019877897e-06, "loss": 18.8359, "step": 12396 }, { "epoch": 0.11735027120152214, "grad_norm": 542.5848388671875, "learning_rate": 1.9602542448687696e-06, "loss": 49.0, "step": 12397 }, { "epoch": 0.11735973722323718, "grad_norm": 232.25555419921875, "learning_rate": 1.960245686847173e-06, "loss": 18.9844, "step": 12398 }, { "epoch": 0.11736920324495224, "grad_norm": 285.953857421875, "learning_rate": 1.9602371279230076e-06, "loss": 19.0781, "step": 12399 }, { "epoch": 0.1173786692666673, "grad_norm": 419.6654052734375, "learning_rate": 1.9602285680962824e-06, "loss": 22.8125, "step": 12400 }, { "epoch": 0.11738813528838235, "grad_norm": 316.36590576171875, "learning_rate": 1.9602200073670043e-06, "loss": 28.875, "step": 12401 }, { "epoch": 0.11739760131009741, "grad_norm": 3.524275779724121, "learning_rate": 1.9602114457351816e-06, "loss": 0.8794, "step": 12402 }, { "epoch": 0.11740706733181246, "grad_norm": 522.2174682617188, "learning_rate": 1.9602028832008233e-06, "loss": 32.3906, "step": 12403 }, { "epoch": 0.11741653335352752, "grad_norm": 682.33984375, "learning_rate": 1.960194319763936e-06, "loss": 60.8516, "step": 12404 }, { "epoch": 0.11742599937524256, "grad_norm": 829.5569458007812, "learning_rate": 1.960185755424529e-06, "loss": 53.8594, "step": 12405 }, { "epoch": 0.11743546539695762, "grad_norm": 296.19146728515625, "learning_rate": 1.96017719018261e-06, "loss": 28.7891, "step": 12406 }, { "epoch": 0.11744493141867267, "grad_norm": 406.997314453125, "learning_rate": 1.9601686240381868e-06, "loss": 25.0234, "step": 12407 }, { "epoch": 0.11745439744038773, "grad_norm": 3.209547996520996, "learning_rate": 1.9601600569912672e-06, "loss": 0.8462, "step": 12408 }, { "epoch": 0.11746386346210279, "grad_norm": 950.633544921875, "learning_rate": 1.96015148904186e-06, "loss": 44.8203, "step": 12409 }, { "epoch": 0.11747332948381783, "grad_norm": 393.5982360839844, "learning_rate": 1.960142920189973e-06, "loss": 23.1797, "step": 12410 }, { "epoch": 0.1174827955055329, "grad_norm": 237.84645080566406, "learning_rate": 1.9601343504356138e-06, "loss": 15.4648, "step": 12411 }, { "epoch": 0.11749226152724794, "grad_norm": 387.9979553222656, "learning_rate": 1.9601257797787906e-06, "loss": 38.1875, "step": 12412 }, { "epoch": 0.117501727548963, "grad_norm": 429.833251953125, "learning_rate": 1.960117208219512e-06, "loss": 27.3438, "step": 12413 }, { "epoch": 0.11751119357067805, "grad_norm": 483.8321838378906, "learning_rate": 1.9601086357577855e-06, "loss": 35.3125, "step": 12414 }, { "epoch": 0.1175206595923931, "grad_norm": 374.3832702636719, "learning_rate": 1.9601000623936193e-06, "loss": 12.2461, "step": 12415 }, { "epoch": 0.11753012561410815, "grad_norm": 268.9271545410156, "learning_rate": 1.9600914881270216e-06, "loss": 27.0625, "step": 12416 }, { "epoch": 0.11753959163582321, "grad_norm": 551.5211181640625, "learning_rate": 1.960082912958e-06, "loss": 31.375, "step": 12417 }, { "epoch": 0.11754905765753827, "grad_norm": 2.946882963180542, "learning_rate": 1.9600743368865637e-06, "loss": 0.9072, "step": 12418 }, { "epoch": 0.11755852367925332, "grad_norm": 364.9281311035156, "learning_rate": 1.9600657599127196e-06, "loss": 38.5469, "step": 12419 }, { "epoch": 0.11756798970096838, "grad_norm": 379.7557067871094, "learning_rate": 1.9600571820364758e-06, "loss": 22.1484, "step": 12420 }, { "epoch": 0.11757745572268342, "grad_norm": 353.2918701171875, "learning_rate": 1.9600486032578407e-06, "loss": 30.1094, "step": 12421 }, { "epoch": 0.11758692174439848, "grad_norm": 3.765556812286377, "learning_rate": 1.960040023576823e-06, "loss": 1.0791, "step": 12422 }, { "epoch": 0.11759638776611353, "grad_norm": 321.40576171875, "learning_rate": 1.9600314429934297e-06, "loss": 21.2969, "step": 12423 }, { "epoch": 0.11760585378782859, "grad_norm": 408.2489929199219, "learning_rate": 1.9600228615076693e-06, "loss": 29.7422, "step": 12424 }, { "epoch": 0.11761531980954364, "grad_norm": 537.9639282226562, "learning_rate": 1.9600142791195503e-06, "loss": 38.9844, "step": 12425 }, { "epoch": 0.1176247858312587, "grad_norm": 175.30056762695312, "learning_rate": 1.96000569582908e-06, "loss": 20.0391, "step": 12426 }, { "epoch": 0.11763425185297376, "grad_norm": 391.8290710449219, "learning_rate": 1.9599971116362662e-06, "loss": 45.8906, "step": 12427 }, { "epoch": 0.1176437178746888, "grad_norm": 3.3886630535125732, "learning_rate": 1.9599885265411185e-06, "loss": 0.9143, "step": 12428 }, { "epoch": 0.11765318389640386, "grad_norm": 560.2987670898438, "learning_rate": 1.9599799405436435e-06, "loss": 38.5, "step": 12429 }, { "epoch": 0.11766264991811891, "grad_norm": 192.4230194091797, "learning_rate": 1.95997135364385e-06, "loss": 24.8594, "step": 12430 }, { "epoch": 0.11767211593983397, "grad_norm": 232.3142547607422, "learning_rate": 1.959962765841746e-06, "loss": 21.6172, "step": 12431 }, { "epoch": 0.11768158196154901, "grad_norm": 307.54107666015625, "learning_rate": 1.959954177137339e-06, "loss": 25.4336, "step": 12432 }, { "epoch": 0.11769104798326407, "grad_norm": 201.4305419921875, "learning_rate": 1.959945587530638e-06, "loss": 19.1328, "step": 12433 }, { "epoch": 0.11770051400497913, "grad_norm": 366.0924072265625, "learning_rate": 1.9599369970216505e-06, "loss": 21.8906, "step": 12434 }, { "epoch": 0.11770998002669418, "grad_norm": 282.861328125, "learning_rate": 1.9599284056103847e-06, "loss": 23.8594, "step": 12435 }, { "epoch": 0.11771944604840924, "grad_norm": 877.1126708984375, "learning_rate": 1.9599198132968485e-06, "loss": 44.7031, "step": 12436 }, { "epoch": 0.11772891207012429, "grad_norm": 353.4941711425781, "learning_rate": 1.95991122008105e-06, "loss": 11.7383, "step": 12437 }, { "epoch": 0.11773837809183935, "grad_norm": 408.1835021972656, "learning_rate": 1.9599026259629974e-06, "loss": 38.9297, "step": 12438 }, { "epoch": 0.11774784411355439, "grad_norm": 2.9579129219055176, "learning_rate": 1.959894030942699e-06, "loss": 0.936, "step": 12439 }, { "epoch": 0.11775731013526945, "grad_norm": 3.474785327911377, "learning_rate": 1.9598854350201626e-06, "loss": 0.938, "step": 12440 }, { "epoch": 0.1177667761569845, "grad_norm": 3.377340078353882, "learning_rate": 1.9598768381953966e-06, "loss": 0.9202, "step": 12441 }, { "epoch": 0.11777624217869956, "grad_norm": 461.2964782714844, "learning_rate": 1.9598682404684084e-06, "loss": 18.6641, "step": 12442 }, { "epoch": 0.11778570820041462, "grad_norm": 314.5745544433594, "learning_rate": 1.959859641839207e-06, "loss": 29.5, "step": 12443 }, { "epoch": 0.11779517422212966, "grad_norm": 234.4283447265625, "learning_rate": 1.959851042307799e-06, "loss": 22.9531, "step": 12444 }, { "epoch": 0.11780464024384472, "grad_norm": 589.3839111328125, "learning_rate": 1.9598424418741947e-06, "loss": 14.9609, "step": 12445 }, { "epoch": 0.11781410626555977, "grad_norm": 837.8843383789062, "learning_rate": 1.9598338405384e-06, "loss": 48.7344, "step": 12446 }, { "epoch": 0.11782357228727483, "grad_norm": 175.9969024658203, "learning_rate": 1.9598252383004245e-06, "loss": 16.2891, "step": 12447 }, { "epoch": 0.11783303830898988, "grad_norm": 474.00469970703125, "learning_rate": 1.9598166351602753e-06, "loss": 17.6797, "step": 12448 }, { "epoch": 0.11784250433070494, "grad_norm": 318.4284973144531, "learning_rate": 1.959808031117961e-06, "loss": 21.7031, "step": 12449 }, { "epoch": 0.11785197035241998, "grad_norm": 238.19793701171875, "learning_rate": 1.95979942617349e-06, "loss": 21.0078, "step": 12450 }, { "epoch": 0.11786143637413504, "grad_norm": 389.4842834472656, "learning_rate": 1.959790820326869e-06, "loss": 26.1797, "step": 12451 }, { "epoch": 0.1178709023958501, "grad_norm": 345.2535400390625, "learning_rate": 1.959782213578108e-06, "loss": 23.2344, "step": 12452 }, { "epoch": 0.11788036841756515, "grad_norm": 256.04901123046875, "learning_rate": 1.959773605927214e-06, "loss": 16.9023, "step": 12453 }, { "epoch": 0.11788983443928021, "grad_norm": 647.462890625, "learning_rate": 1.9597649973741953e-06, "loss": 18.6797, "step": 12454 }, { "epoch": 0.11789930046099525, "grad_norm": 403.5340270996094, "learning_rate": 1.9597563879190597e-06, "loss": 21.6953, "step": 12455 }, { "epoch": 0.11790876648271031, "grad_norm": 364.8856201171875, "learning_rate": 1.9597477775618154e-06, "loss": 48.0547, "step": 12456 }, { "epoch": 0.11791823250442536, "grad_norm": 342.1200866699219, "learning_rate": 1.959739166302471e-06, "loss": 39.0156, "step": 12457 }, { "epoch": 0.11792769852614042, "grad_norm": 255.42831420898438, "learning_rate": 1.959730554141034e-06, "loss": 19.6719, "step": 12458 }, { "epoch": 0.11793716454785547, "grad_norm": 250.64439392089844, "learning_rate": 1.9597219410775128e-06, "loss": 20.3242, "step": 12459 }, { "epoch": 0.11794663056957053, "grad_norm": 481.6292419433594, "learning_rate": 1.9597133271119153e-06, "loss": 23.6641, "step": 12460 }, { "epoch": 0.11795609659128559, "grad_norm": 307.3537292480469, "learning_rate": 1.9597047122442498e-06, "loss": 13.6953, "step": 12461 }, { "epoch": 0.11796556261300063, "grad_norm": 340.2870788574219, "learning_rate": 1.9596960964745242e-06, "loss": 22.8281, "step": 12462 }, { "epoch": 0.11797502863471569, "grad_norm": 341.7785949707031, "learning_rate": 1.959687479802747e-06, "loss": 21.625, "step": 12463 }, { "epoch": 0.11798449465643074, "grad_norm": 417.7615051269531, "learning_rate": 1.9596788622289257e-06, "loss": 23.8047, "step": 12464 }, { "epoch": 0.1179939606781458, "grad_norm": 289.7916259765625, "learning_rate": 1.959670243753069e-06, "loss": 18.418, "step": 12465 }, { "epoch": 0.11800342669986084, "grad_norm": 248.77560424804688, "learning_rate": 1.959661624375184e-06, "loss": 30.1875, "step": 12466 }, { "epoch": 0.1180128927215759, "grad_norm": 270.22125244140625, "learning_rate": 1.9596530040952805e-06, "loss": 17.8672, "step": 12467 }, { "epoch": 0.11802235874329095, "grad_norm": 376.4422607421875, "learning_rate": 1.959644382913365e-06, "loss": 38.0938, "step": 12468 }, { "epoch": 0.11803182476500601, "grad_norm": 666.9505004882812, "learning_rate": 1.959635760829446e-06, "loss": 72.0625, "step": 12469 }, { "epoch": 0.11804129078672107, "grad_norm": 326.1933288574219, "learning_rate": 1.959627137843532e-06, "loss": 20.4336, "step": 12470 }, { "epoch": 0.11805075680843612, "grad_norm": 428.2121887207031, "learning_rate": 1.959618513955631e-06, "loss": 27.3828, "step": 12471 }, { "epoch": 0.11806022283015118, "grad_norm": 211.4190673828125, "learning_rate": 1.9596098891657512e-06, "loss": 8.2344, "step": 12472 }, { "epoch": 0.11806968885186622, "grad_norm": 502.3570861816406, "learning_rate": 1.9596012634739004e-06, "loss": 48.3125, "step": 12473 }, { "epoch": 0.11807915487358128, "grad_norm": 484.7079772949219, "learning_rate": 1.959592636880087e-06, "loss": 53.2188, "step": 12474 }, { "epoch": 0.11808862089529633, "grad_norm": 643.5562133789062, "learning_rate": 1.9595840093843185e-06, "loss": 45.7344, "step": 12475 }, { "epoch": 0.11809808691701139, "grad_norm": 355.9582824707031, "learning_rate": 1.959575380986604e-06, "loss": 38.0156, "step": 12476 }, { "epoch": 0.11810755293872645, "grad_norm": 803.7570190429688, "learning_rate": 1.9595667516869505e-06, "loss": 41.2422, "step": 12477 }, { "epoch": 0.1181170189604415, "grad_norm": 177.93548583984375, "learning_rate": 1.959558121485367e-06, "loss": 20.6875, "step": 12478 }, { "epoch": 0.11812648498215655, "grad_norm": 3.6072168350219727, "learning_rate": 1.9595494903818613e-06, "loss": 1.0024, "step": 12479 }, { "epoch": 0.1181359510038716, "grad_norm": 789.9855346679688, "learning_rate": 1.9595408583764416e-06, "loss": 23.6328, "step": 12480 }, { "epoch": 0.11814541702558666, "grad_norm": 772.53173828125, "learning_rate": 1.9595322254691156e-06, "loss": 74.9375, "step": 12481 }, { "epoch": 0.1181548830473017, "grad_norm": 224.50225830078125, "learning_rate": 1.959523591659892e-06, "loss": 19.9297, "step": 12482 }, { "epoch": 0.11816434906901677, "grad_norm": 219.3191680908203, "learning_rate": 1.9595149569487786e-06, "loss": 19.5469, "step": 12483 }, { "epoch": 0.11817381509073181, "grad_norm": 333.986328125, "learning_rate": 1.9595063213357833e-06, "loss": 30.2969, "step": 12484 }, { "epoch": 0.11818328111244687, "grad_norm": 308.228515625, "learning_rate": 1.959497684820915e-06, "loss": 20.3438, "step": 12485 }, { "epoch": 0.11819274713416193, "grad_norm": 340.3928527832031, "learning_rate": 1.959489047404181e-06, "loss": 34.4062, "step": 12486 }, { "epoch": 0.11820221315587698, "grad_norm": 246.6578369140625, "learning_rate": 1.9594804090855893e-06, "loss": 27.0391, "step": 12487 }, { "epoch": 0.11821167917759204, "grad_norm": 442.4824523925781, "learning_rate": 1.959471769865149e-06, "loss": 14.9062, "step": 12488 }, { "epoch": 0.11822114519930708, "grad_norm": 3.0629212856292725, "learning_rate": 1.9594631297428674e-06, "loss": 0.8789, "step": 12489 }, { "epoch": 0.11823061122102214, "grad_norm": 428.0545959472656, "learning_rate": 1.9594544887187533e-06, "loss": 24.7422, "step": 12490 }, { "epoch": 0.11824007724273719, "grad_norm": 241.12693786621094, "learning_rate": 1.959445846792814e-06, "loss": 13.4219, "step": 12491 }, { "epoch": 0.11824954326445225, "grad_norm": 490.22406005859375, "learning_rate": 1.959437203965058e-06, "loss": 38.0547, "step": 12492 }, { "epoch": 0.1182590092861673, "grad_norm": 2.8312249183654785, "learning_rate": 1.9594285602354935e-06, "loss": 0.97, "step": 12493 }, { "epoch": 0.11826847530788236, "grad_norm": 262.7889709472656, "learning_rate": 1.9594199156041286e-06, "loss": 15.9375, "step": 12494 }, { "epoch": 0.11827794132959742, "grad_norm": 226.67605590820312, "learning_rate": 1.9594112700709715e-06, "loss": 22.1641, "step": 12495 }, { "epoch": 0.11828740735131246, "grad_norm": 3.186171054840088, "learning_rate": 1.95940262363603e-06, "loss": 0.9941, "step": 12496 }, { "epoch": 0.11829687337302752, "grad_norm": 567.8453979492188, "learning_rate": 1.9593939762993126e-06, "loss": 39.5156, "step": 12497 }, { "epoch": 0.11830633939474257, "grad_norm": 338.14703369140625, "learning_rate": 1.959385328060827e-06, "loss": 33.6406, "step": 12498 }, { "epoch": 0.11831580541645763, "grad_norm": 656.4912109375, "learning_rate": 1.9593766789205822e-06, "loss": 39.5352, "step": 12499 }, { "epoch": 0.11832527143817267, "grad_norm": 606.309326171875, "learning_rate": 1.9593680288785852e-06, "loss": 20.9453, "step": 12500 }, { "epoch": 0.11833473745988773, "grad_norm": 252.27044677734375, "learning_rate": 1.9593593779348446e-06, "loss": 15.2188, "step": 12501 }, { "epoch": 0.11834420348160278, "grad_norm": 616.5584106445312, "learning_rate": 1.959350726089369e-06, "loss": 21.3203, "step": 12502 }, { "epoch": 0.11835366950331784, "grad_norm": 270.822998046875, "learning_rate": 1.959342073342166e-06, "loss": 22.2578, "step": 12503 }, { "epoch": 0.1183631355250329, "grad_norm": 339.8631591796875, "learning_rate": 1.9593334196932438e-06, "loss": 12.6992, "step": 12504 }, { "epoch": 0.11837260154674795, "grad_norm": 3.312018394470215, "learning_rate": 1.9593247651426107e-06, "loss": 1.1294, "step": 12505 }, { "epoch": 0.118382067568463, "grad_norm": 257.2930603027344, "learning_rate": 1.9593161096902746e-06, "loss": 24.2188, "step": 12506 }, { "epoch": 0.11839153359017805, "grad_norm": 852.0159912109375, "learning_rate": 1.959307453336244e-06, "loss": 37.0781, "step": 12507 }, { "epoch": 0.11840099961189311, "grad_norm": 251.69371032714844, "learning_rate": 1.9592987960805265e-06, "loss": 23.1758, "step": 12508 }, { "epoch": 0.11841046563360816, "grad_norm": 475.1164245605469, "learning_rate": 1.9592901379231305e-06, "loss": 20.9688, "step": 12509 }, { "epoch": 0.11841993165532322, "grad_norm": 630.6978759765625, "learning_rate": 1.959281478864064e-06, "loss": 26.0078, "step": 12510 }, { "epoch": 0.11842939767703826, "grad_norm": 267.9891052246094, "learning_rate": 1.959272818903336e-06, "loss": 18.9609, "step": 12511 }, { "epoch": 0.11843886369875332, "grad_norm": 352.7592468261719, "learning_rate": 1.959264158040954e-06, "loss": 20.1641, "step": 12512 }, { "epoch": 0.11844832972046838, "grad_norm": 712.119140625, "learning_rate": 1.959255496276925e-06, "loss": 38.9844, "step": 12513 }, { "epoch": 0.11845779574218343, "grad_norm": 708.5888061523438, "learning_rate": 1.959246833611259e-06, "loss": 43.8828, "step": 12514 }, { "epoch": 0.11846726176389849, "grad_norm": 296.8065185546875, "learning_rate": 1.959238170043963e-06, "loss": 24.9844, "step": 12515 }, { "epoch": 0.11847672778561354, "grad_norm": 188.07882690429688, "learning_rate": 1.9592295055750464e-06, "loss": 19.8125, "step": 12516 }, { "epoch": 0.1184861938073286, "grad_norm": 387.41314697265625, "learning_rate": 1.9592208402045157e-06, "loss": 16.7734, "step": 12517 }, { "epoch": 0.11849565982904364, "grad_norm": 559.8042602539062, "learning_rate": 1.95921217393238e-06, "loss": 27.8438, "step": 12518 }, { "epoch": 0.1185051258507587, "grad_norm": 315.2022399902344, "learning_rate": 1.9592035067586476e-06, "loss": 32.5156, "step": 12519 }, { "epoch": 0.11851459187247376, "grad_norm": 236.75318908691406, "learning_rate": 1.9591948386833255e-06, "loss": 15.6758, "step": 12520 }, { "epoch": 0.11852405789418881, "grad_norm": 251.50946044921875, "learning_rate": 1.9591861697064233e-06, "loss": 15.4922, "step": 12521 }, { "epoch": 0.11853352391590387, "grad_norm": 348.7432556152344, "learning_rate": 1.959177499827948e-06, "loss": 33.4688, "step": 12522 }, { "epoch": 0.11854298993761891, "grad_norm": 1606.3087158203125, "learning_rate": 1.9591688290479087e-06, "loss": 63.625, "step": 12523 }, { "epoch": 0.11855245595933397, "grad_norm": 241.6333770751953, "learning_rate": 1.959160157366313e-06, "loss": 22.7031, "step": 12524 }, { "epoch": 0.11856192198104902, "grad_norm": 403.9260559082031, "learning_rate": 1.959151484783169e-06, "loss": 30.7188, "step": 12525 }, { "epoch": 0.11857138800276408, "grad_norm": 346.64520263671875, "learning_rate": 1.9591428112984855e-06, "loss": 21.4688, "step": 12526 }, { "epoch": 0.11858085402447913, "grad_norm": 267.21246337890625, "learning_rate": 1.9591341369122695e-06, "loss": 17.2734, "step": 12527 }, { "epoch": 0.11859032004619419, "grad_norm": 291.94482421875, "learning_rate": 1.9591254616245305e-06, "loss": 36.8125, "step": 12528 }, { "epoch": 0.11859978606790925, "grad_norm": 216.09115600585938, "learning_rate": 1.9591167854352752e-06, "loss": 11.1016, "step": 12529 }, { "epoch": 0.11860925208962429, "grad_norm": 3.05702543258667, "learning_rate": 1.959108108344513e-06, "loss": 0.9248, "step": 12530 }, { "epoch": 0.11861871811133935, "grad_norm": 400.4447326660156, "learning_rate": 1.959099430352251e-06, "loss": 21.0664, "step": 12531 }, { "epoch": 0.1186281841330544, "grad_norm": 3.297555685043335, "learning_rate": 1.9590907514584985e-06, "loss": 0.8975, "step": 12532 }, { "epoch": 0.11863765015476946, "grad_norm": 213.8272705078125, "learning_rate": 1.959082071663263e-06, "loss": 13.8203, "step": 12533 }, { "epoch": 0.1186471161764845, "grad_norm": 590.6350708007812, "learning_rate": 1.959073390966553e-06, "loss": 60.9531, "step": 12534 }, { "epoch": 0.11865658219819956, "grad_norm": 222.63308715820312, "learning_rate": 1.9590647093683757e-06, "loss": 24.3125, "step": 12535 }, { "epoch": 0.11866604821991461, "grad_norm": 367.02911376953125, "learning_rate": 1.9590560268687402e-06, "loss": 16.875, "step": 12536 }, { "epoch": 0.11867551424162967, "grad_norm": 3.512831211090088, "learning_rate": 1.959047343467655e-06, "loss": 0.9253, "step": 12537 }, { "epoch": 0.11868498026334473, "grad_norm": 297.1220703125, "learning_rate": 1.959038659165127e-06, "loss": 13.5117, "step": 12538 }, { "epoch": 0.11869444628505978, "grad_norm": 3.5804595947265625, "learning_rate": 1.9590299739611655e-06, "loss": 0.9209, "step": 12539 }, { "epoch": 0.11870391230677484, "grad_norm": 995.9786987304688, "learning_rate": 1.9590212878557778e-06, "loss": 27.1016, "step": 12540 }, { "epoch": 0.11871337832848988, "grad_norm": 437.1162109375, "learning_rate": 1.959012600848973e-06, "loss": 29.5, "step": 12541 }, { "epoch": 0.11872284435020494, "grad_norm": 240.74998474121094, "learning_rate": 1.9590039129407584e-06, "loss": 18.2148, "step": 12542 }, { "epoch": 0.11873231037191999, "grad_norm": 4.232676029205322, "learning_rate": 1.9589952241311426e-06, "loss": 1.1045, "step": 12543 }, { "epoch": 0.11874177639363505, "grad_norm": 2.8563342094421387, "learning_rate": 1.9589865344201337e-06, "loss": 1.0029, "step": 12544 }, { "epoch": 0.1187512424153501, "grad_norm": 510.369140625, "learning_rate": 1.9589778438077395e-06, "loss": 45.0625, "step": 12545 }, { "epoch": 0.11876070843706515, "grad_norm": 604.0240478515625, "learning_rate": 1.958969152293969e-06, "loss": 34.625, "step": 12546 }, { "epoch": 0.11877017445878021, "grad_norm": 388.9475402832031, "learning_rate": 1.9589604598788293e-06, "loss": 17.2539, "step": 12547 }, { "epoch": 0.11877964048049526, "grad_norm": 298.2950134277344, "learning_rate": 1.95895176656233e-06, "loss": 15.8203, "step": 12548 }, { "epoch": 0.11878910650221032, "grad_norm": 241.49867248535156, "learning_rate": 1.9589430723444777e-06, "loss": 29.8438, "step": 12549 }, { "epoch": 0.11879857252392537, "grad_norm": 541.2489013671875, "learning_rate": 1.9589343772252816e-06, "loss": 38.6797, "step": 12550 }, { "epoch": 0.11880803854564043, "grad_norm": 431.486083984375, "learning_rate": 1.9589256812047493e-06, "loss": 34.1719, "step": 12551 }, { "epoch": 0.11881750456735547, "grad_norm": 212.97634887695312, "learning_rate": 1.9589169842828897e-06, "loss": 19.5312, "step": 12552 }, { "epoch": 0.11882697058907053, "grad_norm": 207.6826171875, "learning_rate": 1.9589082864597104e-06, "loss": 12.875, "step": 12553 }, { "epoch": 0.11883643661078558, "grad_norm": 415.3200988769531, "learning_rate": 1.9588995877352194e-06, "loss": 57.9531, "step": 12554 }, { "epoch": 0.11884590263250064, "grad_norm": 702.1846313476562, "learning_rate": 1.958890888109425e-06, "loss": 56.9062, "step": 12555 }, { "epoch": 0.1188553686542157, "grad_norm": 560.6973266601562, "learning_rate": 1.9588821875823363e-06, "loss": 41.2344, "step": 12556 }, { "epoch": 0.11886483467593074, "grad_norm": 404.33599853515625, "learning_rate": 1.9588734861539603e-06, "loss": 39.7578, "step": 12557 }, { "epoch": 0.1188743006976458, "grad_norm": 473.55450439453125, "learning_rate": 1.9588647838243056e-06, "loss": 18.4375, "step": 12558 }, { "epoch": 0.11888376671936085, "grad_norm": 634.216552734375, "learning_rate": 1.9588560805933804e-06, "loss": 30.8906, "step": 12559 }, { "epoch": 0.11889323274107591, "grad_norm": 753.8851318359375, "learning_rate": 1.9588473764611926e-06, "loss": 45.3906, "step": 12560 }, { "epoch": 0.11890269876279096, "grad_norm": 374.0474853515625, "learning_rate": 1.9588386714277508e-06, "loss": 20.9297, "step": 12561 }, { "epoch": 0.11891216478450602, "grad_norm": 316.0805358886719, "learning_rate": 1.9588299654930633e-06, "loss": 21.5, "step": 12562 }, { "epoch": 0.11892163080622108, "grad_norm": 593.03564453125, "learning_rate": 1.958821258657138e-06, "loss": 47.9688, "step": 12563 }, { "epoch": 0.11893109682793612, "grad_norm": 418.44781494140625, "learning_rate": 1.958812550919983e-06, "loss": 29.5781, "step": 12564 }, { "epoch": 0.11894056284965118, "grad_norm": 474.8135070800781, "learning_rate": 1.9588038422816062e-06, "loss": 23.7734, "step": 12565 }, { "epoch": 0.11895002887136623, "grad_norm": 317.1666564941406, "learning_rate": 1.9587951327420164e-06, "loss": 41.0938, "step": 12566 }, { "epoch": 0.11895949489308129, "grad_norm": 193.60215759277344, "learning_rate": 1.9587864223012217e-06, "loss": 21.9531, "step": 12567 }, { "epoch": 0.11896896091479633, "grad_norm": 242.1050567626953, "learning_rate": 1.95877771095923e-06, "loss": 16.4805, "step": 12568 }, { "epoch": 0.1189784269365114, "grad_norm": 3.840545892715454, "learning_rate": 1.9587689987160497e-06, "loss": 1.0161, "step": 12569 }, { "epoch": 0.11898789295822644, "grad_norm": 250.2222442626953, "learning_rate": 1.958760285571689e-06, "loss": 22.9766, "step": 12570 }, { "epoch": 0.1189973589799415, "grad_norm": 259.39788818359375, "learning_rate": 1.9587515715261557e-06, "loss": 19.9062, "step": 12571 }, { "epoch": 0.11900682500165656, "grad_norm": 198.34207153320312, "learning_rate": 1.9587428565794582e-06, "loss": 17.1797, "step": 12572 }, { "epoch": 0.1190162910233716, "grad_norm": 3.1476030349731445, "learning_rate": 1.9587341407316054e-06, "loss": 0.9268, "step": 12573 }, { "epoch": 0.11902575704508667, "grad_norm": 230.71533203125, "learning_rate": 1.9587254239826044e-06, "loss": 19.0, "step": 12574 }, { "epoch": 0.11903522306680171, "grad_norm": 4.077130317687988, "learning_rate": 1.958716706332464e-06, "loss": 0.9453, "step": 12575 }, { "epoch": 0.11904468908851677, "grad_norm": 523.7610473632812, "learning_rate": 1.9587079877811925e-06, "loss": 21.9453, "step": 12576 }, { "epoch": 0.11905415511023182, "grad_norm": 209.10943603515625, "learning_rate": 1.9586992683287973e-06, "loss": 23.1719, "step": 12577 }, { "epoch": 0.11906362113194688, "grad_norm": 559.7361450195312, "learning_rate": 1.9586905479752874e-06, "loss": 48.1562, "step": 12578 }, { "epoch": 0.11907308715366192, "grad_norm": 690.4771118164062, "learning_rate": 1.9586818267206712e-06, "loss": 37.6758, "step": 12579 }, { "epoch": 0.11908255317537698, "grad_norm": 863.7325439453125, "learning_rate": 1.958673104564956e-06, "loss": 53.5195, "step": 12580 }, { "epoch": 0.11909201919709204, "grad_norm": 240.59878540039062, "learning_rate": 1.9586643815081506e-06, "loss": 20.6719, "step": 12581 }, { "epoch": 0.11910148521880709, "grad_norm": 2239.25732421875, "learning_rate": 1.958655657550263e-06, "loss": 19.1797, "step": 12582 }, { "epoch": 0.11911095124052215, "grad_norm": 283.17095947265625, "learning_rate": 1.9586469326913015e-06, "loss": 21.1875, "step": 12583 }, { "epoch": 0.1191204172622372, "grad_norm": 350.9524841308594, "learning_rate": 1.958638206931274e-06, "loss": 15.8047, "step": 12584 }, { "epoch": 0.11912988328395226, "grad_norm": 513.2554321289062, "learning_rate": 1.958629480270189e-06, "loss": 42.1719, "step": 12585 }, { "epoch": 0.1191393493056673, "grad_norm": 463.993408203125, "learning_rate": 1.958620752708055e-06, "loss": 46.7031, "step": 12586 }, { "epoch": 0.11914881532738236, "grad_norm": 215.9068603515625, "learning_rate": 1.9586120242448795e-06, "loss": 17.9102, "step": 12587 }, { "epoch": 0.11915828134909741, "grad_norm": 599.3005981445312, "learning_rate": 1.9586032948806712e-06, "loss": 38.1797, "step": 12588 }, { "epoch": 0.11916774737081247, "grad_norm": 438.133544921875, "learning_rate": 1.958594564615438e-06, "loss": 47.8281, "step": 12589 }, { "epoch": 0.11917721339252753, "grad_norm": 414.16058349609375, "learning_rate": 1.9585858334491883e-06, "loss": 25.2812, "step": 12590 }, { "epoch": 0.11918667941424257, "grad_norm": 278.4576110839844, "learning_rate": 1.9585771013819306e-06, "loss": 21.168, "step": 12591 }, { "epoch": 0.11919614543595763, "grad_norm": 264.0628356933594, "learning_rate": 1.9585683684136725e-06, "loss": 20.5781, "step": 12592 }, { "epoch": 0.11920561145767268, "grad_norm": 331.333251953125, "learning_rate": 1.958559634544423e-06, "loss": 17.4844, "step": 12593 }, { "epoch": 0.11921507747938774, "grad_norm": 3.0175671577453613, "learning_rate": 1.958550899774189e-06, "loss": 0.8643, "step": 12594 }, { "epoch": 0.11922454350110279, "grad_norm": 371.89935302734375, "learning_rate": 1.95854216410298e-06, "loss": 47.3438, "step": 12595 }, { "epoch": 0.11923400952281785, "grad_norm": 1402.785888671875, "learning_rate": 1.9585334275308036e-06, "loss": 88.5312, "step": 12596 }, { "epoch": 0.11924347554453289, "grad_norm": 450.6885986328125, "learning_rate": 1.958524690057668e-06, "loss": 23.1016, "step": 12597 }, { "epoch": 0.11925294156624795, "grad_norm": 258.53363037109375, "learning_rate": 1.9585159516835818e-06, "loss": 22.7188, "step": 12598 }, { "epoch": 0.11926240758796301, "grad_norm": 638.29443359375, "learning_rate": 1.958507212408553e-06, "loss": 51.5469, "step": 12599 }, { "epoch": 0.11927187360967806, "grad_norm": 290.3292541503906, "learning_rate": 1.9584984722325892e-06, "loss": 27.0938, "step": 12600 }, { "epoch": 0.11928133963139312, "grad_norm": 239.4757537841797, "learning_rate": 1.9584897311557e-06, "loss": 16.3516, "step": 12601 }, { "epoch": 0.11929080565310816, "grad_norm": 572.6786499023438, "learning_rate": 1.9584809891778925e-06, "loss": 23.9062, "step": 12602 }, { "epoch": 0.11930027167482322, "grad_norm": 457.0514831542969, "learning_rate": 1.9584722462991753e-06, "loss": 50.4375, "step": 12603 }, { "epoch": 0.11930973769653827, "grad_norm": 305.94866943359375, "learning_rate": 1.958463502519556e-06, "loss": 20.2852, "step": 12604 }, { "epoch": 0.11931920371825333, "grad_norm": 363.7744445800781, "learning_rate": 1.9584547578390442e-06, "loss": 20.2734, "step": 12605 }, { "epoch": 0.11932866973996838, "grad_norm": 207.93844604492188, "learning_rate": 1.958446012257647e-06, "loss": 28.0625, "step": 12606 }, { "epoch": 0.11933813576168344, "grad_norm": 950.7186279296875, "learning_rate": 1.9584372657753724e-06, "loss": 66.9609, "step": 12607 }, { "epoch": 0.1193476017833985, "grad_norm": 360.4522705078125, "learning_rate": 1.9584285183922293e-06, "loss": 17.5156, "step": 12608 }, { "epoch": 0.11935706780511354, "grad_norm": 363.8592529296875, "learning_rate": 1.958419770108226e-06, "loss": 36.8281, "step": 12609 }, { "epoch": 0.1193665338268286, "grad_norm": 425.701904296875, "learning_rate": 1.9584110209233704e-06, "loss": 32.2031, "step": 12610 }, { "epoch": 0.11937599984854365, "grad_norm": 242.68923950195312, "learning_rate": 1.958402270837671e-06, "loss": 19.2344, "step": 12611 }, { "epoch": 0.11938546587025871, "grad_norm": 318.7239074707031, "learning_rate": 1.958393519851135e-06, "loss": 18.9375, "step": 12612 }, { "epoch": 0.11939493189197375, "grad_norm": 296.7611999511719, "learning_rate": 1.9583847679637724e-06, "loss": 28.4844, "step": 12613 }, { "epoch": 0.11940439791368881, "grad_norm": 461.30426025390625, "learning_rate": 1.95837601517559e-06, "loss": 29.6641, "step": 12614 }, { "epoch": 0.11941386393540387, "grad_norm": 384.6173400878906, "learning_rate": 1.9583672614865966e-06, "loss": 42.4219, "step": 12615 }, { "epoch": 0.11942332995711892, "grad_norm": 173.589111328125, "learning_rate": 1.9583585068968e-06, "loss": 21.5078, "step": 12616 }, { "epoch": 0.11943279597883398, "grad_norm": 207.0529327392578, "learning_rate": 1.958349751406209e-06, "loss": 19.6172, "step": 12617 }, { "epoch": 0.11944226200054903, "grad_norm": 233.64682006835938, "learning_rate": 1.958340995014832e-06, "loss": 15.1016, "step": 12618 }, { "epoch": 0.11945172802226409, "grad_norm": 272.8251037597656, "learning_rate": 1.958332237722676e-06, "loss": 21.5781, "step": 12619 }, { "epoch": 0.11946119404397913, "grad_norm": 428.06756591796875, "learning_rate": 1.9583234795297506e-06, "loss": 31.0938, "step": 12620 }, { "epoch": 0.11947066006569419, "grad_norm": 360.3064880371094, "learning_rate": 1.958314720436063e-06, "loss": 23.3594, "step": 12621 }, { "epoch": 0.11948012608740924, "grad_norm": 686.0059814453125, "learning_rate": 1.9583059604416226e-06, "loss": 28.9609, "step": 12622 }, { "epoch": 0.1194895921091243, "grad_norm": 694.9517211914062, "learning_rate": 1.9582971995464367e-06, "loss": 34.6406, "step": 12623 }, { "epoch": 0.11949905813083936, "grad_norm": 3.250584363937378, "learning_rate": 1.9582884377505134e-06, "loss": 0.9097, "step": 12624 }, { "epoch": 0.1195085241525544, "grad_norm": 330.4075622558594, "learning_rate": 1.9582796750538616e-06, "loss": 17.1562, "step": 12625 }, { "epoch": 0.11951799017426946, "grad_norm": 298.5489807128906, "learning_rate": 1.9582709114564893e-06, "loss": 17.5391, "step": 12626 }, { "epoch": 0.11952745619598451, "grad_norm": 359.3769226074219, "learning_rate": 1.9582621469584047e-06, "loss": 23.4219, "step": 12627 }, { "epoch": 0.11953692221769957, "grad_norm": 263.82977294921875, "learning_rate": 1.9582533815596158e-06, "loss": 38.4844, "step": 12628 }, { "epoch": 0.11954638823941462, "grad_norm": 565.5381469726562, "learning_rate": 1.9582446152601313e-06, "loss": 40.9688, "step": 12629 }, { "epoch": 0.11955585426112968, "grad_norm": 157.33409118652344, "learning_rate": 1.958235848059959e-06, "loss": 21.2734, "step": 12630 }, { "epoch": 0.11956532028284472, "grad_norm": 283.2381591796875, "learning_rate": 1.9582270799591075e-06, "loss": 18.1016, "step": 12631 }, { "epoch": 0.11957478630455978, "grad_norm": 310.2468566894531, "learning_rate": 1.9582183109575846e-06, "loss": 21.7266, "step": 12632 }, { "epoch": 0.11958425232627484, "grad_norm": 497.8782653808594, "learning_rate": 1.9582095410553993e-06, "loss": 54.0, "step": 12633 }, { "epoch": 0.11959371834798989, "grad_norm": 159.71453857421875, "learning_rate": 1.9582007702525587e-06, "loss": 13.1406, "step": 12634 }, { "epoch": 0.11960318436970495, "grad_norm": 684.3792724609375, "learning_rate": 1.9581919985490723e-06, "loss": 62.8359, "step": 12635 }, { "epoch": 0.11961265039142, "grad_norm": 442.0411071777344, "learning_rate": 1.9581832259449476e-06, "loss": 19.6172, "step": 12636 }, { "epoch": 0.11962211641313505, "grad_norm": 3.187875986099243, "learning_rate": 1.9581744524401927e-06, "loss": 0.9219, "step": 12637 }, { "epoch": 0.1196315824348501, "grad_norm": 939.1146850585938, "learning_rate": 1.9581656780348164e-06, "loss": 45.6875, "step": 12638 }, { "epoch": 0.11964104845656516, "grad_norm": 3.0466225147247314, "learning_rate": 1.958156902728827e-06, "loss": 0.9688, "step": 12639 }, { "epoch": 0.1196505144782802, "grad_norm": 722.2459106445312, "learning_rate": 1.958148126522232e-06, "loss": 35.75, "step": 12640 }, { "epoch": 0.11965998049999527, "grad_norm": 485.54193115234375, "learning_rate": 1.95813934941504e-06, "loss": 20.7656, "step": 12641 }, { "epoch": 0.11966944652171033, "grad_norm": 385.2447814941406, "learning_rate": 1.9581305714072598e-06, "loss": 40.8516, "step": 12642 }, { "epoch": 0.11967891254342537, "grad_norm": 268.1156921386719, "learning_rate": 1.958121792498899e-06, "loss": 25.0312, "step": 12643 }, { "epoch": 0.11968837856514043, "grad_norm": 378.1156311035156, "learning_rate": 1.958113012689966e-06, "loss": 9.6367, "step": 12644 }, { "epoch": 0.11969784458685548, "grad_norm": 659.4326171875, "learning_rate": 1.958104231980469e-06, "loss": 19.2344, "step": 12645 }, { "epoch": 0.11970731060857054, "grad_norm": 277.9979248046875, "learning_rate": 1.9580954503704166e-06, "loss": 31.3906, "step": 12646 }, { "epoch": 0.11971677663028558, "grad_norm": 786.9117431640625, "learning_rate": 1.9580866678598163e-06, "loss": 22.6953, "step": 12647 }, { "epoch": 0.11972624265200064, "grad_norm": 487.9312744140625, "learning_rate": 1.9580778844486778e-06, "loss": 15.7188, "step": 12648 }, { "epoch": 0.11973570867371569, "grad_norm": 232.4349822998047, "learning_rate": 1.9580691001370077e-06, "loss": 28.7969, "step": 12649 }, { "epoch": 0.11974517469543075, "grad_norm": 448.3601989746094, "learning_rate": 1.958060314924815e-06, "loss": 22.875, "step": 12650 }, { "epoch": 0.11975464071714581, "grad_norm": 2662.25048828125, "learning_rate": 1.958051528812108e-06, "loss": 21.4531, "step": 12651 }, { "epoch": 0.11976410673886086, "grad_norm": 471.4386291503906, "learning_rate": 1.958042741798895e-06, "loss": 36.2969, "step": 12652 }, { "epoch": 0.11977357276057592, "grad_norm": 313.09295654296875, "learning_rate": 1.958033953885184e-06, "loss": 44.0781, "step": 12653 }, { "epoch": 0.11978303878229096, "grad_norm": 271.98748779296875, "learning_rate": 1.9580251650709837e-06, "loss": 22.8867, "step": 12654 }, { "epoch": 0.11979250480400602, "grad_norm": 346.9518127441406, "learning_rate": 1.958016375356302e-06, "loss": 25.1562, "step": 12655 }, { "epoch": 0.11980197082572107, "grad_norm": 2.9797236919403076, "learning_rate": 1.9580075847411468e-06, "loss": 1.0059, "step": 12656 }, { "epoch": 0.11981143684743613, "grad_norm": 3.3108537197113037, "learning_rate": 1.9579987932255267e-06, "loss": 0.8875, "step": 12657 }, { "epoch": 0.11982090286915119, "grad_norm": 409.4576110839844, "learning_rate": 1.957990000809451e-06, "loss": 42.6875, "step": 12658 }, { "epoch": 0.11983036889086623, "grad_norm": 273.18621826171875, "learning_rate": 1.957981207492926e-06, "loss": 29.0938, "step": 12659 }, { "epoch": 0.1198398349125813, "grad_norm": 523.5150146484375, "learning_rate": 1.9579724132759616e-06, "loss": 48.1094, "step": 12660 }, { "epoch": 0.11984930093429634, "grad_norm": 195.8870086669922, "learning_rate": 1.957963618158565e-06, "loss": 23.6641, "step": 12661 }, { "epoch": 0.1198587669560114, "grad_norm": 3.6095056533813477, "learning_rate": 1.9579548221407455e-06, "loss": 0.8057, "step": 12662 }, { "epoch": 0.11986823297772645, "grad_norm": 285.04339599609375, "learning_rate": 1.9579460252225104e-06, "loss": 18.3047, "step": 12663 }, { "epoch": 0.1198776989994415, "grad_norm": 289.2338562011719, "learning_rate": 1.9579372274038687e-06, "loss": 20.9062, "step": 12664 }, { "epoch": 0.11988716502115655, "grad_norm": 348.8600769042969, "learning_rate": 1.957928428684828e-06, "loss": 27.8125, "step": 12665 }, { "epoch": 0.11989663104287161, "grad_norm": 374.7373352050781, "learning_rate": 1.957919629065397e-06, "loss": 39.875, "step": 12666 }, { "epoch": 0.11990609706458667, "grad_norm": 298.4096984863281, "learning_rate": 1.9579108285455835e-06, "loss": 19.2031, "step": 12667 }, { "epoch": 0.11991556308630172, "grad_norm": 364.70806884765625, "learning_rate": 1.9579020271253966e-06, "loss": 36.7969, "step": 12668 }, { "epoch": 0.11992502910801678, "grad_norm": 253.83250427246094, "learning_rate": 1.9578932248048443e-06, "loss": 9.5078, "step": 12669 }, { "epoch": 0.11993449512973182, "grad_norm": 694.76708984375, "learning_rate": 1.957884421583934e-06, "loss": 52.625, "step": 12670 }, { "epoch": 0.11994396115144688, "grad_norm": 259.13482666015625, "learning_rate": 1.957875617462675e-06, "loss": 25.1875, "step": 12671 }, { "epoch": 0.11995342717316193, "grad_norm": 706.8151245117188, "learning_rate": 1.9578668124410753e-06, "loss": 28.0078, "step": 12672 }, { "epoch": 0.11996289319487699, "grad_norm": 506.1554870605469, "learning_rate": 1.9578580065191434e-06, "loss": 26.3125, "step": 12673 }, { "epoch": 0.11997235921659204, "grad_norm": 355.3739929199219, "learning_rate": 1.9578491996968866e-06, "loss": 25.2188, "step": 12674 }, { "epoch": 0.1199818252383071, "grad_norm": 487.9864807128906, "learning_rate": 1.9578403919743143e-06, "loss": 37.9062, "step": 12675 }, { "epoch": 0.11999129126002216, "grad_norm": 226.1724090576172, "learning_rate": 1.9578315833514344e-06, "loss": 14.6992, "step": 12676 }, { "epoch": 0.1200007572817372, "grad_norm": 528.4386596679688, "learning_rate": 1.9578227738282547e-06, "loss": 41.0938, "step": 12677 }, { "epoch": 0.12001022330345226, "grad_norm": 229.46368408203125, "learning_rate": 1.9578139634047844e-06, "loss": 25.4453, "step": 12678 }, { "epoch": 0.12001968932516731, "grad_norm": 2.8634090423583984, "learning_rate": 1.957805152081031e-06, "loss": 0.8379, "step": 12679 }, { "epoch": 0.12002915534688237, "grad_norm": 290.8581848144531, "learning_rate": 1.957796339857003e-06, "loss": 19.8516, "step": 12680 }, { "epoch": 0.12003862136859741, "grad_norm": 363.6197204589844, "learning_rate": 1.957787526732709e-06, "loss": 25.3906, "step": 12681 }, { "epoch": 0.12004808739031247, "grad_norm": 784.3346557617188, "learning_rate": 1.9577787127081572e-06, "loss": 56.1719, "step": 12682 }, { "epoch": 0.12005755341202752, "grad_norm": 191.24057006835938, "learning_rate": 1.9577698977833552e-06, "loss": 14.2578, "step": 12683 }, { "epoch": 0.12006701943374258, "grad_norm": 358.1613464355469, "learning_rate": 1.957761081958312e-06, "loss": 50.0469, "step": 12684 }, { "epoch": 0.12007648545545764, "grad_norm": 291.3686218261719, "learning_rate": 1.9577522652330358e-06, "loss": 20.1172, "step": 12685 }, { "epoch": 0.12008595147717269, "grad_norm": 388.2040710449219, "learning_rate": 1.957743447607535e-06, "loss": 38.9844, "step": 12686 }, { "epoch": 0.12009541749888775, "grad_norm": 337.4063415527344, "learning_rate": 1.957734629081817e-06, "loss": 44.8281, "step": 12687 }, { "epoch": 0.12010488352060279, "grad_norm": 474.8493347167969, "learning_rate": 1.957725809655891e-06, "loss": 39.8125, "step": 12688 }, { "epoch": 0.12011434954231785, "grad_norm": 549.4529418945312, "learning_rate": 1.957716989329765e-06, "loss": 32.8906, "step": 12689 }, { "epoch": 0.1201238155640329, "grad_norm": 389.65789794921875, "learning_rate": 1.9577081681034476e-06, "loss": 23.2812, "step": 12690 }, { "epoch": 0.12013328158574796, "grad_norm": 242.8768310546875, "learning_rate": 1.957699345976947e-06, "loss": 17.0195, "step": 12691 }, { "epoch": 0.120142747607463, "grad_norm": 370.0252685546875, "learning_rate": 1.957690522950271e-06, "loss": 15.9648, "step": 12692 }, { "epoch": 0.12015221362917806, "grad_norm": 3.5389516353607178, "learning_rate": 1.957681699023428e-06, "loss": 1.0073, "step": 12693 }, { "epoch": 0.12016167965089312, "grad_norm": 358.8053283691406, "learning_rate": 1.957672874196427e-06, "loss": 9.1309, "step": 12694 }, { "epoch": 0.12017114567260817, "grad_norm": 404.6819152832031, "learning_rate": 1.9576640484692757e-06, "loss": 15.2617, "step": 12695 }, { "epoch": 0.12018061169432323, "grad_norm": 452.05731201171875, "learning_rate": 1.9576552218419825e-06, "loss": 34.1719, "step": 12696 }, { "epoch": 0.12019007771603828, "grad_norm": 279.2704772949219, "learning_rate": 1.957646394314555e-06, "loss": 19.5938, "step": 12697 }, { "epoch": 0.12019954373775334, "grad_norm": 174.36550903320312, "learning_rate": 1.957637565887003e-06, "loss": 17.4766, "step": 12698 }, { "epoch": 0.12020900975946838, "grad_norm": 234.8516387939453, "learning_rate": 1.9576287365593337e-06, "loss": 19.4023, "step": 12699 }, { "epoch": 0.12021847578118344, "grad_norm": 258.3994140625, "learning_rate": 1.957619906331556e-06, "loss": 17.3125, "step": 12700 }, { "epoch": 0.1202279418028985, "grad_norm": 446.39788818359375, "learning_rate": 1.9576110752036773e-06, "loss": 12.6953, "step": 12701 }, { "epoch": 0.12023740782461355, "grad_norm": 332.29638671875, "learning_rate": 1.9576022431757067e-06, "loss": 39.6875, "step": 12702 }, { "epoch": 0.12024687384632861, "grad_norm": 585.9398193359375, "learning_rate": 1.9575934102476522e-06, "loss": 28.3203, "step": 12703 }, { "epoch": 0.12025633986804365, "grad_norm": 1278.21923828125, "learning_rate": 1.9575845764195224e-06, "loss": 65.4688, "step": 12704 }, { "epoch": 0.12026580588975871, "grad_norm": 272.8926696777344, "learning_rate": 1.9575757416913255e-06, "loss": 26.8125, "step": 12705 }, { "epoch": 0.12027527191147376, "grad_norm": 232.4720001220703, "learning_rate": 1.9575669060630698e-06, "loss": 23.4492, "step": 12706 }, { "epoch": 0.12028473793318882, "grad_norm": 2.6663765907287598, "learning_rate": 1.957558069534763e-06, "loss": 0.8984, "step": 12707 }, { "epoch": 0.12029420395490387, "grad_norm": 316.2236633300781, "learning_rate": 1.957549232106414e-06, "loss": 18.9766, "step": 12708 }, { "epoch": 0.12030366997661893, "grad_norm": 420.1238708496094, "learning_rate": 1.9575403937780313e-06, "loss": 37.8281, "step": 12709 }, { "epoch": 0.12031313599833399, "grad_norm": 402.8753356933594, "learning_rate": 1.957531554549623e-06, "loss": 37.5625, "step": 12710 }, { "epoch": 0.12032260202004903, "grad_norm": 3.206618070602417, "learning_rate": 1.9575227144211968e-06, "loss": 0.9663, "step": 12711 }, { "epoch": 0.12033206804176409, "grad_norm": 869.1317749023438, "learning_rate": 1.957513873392762e-06, "loss": 37.9297, "step": 12712 }, { "epoch": 0.12034153406347914, "grad_norm": 3.8954946994781494, "learning_rate": 1.9575050314643266e-06, "loss": 0.981, "step": 12713 }, { "epoch": 0.1203510000851942, "grad_norm": 336.65765380859375, "learning_rate": 1.957496188635898e-06, "loss": 20.3516, "step": 12714 }, { "epoch": 0.12036046610690924, "grad_norm": 320.8485107421875, "learning_rate": 1.957487344907486e-06, "loss": 20.9062, "step": 12715 }, { "epoch": 0.1203699321286243, "grad_norm": 960.0255737304688, "learning_rate": 1.957478500279098e-06, "loss": 38.6562, "step": 12716 }, { "epoch": 0.12037939815033935, "grad_norm": 579.8353271484375, "learning_rate": 1.9574696547507424e-06, "loss": 10.9961, "step": 12717 }, { "epoch": 0.12038886417205441, "grad_norm": 420.3601379394531, "learning_rate": 1.9574608083224274e-06, "loss": 26.3438, "step": 12718 }, { "epoch": 0.12039833019376947, "grad_norm": 466.46856689453125, "learning_rate": 1.957451960994162e-06, "loss": 31.75, "step": 12719 }, { "epoch": 0.12040779621548452, "grad_norm": 3.0027525424957275, "learning_rate": 1.957443112765954e-06, "loss": 0.8657, "step": 12720 }, { "epoch": 0.12041726223719958, "grad_norm": 573.90283203125, "learning_rate": 1.957434263637811e-06, "loss": 71.4688, "step": 12721 }, { "epoch": 0.12042672825891462, "grad_norm": 311.4107666015625, "learning_rate": 1.957425413609743e-06, "loss": 28.0469, "step": 12722 }, { "epoch": 0.12043619428062968, "grad_norm": 573.556884765625, "learning_rate": 1.9574165626817567e-06, "loss": 43.3125, "step": 12723 }, { "epoch": 0.12044566030234473, "grad_norm": 286.65057373046875, "learning_rate": 1.9574077108538616e-06, "loss": 15.7891, "step": 12724 }, { "epoch": 0.12045512632405979, "grad_norm": 368.9739990234375, "learning_rate": 1.9573988581260656e-06, "loss": 15.9375, "step": 12725 }, { "epoch": 0.12046459234577483, "grad_norm": 622.53662109375, "learning_rate": 1.9573900044983767e-06, "loss": 45.1094, "step": 12726 }, { "epoch": 0.1204740583674899, "grad_norm": 212.1096649169922, "learning_rate": 1.9573811499708033e-06, "loss": 12.4219, "step": 12727 }, { "epoch": 0.12048352438920495, "grad_norm": 275.3179931640625, "learning_rate": 1.9573722945433545e-06, "loss": 30.9688, "step": 12728 }, { "epoch": 0.12049299041092, "grad_norm": 344.63623046875, "learning_rate": 1.9573634382160377e-06, "loss": 24.7891, "step": 12729 }, { "epoch": 0.12050245643263506, "grad_norm": 361.9876403808594, "learning_rate": 1.957354580988861e-06, "loss": 35.5781, "step": 12730 }, { "epoch": 0.1205119224543501, "grad_norm": 235.75445556640625, "learning_rate": 1.957345722861834e-06, "loss": 20.0469, "step": 12731 }, { "epoch": 0.12052138847606517, "grad_norm": 211.69769287109375, "learning_rate": 1.9573368638349643e-06, "loss": 17.1055, "step": 12732 }, { "epoch": 0.12053085449778021, "grad_norm": 544.2467651367188, "learning_rate": 1.95732800390826e-06, "loss": 55.875, "step": 12733 }, { "epoch": 0.12054032051949527, "grad_norm": 413.5046081542969, "learning_rate": 1.9573191430817293e-06, "loss": 25.9688, "step": 12734 }, { "epoch": 0.12054978654121032, "grad_norm": 344.73724365234375, "learning_rate": 1.9573102813553814e-06, "loss": 20.9453, "step": 12735 }, { "epoch": 0.12055925256292538, "grad_norm": 647.0028076171875, "learning_rate": 1.957301418729224e-06, "loss": 23.5156, "step": 12736 }, { "epoch": 0.12056871858464044, "grad_norm": 336.85797119140625, "learning_rate": 1.957292555203265e-06, "loss": 20.5273, "step": 12737 }, { "epoch": 0.12057818460635548, "grad_norm": 260.3353576660156, "learning_rate": 1.957283690777514e-06, "loss": 26.8594, "step": 12738 }, { "epoch": 0.12058765062807054, "grad_norm": 480.9446105957031, "learning_rate": 1.957274825451979e-06, "loss": 33.8438, "step": 12739 }, { "epoch": 0.12059711664978559, "grad_norm": 549.4722900390625, "learning_rate": 1.957265959226667e-06, "loss": 36.5469, "step": 12740 }, { "epoch": 0.12060658267150065, "grad_norm": 523.4688720703125, "learning_rate": 1.9572570921015873e-06, "loss": 44.25, "step": 12741 }, { "epoch": 0.1206160486932157, "grad_norm": 334.98406982421875, "learning_rate": 1.9572482240767485e-06, "loss": 34.9141, "step": 12742 }, { "epoch": 0.12062551471493076, "grad_norm": 372.3293151855469, "learning_rate": 1.9572393551521587e-06, "loss": 24.9375, "step": 12743 }, { "epoch": 0.12063498073664582, "grad_norm": 219.6245880126953, "learning_rate": 1.9572304853278263e-06, "loss": 30.75, "step": 12744 }, { "epoch": 0.12064444675836086, "grad_norm": 290.9974365234375, "learning_rate": 1.957221614603759e-06, "loss": 21.3828, "step": 12745 }, { "epoch": 0.12065391278007592, "grad_norm": 299.5206604003906, "learning_rate": 1.9572127429799662e-06, "loss": 23.2656, "step": 12746 }, { "epoch": 0.12066337880179097, "grad_norm": 253.7771759033203, "learning_rate": 1.9572038704564554e-06, "loss": 16.8438, "step": 12747 }, { "epoch": 0.12067284482350603, "grad_norm": 2.573437452316284, "learning_rate": 1.9571949970332355e-06, "loss": 0.9023, "step": 12748 }, { "epoch": 0.12068231084522107, "grad_norm": 354.5247497558594, "learning_rate": 1.9571861227103146e-06, "loss": 26.8516, "step": 12749 }, { "epoch": 0.12069177686693613, "grad_norm": 230.3568572998047, "learning_rate": 1.9571772474877e-06, "loss": 22.6719, "step": 12750 }, { "epoch": 0.12070124288865118, "grad_norm": 422.4319152832031, "learning_rate": 1.957168371365402e-06, "loss": 43.5312, "step": 12751 }, { "epoch": 0.12071070891036624, "grad_norm": 816.5394287109375, "learning_rate": 1.957159494343428e-06, "loss": 61.8906, "step": 12752 }, { "epoch": 0.1207201749320813, "grad_norm": 222.64309692382812, "learning_rate": 1.9571506164217865e-06, "loss": 26.75, "step": 12753 }, { "epoch": 0.12072964095379635, "grad_norm": 579.9384765625, "learning_rate": 1.957141737600485e-06, "loss": 56.1016, "step": 12754 }, { "epoch": 0.1207391069755114, "grad_norm": 368.86871337890625, "learning_rate": 1.957132857879533e-06, "loss": 26.0078, "step": 12755 }, { "epoch": 0.12074857299722645, "grad_norm": 577.5422973632812, "learning_rate": 1.9571239772589383e-06, "loss": 54.1406, "step": 12756 }, { "epoch": 0.12075803901894151, "grad_norm": 203.0275115966797, "learning_rate": 1.9571150957387094e-06, "loss": 20.5977, "step": 12757 }, { "epoch": 0.12076750504065656, "grad_norm": 610.0105590820312, "learning_rate": 1.9571062133188544e-06, "loss": 57.5781, "step": 12758 }, { "epoch": 0.12077697106237162, "grad_norm": 2.9349350929260254, "learning_rate": 1.9570973299993822e-06, "loss": 0.998, "step": 12759 }, { "epoch": 0.12078643708408666, "grad_norm": 479.77056884765625, "learning_rate": 1.9570884457803005e-06, "loss": 36.3906, "step": 12760 }, { "epoch": 0.12079590310580172, "grad_norm": 343.5157470703125, "learning_rate": 1.9570795606616177e-06, "loss": 28.4922, "step": 12761 }, { "epoch": 0.12080536912751678, "grad_norm": 1119.35986328125, "learning_rate": 1.9570706746433427e-06, "loss": 18.9375, "step": 12762 }, { "epoch": 0.12081483514923183, "grad_norm": 2.911341428756714, "learning_rate": 1.9570617877254836e-06, "loss": 0.9526, "step": 12763 }, { "epoch": 0.12082430117094689, "grad_norm": 238.97930908203125, "learning_rate": 1.957052899908048e-06, "loss": 15.5156, "step": 12764 }, { "epoch": 0.12083376719266194, "grad_norm": 294.512939453125, "learning_rate": 1.9570440111910455e-06, "loss": 15.9844, "step": 12765 }, { "epoch": 0.120843233214377, "grad_norm": 798.186279296875, "learning_rate": 1.957035121574484e-06, "loss": 49.2969, "step": 12766 }, { "epoch": 0.12085269923609204, "grad_norm": 642.0094604492188, "learning_rate": 1.957026231058371e-06, "loss": 32.7812, "step": 12767 }, { "epoch": 0.1208621652578071, "grad_norm": 646.843994140625, "learning_rate": 1.9570173396427167e-06, "loss": 31.9375, "step": 12768 }, { "epoch": 0.12087163127952215, "grad_norm": 226.69871520996094, "learning_rate": 1.957008447327528e-06, "loss": 19.2188, "step": 12769 }, { "epoch": 0.12088109730123721, "grad_norm": 390.6223449707031, "learning_rate": 1.956999554112813e-06, "loss": 31.9062, "step": 12770 }, { "epoch": 0.12089056332295227, "grad_norm": 275.75982666015625, "learning_rate": 1.9569906599985813e-06, "loss": 21.5234, "step": 12771 }, { "epoch": 0.12090002934466731, "grad_norm": 269.9547119140625, "learning_rate": 1.9569817649848405e-06, "loss": 18.9766, "step": 12772 }, { "epoch": 0.12090949536638237, "grad_norm": 298.5823669433594, "learning_rate": 1.956972869071599e-06, "loss": 29.8125, "step": 12773 }, { "epoch": 0.12091896138809742, "grad_norm": 589.9330444335938, "learning_rate": 1.9569639722588654e-06, "loss": 42.0508, "step": 12774 }, { "epoch": 0.12092842740981248, "grad_norm": 574.8154907226562, "learning_rate": 1.9569550745466475e-06, "loss": 38.3516, "step": 12775 }, { "epoch": 0.12093789343152753, "grad_norm": 389.780517578125, "learning_rate": 1.9569461759349547e-06, "loss": 38.3594, "step": 12776 }, { "epoch": 0.12094735945324259, "grad_norm": 488.4895324707031, "learning_rate": 1.9569372764237943e-06, "loss": 43.0391, "step": 12777 }, { "epoch": 0.12095682547495763, "grad_norm": 1126.3643798828125, "learning_rate": 1.9569283760131755e-06, "loss": 43.5117, "step": 12778 }, { "epoch": 0.12096629149667269, "grad_norm": 1135.9547119140625, "learning_rate": 1.956919474703106e-06, "loss": 24.5312, "step": 12779 }, { "epoch": 0.12097575751838775, "grad_norm": 462.3778991699219, "learning_rate": 1.9569105724935946e-06, "loss": 36.375, "step": 12780 }, { "epoch": 0.1209852235401028, "grad_norm": 382.5169372558594, "learning_rate": 1.9569016693846494e-06, "loss": 22.5781, "step": 12781 }, { "epoch": 0.12099468956181786, "grad_norm": 254.76707458496094, "learning_rate": 1.9568927653762785e-06, "loss": 20.1875, "step": 12782 }, { "epoch": 0.1210041555835329, "grad_norm": 379.78948974609375, "learning_rate": 1.9568838604684916e-06, "loss": 23.6367, "step": 12783 }, { "epoch": 0.12101362160524796, "grad_norm": 540.61669921875, "learning_rate": 1.9568749546612954e-06, "loss": 29.4219, "step": 12784 }, { "epoch": 0.12102308762696301, "grad_norm": 337.7766418457031, "learning_rate": 1.956866047954699e-06, "loss": 23.3281, "step": 12785 }, { "epoch": 0.12103255364867807, "grad_norm": 250.90341186523438, "learning_rate": 1.956857140348711e-06, "loss": 25.7578, "step": 12786 }, { "epoch": 0.12104201967039313, "grad_norm": 385.5437316894531, "learning_rate": 1.9568482318433397e-06, "loss": 22.6797, "step": 12787 }, { "epoch": 0.12105148569210818, "grad_norm": 374.1923522949219, "learning_rate": 1.9568393224385926e-06, "loss": 26.6094, "step": 12788 }, { "epoch": 0.12106095171382324, "grad_norm": 254.6243896484375, "learning_rate": 1.9568304121344794e-06, "loss": 17.5547, "step": 12789 }, { "epoch": 0.12107041773553828, "grad_norm": 423.20220947265625, "learning_rate": 1.9568215009310075e-06, "loss": 16.8086, "step": 12790 }, { "epoch": 0.12107988375725334, "grad_norm": 445.0322265625, "learning_rate": 1.956812588828186e-06, "loss": 41.3125, "step": 12791 }, { "epoch": 0.12108934977896839, "grad_norm": 525.0223388671875, "learning_rate": 1.9568036758260227e-06, "loss": 39.1094, "step": 12792 }, { "epoch": 0.12109881580068345, "grad_norm": 259.3801574707031, "learning_rate": 1.9567947619245263e-06, "loss": 16.0078, "step": 12793 }, { "epoch": 0.1211082818223985, "grad_norm": 491.8171081542969, "learning_rate": 1.956785847123705e-06, "loss": 30.9375, "step": 12794 }, { "epoch": 0.12111774784411355, "grad_norm": 2.9948995113372803, "learning_rate": 1.956776931423567e-06, "loss": 0.9121, "step": 12795 }, { "epoch": 0.12112721386582861, "grad_norm": 465.5750427246094, "learning_rate": 1.9567680148241214e-06, "loss": 52.0938, "step": 12796 }, { "epoch": 0.12113667988754366, "grad_norm": 418.5020446777344, "learning_rate": 1.956759097325376e-06, "loss": 49.6094, "step": 12797 }, { "epoch": 0.12114614590925872, "grad_norm": 344.0052795410156, "learning_rate": 1.9567501789273392e-06, "loss": 18.7188, "step": 12798 }, { "epoch": 0.12115561193097377, "grad_norm": 3.202160596847534, "learning_rate": 1.9567412596300194e-06, "loss": 1.0024, "step": 12799 }, { "epoch": 0.12116507795268883, "grad_norm": 429.97845458984375, "learning_rate": 1.956732339433425e-06, "loss": 25.0156, "step": 12800 }, { "epoch": 0.12117454397440387, "grad_norm": 432.9606628417969, "learning_rate": 1.9567234183375645e-06, "loss": 32.7266, "step": 12801 }, { "epoch": 0.12118400999611893, "grad_norm": 444.66912841796875, "learning_rate": 1.9567144963424464e-06, "loss": 18.6484, "step": 12802 }, { "epoch": 0.12119347601783398, "grad_norm": 384.7479553222656, "learning_rate": 1.9567055734480788e-06, "loss": 20.4453, "step": 12803 }, { "epoch": 0.12120294203954904, "grad_norm": 555.91455078125, "learning_rate": 1.95669664965447e-06, "loss": 23.0547, "step": 12804 }, { "epoch": 0.1212124080612641, "grad_norm": 343.0887756347656, "learning_rate": 1.956687724961629e-06, "loss": 41.0469, "step": 12805 }, { "epoch": 0.12122187408297914, "grad_norm": 394.61810302734375, "learning_rate": 1.9566787993695636e-06, "loss": 43.625, "step": 12806 }, { "epoch": 0.1212313401046942, "grad_norm": 346.7062683105469, "learning_rate": 1.9566698728782823e-06, "loss": 20.7891, "step": 12807 }, { "epoch": 0.12124080612640925, "grad_norm": 371.4486083984375, "learning_rate": 1.9566609454877935e-06, "loss": 37.0938, "step": 12808 }, { "epoch": 0.12125027214812431, "grad_norm": 614.7605590820312, "learning_rate": 1.9566520171981055e-06, "loss": 22.8984, "step": 12809 }, { "epoch": 0.12125973816983936, "grad_norm": 292.063720703125, "learning_rate": 1.9566430880092272e-06, "loss": 26.3281, "step": 12810 }, { "epoch": 0.12126920419155442, "grad_norm": 221.84396362304688, "learning_rate": 1.9566341579211663e-06, "loss": 25.5, "step": 12811 }, { "epoch": 0.12127867021326946, "grad_norm": 3.020693063735962, "learning_rate": 1.956625226933932e-06, "loss": 0.9543, "step": 12812 }, { "epoch": 0.12128813623498452, "grad_norm": 859.072265625, "learning_rate": 1.9566162950475316e-06, "loss": 53.1328, "step": 12813 }, { "epoch": 0.12129760225669958, "grad_norm": 386.50274658203125, "learning_rate": 1.9566073622619744e-06, "loss": 35.5078, "step": 12814 }, { "epoch": 0.12130706827841463, "grad_norm": 188.4462127685547, "learning_rate": 1.956598428577269e-06, "loss": 19.3828, "step": 12815 }, { "epoch": 0.12131653430012969, "grad_norm": 301.5638427734375, "learning_rate": 1.9565894939934226e-06, "loss": 20.0234, "step": 12816 }, { "epoch": 0.12132600032184473, "grad_norm": 954.239013671875, "learning_rate": 1.9565805585104445e-06, "loss": 49.7656, "step": 12817 }, { "epoch": 0.1213354663435598, "grad_norm": 197.0551300048828, "learning_rate": 1.956571622128343e-06, "loss": 23.1094, "step": 12818 }, { "epoch": 0.12134493236527484, "grad_norm": 736.450439453125, "learning_rate": 1.9565626848471263e-06, "loss": 60.1875, "step": 12819 }, { "epoch": 0.1213543983869899, "grad_norm": 302.23455810546875, "learning_rate": 1.956553746666803e-06, "loss": 24.1328, "step": 12820 }, { "epoch": 0.12136386440870495, "grad_norm": 513.8224487304688, "learning_rate": 1.956544807587381e-06, "loss": 30.5469, "step": 12821 }, { "epoch": 0.12137333043042, "grad_norm": 593.11474609375, "learning_rate": 1.9565358676088697e-06, "loss": 21.8438, "step": 12822 }, { "epoch": 0.12138279645213507, "grad_norm": 283.11285400390625, "learning_rate": 1.956526926731277e-06, "loss": 23.0, "step": 12823 }, { "epoch": 0.12139226247385011, "grad_norm": 254.65650939941406, "learning_rate": 1.956517984954611e-06, "loss": 16.1719, "step": 12824 }, { "epoch": 0.12140172849556517, "grad_norm": 172.94398498535156, "learning_rate": 1.9565090422788803e-06, "loss": 20.6562, "step": 12825 }, { "epoch": 0.12141119451728022, "grad_norm": 347.8392028808594, "learning_rate": 1.956500098704093e-06, "loss": 39.5938, "step": 12826 }, { "epoch": 0.12142066053899528, "grad_norm": 464.4562072753906, "learning_rate": 1.956491154230258e-06, "loss": 51.8906, "step": 12827 }, { "epoch": 0.12143012656071032, "grad_norm": 640.032958984375, "learning_rate": 1.9564822088573838e-06, "loss": 46.1016, "step": 12828 }, { "epoch": 0.12143959258242538, "grad_norm": 419.6860656738281, "learning_rate": 1.9564732625854785e-06, "loss": 49.1641, "step": 12829 }, { "epoch": 0.12144905860414044, "grad_norm": 615.396240234375, "learning_rate": 1.9564643154145503e-06, "loss": 14.3516, "step": 12830 }, { "epoch": 0.12145852462585549, "grad_norm": 421.32958984375, "learning_rate": 1.956455367344608e-06, "loss": 41.5781, "step": 12831 }, { "epoch": 0.12146799064757055, "grad_norm": 335.291748046875, "learning_rate": 1.95644641837566e-06, "loss": 21.8281, "step": 12832 }, { "epoch": 0.1214774566692856, "grad_norm": 2.9722869396209717, "learning_rate": 1.9564374685077145e-06, "loss": 0.8809, "step": 12833 }, { "epoch": 0.12148692269100066, "grad_norm": 194.58734130859375, "learning_rate": 1.95642851774078e-06, "loss": 21.5352, "step": 12834 }, { "epoch": 0.1214963887127157, "grad_norm": 348.08148193359375, "learning_rate": 1.9564195660748646e-06, "loss": 28.9297, "step": 12835 }, { "epoch": 0.12150585473443076, "grad_norm": 357.331787109375, "learning_rate": 1.9564106135099772e-06, "loss": 33.5938, "step": 12836 }, { "epoch": 0.12151532075614581, "grad_norm": 495.7537536621094, "learning_rate": 1.9564016600461266e-06, "loss": 26.0156, "step": 12837 }, { "epoch": 0.12152478677786087, "grad_norm": 627.365234375, "learning_rate": 1.95639270568332e-06, "loss": 44.0312, "step": 12838 }, { "epoch": 0.12153425279957593, "grad_norm": 794.3563232421875, "learning_rate": 1.9563837504215668e-06, "loss": 64.625, "step": 12839 }, { "epoch": 0.12154371882129097, "grad_norm": 357.7215576171875, "learning_rate": 1.956374794260875e-06, "loss": 22.8125, "step": 12840 }, { "epoch": 0.12155318484300603, "grad_norm": 464.4410095214844, "learning_rate": 1.956365837201253e-06, "loss": 56.5938, "step": 12841 }, { "epoch": 0.12156265086472108, "grad_norm": 288.0769348144531, "learning_rate": 1.9563568792427092e-06, "loss": 22.4688, "step": 12842 }, { "epoch": 0.12157211688643614, "grad_norm": 259.2090759277344, "learning_rate": 1.9563479203852526e-06, "loss": 27.4688, "step": 12843 }, { "epoch": 0.12158158290815119, "grad_norm": 375.5632019042969, "learning_rate": 1.9563389606288907e-06, "loss": 25.6406, "step": 12844 }, { "epoch": 0.12159104892986625, "grad_norm": 213.68679809570312, "learning_rate": 1.9563299999736325e-06, "loss": 18.2734, "step": 12845 }, { "epoch": 0.12160051495158129, "grad_norm": 296.1316223144531, "learning_rate": 1.9563210384194864e-06, "loss": 16.7188, "step": 12846 }, { "epoch": 0.12160998097329635, "grad_norm": 361.642578125, "learning_rate": 1.9563120759664613e-06, "loss": 14.6055, "step": 12847 }, { "epoch": 0.12161944699501141, "grad_norm": 443.8876647949219, "learning_rate": 1.956303112614564e-06, "loss": 39.4844, "step": 12848 }, { "epoch": 0.12162891301672646, "grad_norm": 195.14927673339844, "learning_rate": 1.9562941483638046e-06, "loss": 18.9688, "step": 12849 }, { "epoch": 0.12163837903844152, "grad_norm": 411.8863525390625, "learning_rate": 1.9562851832141913e-06, "loss": 30.6875, "step": 12850 }, { "epoch": 0.12164784506015656, "grad_norm": 398.8284912109375, "learning_rate": 1.9562762171657316e-06, "loss": 23.3281, "step": 12851 }, { "epoch": 0.12165731108187162, "grad_norm": 316.56463623046875, "learning_rate": 1.9562672502184343e-06, "loss": 28.7891, "step": 12852 }, { "epoch": 0.12166677710358667, "grad_norm": 282.7109375, "learning_rate": 1.956258282372308e-06, "loss": 16.25, "step": 12853 }, { "epoch": 0.12167624312530173, "grad_norm": 330.50115966796875, "learning_rate": 1.9562493136273614e-06, "loss": 23.0547, "step": 12854 }, { "epoch": 0.12168570914701678, "grad_norm": 179.54234313964844, "learning_rate": 1.9562403439836025e-06, "loss": 21.4297, "step": 12855 }, { "epoch": 0.12169517516873184, "grad_norm": 338.7828063964844, "learning_rate": 1.95623137344104e-06, "loss": 26.2812, "step": 12856 }, { "epoch": 0.1217046411904469, "grad_norm": 654.740478515625, "learning_rate": 1.956222401999682e-06, "loss": 56.4375, "step": 12857 }, { "epoch": 0.12171410721216194, "grad_norm": 935.6858520507812, "learning_rate": 1.9562134296595375e-06, "loss": 30.4531, "step": 12858 }, { "epoch": 0.121723573233877, "grad_norm": 606.5556030273438, "learning_rate": 1.9562044564206146e-06, "loss": 33.4531, "step": 12859 }, { "epoch": 0.12173303925559205, "grad_norm": 245.40435791015625, "learning_rate": 1.9561954822829216e-06, "loss": 11.9844, "step": 12860 }, { "epoch": 0.12174250527730711, "grad_norm": 227.13917541503906, "learning_rate": 1.956186507246467e-06, "loss": 20.1328, "step": 12861 }, { "epoch": 0.12175197129902215, "grad_norm": 280.1566467285156, "learning_rate": 1.956177531311259e-06, "loss": 24.2656, "step": 12862 }, { "epoch": 0.12176143732073721, "grad_norm": 447.95849609375, "learning_rate": 1.956168554477307e-06, "loss": 36.6094, "step": 12863 }, { "epoch": 0.12177090334245226, "grad_norm": 231.15951538085938, "learning_rate": 1.956159576744618e-06, "loss": 20.25, "step": 12864 }, { "epoch": 0.12178036936416732, "grad_norm": 248.46237182617188, "learning_rate": 1.956150598113202e-06, "loss": 17.7266, "step": 12865 }, { "epoch": 0.12178983538588238, "grad_norm": 470.26422119140625, "learning_rate": 1.9561416185830663e-06, "loss": 17.9297, "step": 12866 }, { "epoch": 0.12179930140759743, "grad_norm": 3.353729009628296, "learning_rate": 1.9561326381542195e-06, "loss": 0.876, "step": 12867 }, { "epoch": 0.12180876742931249, "grad_norm": 648.5407104492188, "learning_rate": 1.9561236568266703e-06, "loss": 33.9375, "step": 12868 }, { "epoch": 0.12181823345102753, "grad_norm": 381.4530944824219, "learning_rate": 1.956114674600427e-06, "loss": 33.7344, "step": 12869 }, { "epoch": 0.12182769947274259, "grad_norm": 300.2816467285156, "learning_rate": 1.9561056914754982e-06, "loss": 22.7969, "step": 12870 }, { "epoch": 0.12183716549445764, "grad_norm": 568.40283203125, "learning_rate": 1.9560967074518923e-06, "loss": 25.75, "step": 12871 }, { "epoch": 0.1218466315161727, "grad_norm": 264.9549255371094, "learning_rate": 1.9560877225296174e-06, "loss": 19.3672, "step": 12872 }, { "epoch": 0.12185609753788776, "grad_norm": 338.7403869628906, "learning_rate": 1.9560787367086826e-06, "loss": 34.8281, "step": 12873 }, { "epoch": 0.1218655635596028, "grad_norm": 562.49560546875, "learning_rate": 1.956069749989096e-06, "loss": 38.6328, "step": 12874 }, { "epoch": 0.12187502958131786, "grad_norm": 297.9535217285156, "learning_rate": 1.9560607623708657e-06, "loss": 26.6875, "step": 12875 }, { "epoch": 0.12188449560303291, "grad_norm": 253.1693115234375, "learning_rate": 1.9560517738540007e-06, "loss": 24.2578, "step": 12876 }, { "epoch": 0.12189396162474797, "grad_norm": 633.6488647460938, "learning_rate": 1.956042784438509e-06, "loss": 22.1328, "step": 12877 }, { "epoch": 0.12190342764646302, "grad_norm": 666.5842895507812, "learning_rate": 1.9560337941243995e-06, "loss": 29.9688, "step": 12878 }, { "epoch": 0.12191289366817808, "grad_norm": 378.84356689453125, "learning_rate": 1.9560248029116806e-06, "loss": 23.2969, "step": 12879 }, { "epoch": 0.12192235968989312, "grad_norm": 422.3988037109375, "learning_rate": 1.9560158108003605e-06, "loss": 9.3516, "step": 12880 }, { "epoch": 0.12193182571160818, "grad_norm": 210.885498046875, "learning_rate": 1.9560068177904474e-06, "loss": 24.0156, "step": 12881 }, { "epoch": 0.12194129173332324, "grad_norm": 200.56570434570312, "learning_rate": 1.95599782388195e-06, "loss": 15.5, "step": 12882 }, { "epoch": 0.12195075775503829, "grad_norm": 286.90399169921875, "learning_rate": 1.9559888290748773e-06, "loss": 12.0117, "step": 12883 }, { "epoch": 0.12196022377675335, "grad_norm": 3.1024329662323, "learning_rate": 1.9559798333692373e-06, "loss": 0.9419, "step": 12884 }, { "epoch": 0.1219696897984684, "grad_norm": 196.38291931152344, "learning_rate": 1.955970836765038e-06, "loss": 13.1953, "step": 12885 }, { "epoch": 0.12197915582018345, "grad_norm": 917.9822387695312, "learning_rate": 1.9559618392622886e-06, "loss": 42.7148, "step": 12886 }, { "epoch": 0.1219886218418985, "grad_norm": 874.3707275390625, "learning_rate": 1.955952840860997e-06, "loss": 25.4141, "step": 12887 }, { "epoch": 0.12199808786361356, "grad_norm": 497.7139587402344, "learning_rate": 1.9559438415611723e-06, "loss": 21.4844, "step": 12888 }, { "epoch": 0.1220075538853286, "grad_norm": 743.5887451171875, "learning_rate": 1.9559348413628226e-06, "loss": 42.5078, "step": 12889 }, { "epoch": 0.12201701990704367, "grad_norm": 734.49560546875, "learning_rate": 1.955925840265956e-06, "loss": 22.875, "step": 12890 }, { "epoch": 0.12202648592875873, "grad_norm": 177.7028045654297, "learning_rate": 1.9559168382705814e-06, "loss": 22.6797, "step": 12891 }, { "epoch": 0.12203595195047377, "grad_norm": 338.0777587890625, "learning_rate": 1.9559078353767072e-06, "loss": 12.0898, "step": 12892 }, { "epoch": 0.12204541797218883, "grad_norm": 305.81951904296875, "learning_rate": 1.955898831584342e-06, "loss": 23.9453, "step": 12893 }, { "epoch": 0.12205488399390388, "grad_norm": 443.8160095214844, "learning_rate": 1.955889826893494e-06, "loss": 26.6016, "step": 12894 }, { "epoch": 0.12206435001561894, "grad_norm": 383.26995849609375, "learning_rate": 1.9558808213041716e-06, "loss": 24.4609, "step": 12895 }, { "epoch": 0.12207381603733398, "grad_norm": 389.7901916503906, "learning_rate": 1.9558718148163833e-06, "loss": 20.8555, "step": 12896 }, { "epoch": 0.12208328205904904, "grad_norm": 261.6724853515625, "learning_rate": 1.955862807430138e-06, "loss": 25.5938, "step": 12897 }, { "epoch": 0.12209274808076409, "grad_norm": 1533.863525390625, "learning_rate": 1.9558537991454436e-06, "loss": 70.5156, "step": 12898 }, { "epoch": 0.12210221410247915, "grad_norm": 302.7637939453125, "learning_rate": 1.9558447899623088e-06, "loss": 24.6797, "step": 12899 }, { "epoch": 0.12211168012419421, "grad_norm": 3.008528709411621, "learning_rate": 1.955835779880742e-06, "loss": 0.834, "step": 12900 }, { "epoch": 0.12212114614590926, "grad_norm": 514.1102905273438, "learning_rate": 1.9558267689007523e-06, "loss": 52.625, "step": 12901 }, { "epoch": 0.12213061216762432, "grad_norm": 297.7421569824219, "learning_rate": 1.955817757022347e-06, "loss": 33.0469, "step": 12902 }, { "epoch": 0.12214007818933936, "grad_norm": 347.09967041015625, "learning_rate": 1.9558087442455357e-06, "loss": 56.0156, "step": 12903 }, { "epoch": 0.12214954421105442, "grad_norm": 375.9217529296875, "learning_rate": 1.9557997305703264e-06, "loss": 22.6406, "step": 12904 }, { "epoch": 0.12215901023276947, "grad_norm": 481.5270080566406, "learning_rate": 1.9557907159967272e-06, "loss": 39.8281, "step": 12905 }, { "epoch": 0.12216847625448453, "grad_norm": 180.36573791503906, "learning_rate": 1.9557817005247467e-06, "loss": 8.3867, "step": 12906 }, { "epoch": 0.12217794227619957, "grad_norm": 182.1697540283203, "learning_rate": 1.9557726841543938e-06, "loss": 16.6719, "step": 12907 }, { "epoch": 0.12218740829791463, "grad_norm": 247.44134521484375, "learning_rate": 1.955763666885677e-06, "loss": 13.5547, "step": 12908 }, { "epoch": 0.1221968743196297, "grad_norm": 193.43699645996094, "learning_rate": 1.955754648718604e-06, "loss": 7.7031, "step": 12909 }, { "epoch": 0.12220634034134474, "grad_norm": 257.4752502441406, "learning_rate": 1.9557456296531844e-06, "loss": 44.3984, "step": 12910 }, { "epoch": 0.1222158063630598, "grad_norm": 339.0008850097656, "learning_rate": 1.9557366096894254e-06, "loss": 25.7422, "step": 12911 }, { "epoch": 0.12222527238477485, "grad_norm": 475.4062805175781, "learning_rate": 1.9557275888273368e-06, "loss": 32.6406, "step": 12912 }, { "epoch": 0.1222347384064899, "grad_norm": 3.213840961456299, "learning_rate": 1.955718567066926e-06, "loss": 0.9756, "step": 12913 }, { "epoch": 0.12224420442820495, "grad_norm": 314.26116943359375, "learning_rate": 1.955709544408202e-06, "loss": 24.4375, "step": 12914 }, { "epoch": 0.12225367044992001, "grad_norm": 239.54258728027344, "learning_rate": 1.9557005208511736e-06, "loss": 15.7266, "step": 12915 }, { "epoch": 0.12226313647163507, "grad_norm": 662.0048828125, "learning_rate": 1.955691496395849e-06, "loss": 50.875, "step": 12916 }, { "epoch": 0.12227260249335012, "grad_norm": 549.7974853515625, "learning_rate": 1.955682471042236e-06, "loss": 13.3789, "step": 12917 }, { "epoch": 0.12228206851506518, "grad_norm": 362.7646484375, "learning_rate": 1.9556734447903436e-06, "loss": 32.1562, "step": 12918 }, { "epoch": 0.12229153453678022, "grad_norm": 220.79637145996094, "learning_rate": 1.9556644176401805e-06, "loss": 19.8633, "step": 12919 }, { "epoch": 0.12230100055849528, "grad_norm": 195.50770568847656, "learning_rate": 1.955655389591755e-06, "loss": 13.9492, "step": 12920 }, { "epoch": 0.12231046658021033, "grad_norm": 485.8413391113281, "learning_rate": 1.9556463606450757e-06, "loss": 46.2031, "step": 12921 }, { "epoch": 0.12231993260192539, "grad_norm": 317.5955505371094, "learning_rate": 1.955637330800151e-06, "loss": 46.6094, "step": 12922 }, { "epoch": 0.12232939862364044, "grad_norm": 483.9559631347656, "learning_rate": 1.9556283000569894e-06, "loss": 25.0312, "step": 12923 }, { "epoch": 0.1223388646453555, "grad_norm": 302.4587707519531, "learning_rate": 1.9556192684155992e-06, "loss": 17.9609, "step": 12924 }, { "epoch": 0.12234833066707056, "grad_norm": 287.79644775390625, "learning_rate": 1.955610235875989e-06, "loss": 23.6328, "step": 12925 }, { "epoch": 0.1223577966887856, "grad_norm": 280.3576965332031, "learning_rate": 1.9556012024381675e-06, "loss": 30.2109, "step": 12926 }, { "epoch": 0.12236726271050066, "grad_norm": 486.7886047363281, "learning_rate": 1.955592168102143e-06, "loss": 17.0859, "step": 12927 }, { "epoch": 0.12237672873221571, "grad_norm": 214.49075317382812, "learning_rate": 1.9555831328679244e-06, "loss": 23.3203, "step": 12928 }, { "epoch": 0.12238619475393077, "grad_norm": 3.0250792503356934, "learning_rate": 1.9555740967355194e-06, "loss": 0.8203, "step": 12929 }, { "epoch": 0.12239566077564581, "grad_norm": 305.53582763671875, "learning_rate": 1.9555650597049368e-06, "loss": 22.7188, "step": 12930 }, { "epoch": 0.12240512679736087, "grad_norm": 756.9899291992188, "learning_rate": 1.9555560217761854e-06, "loss": 66.5625, "step": 12931 }, { "epoch": 0.12241459281907592, "grad_norm": 401.5094299316406, "learning_rate": 1.9555469829492733e-06, "loss": 25.1641, "step": 12932 }, { "epoch": 0.12242405884079098, "grad_norm": 453.7355651855469, "learning_rate": 1.95553794322421e-06, "loss": 36.0859, "step": 12933 }, { "epoch": 0.12243352486250604, "grad_norm": 246.26846313476562, "learning_rate": 1.9555289026010023e-06, "loss": 26.3086, "step": 12934 }, { "epoch": 0.12244299088422109, "grad_norm": 308.0872497558594, "learning_rate": 1.9555198610796597e-06, "loss": 46.5, "step": 12935 }, { "epoch": 0.12245245690593615, "grad_norm": 301.7162780761719, "learning_rate": 1.955510818660191e-06, "loss": 22.6328, "step": 12936 }, { "epoch": 0.12246192292765119, "grad_norm": 259.569091796875, "learning_rate": 1.955501775342604e-06, "loss": 21.9844, "step": 12937 }, { "epoch": 0.12247138894936625, "grad_norm": 1096.7939453125, "learning_rate": 1.9554927311269077e-06, "loss": 50.8125, "step": 12938 }, { "epoch": 0.1224808549710813, "grad_norm": 328.620849609375, "learning_rate": 1.9554836860131104e-06, "loss": 14.6719, "step": 12939 }, { "epoch": 0.12249032099279636, "grad_norm": 217.0626678466797, "learning_rate": 1.9554746400012202e-06, "loss": 16.7344, "step": 12940 }, { "epoch": 0.1224997870145114, "grad_norm": 171.02442932128906, "learning_rate": 1.9554655930912464e-06, "loss": 8.5625, "step": 12941 }, { "epoch": 0.12250925303622646, "grad_norm": 257.93890380859375, "learning_rate": 1.955456545283197e-06, "loss": 19.1562, "step": 12942 }, { "epoch": 0.12251871905794152, "grad_norm": 516.6427612304688, "learning_rate": 1.95544749657708e-06, "loss": 40.9688, "step": 12943 }, { "epoch": 0.12252818507965657, "grad_norm": 294.7338562011719, "learning_rate": 1.9554384469729053e-06, "loss": 20.9375, "step": 12944 }, { "epoch": 0.12253765110137163, "grad_norm": 305.6399230957031, "learning_rate": 1.9554293964706804e-06, "loss": 22.5078, "step": 12945 }, { "epoch": 0.12254711712308668, "grad_norm": 646.1670532226562, "learning_rate": 1.9554203450704142e-06, "loss": 54.8438, "step": 12946 }, { "epoch": 0.12255658314480174, "grad_norm": 492.43426513671875, "learning_rate": 1.955411292772115e-06, "loss": 27.1094, "step": 12947 }, { "epoch": 0.12256604916651678, "grad_norm": 949.3375244140625, "learning_rate": 1.955402239575791e-06, "loss": 23.7422, "step": 12948 }, { "epoch": 0.12257551518823184, "grad_norm": 269.2207336425781, "learning_rate": 1.955393185481451e-06, "loss": 16.875, "step": 12949 }, { "epoch": 0.12258498120994689, "grad_norm": 335.1466369628906, "learning_rate": 1.9553841304891037e-06, "loss": 19.8125, "step": 12950 }, { "epoch": 0.12259444723166195, "grad_norm": 386.48260498046875, "learning_rate": 1.955375074598758e-06, "loss": 16.3281, "step": 12951 }, { "epoch": 0.12260391325337701, "grad_norm": 342.0567321777344, "learning_rate": 1.955366017810421e-06, "loss": 29.3047, "step": 12952 }, { "epoch": 0.12261337927509205, "grad_norm": 401.46673583984375, "learning_rate": 1.9553569601241027e-06, "loss": 37.25, "step": 12953 }, { "epoch": 0.12262284529680711, "grad_norm": 620.2420654296875, "learning_rate": 1.955347901539811e-06, "loss": 44.7031, "step": 12954 }, { "epoch": 0.12263231131852216, "grad_norm": 379.7644958496094, "learning_rate": 1.955338842057554e-06, "loss": 29.7188, "step": 12955 }, { "epoch": 0.12264177734023722, "grad_norm": 545.8215942382812, "learning_rate": 1.955329781677341e-06, "loss": 37.4062, "step": 12956 }, { "epoch": 0.12265124336195227, "grad_norm": 485.3309020996094, "learning_rate": 1.95532072039918e-06, "loss": 53.4688, "step": 12957 }, { "epoch": 0.12266070938366733, "grad_norm": 1237.2706298828125, "learning_rate": 1.9553116582230796e-06, "loss": 66.7734, "step": 12958 }, { "epoch": 0.12267017540538239, "grad_norm": 505.22271728515625, "learning_rate": 1.9553025951490485e-06, "loss": 33.3516, "step": 12959 }, { "epoch": 0.12267964142709743, "grad_norm": 209.1512908935547, "learning_rate": 1.955293531177095e-06, "loss": 21.1406, "step": 12960 }, { "epoch": 0.12268910744881249, "grad_norm": 270.1877746582031, "learning_rate": 1.955284466307228e-06, "loss": 43.4062, "step": 12961 }, { "epoch": 0.12269857347052754, "grad_norm": 2.953950881958008, "learning_rate": 1.9552754005394557e-06, "loss": 0.9653, "step": 12962 }, { "epoch": 0.1227080394922426, "grad_norm": 171.2022247314453, "learning_rate": 1.9552663338737864e-06, "loss": 18.3203, "step": 12963 }, { "epoch": 0.12271750551395764, "grad_norm": 656.1267700195312, "learning_rate": 1.9552572663102287e-06, "loss": 20.6953, "step": 12964 }, { "epoch": 0.1227269715356727, "grad_norm": 191.995361328125, "learning_rate": 1.955248197848792e-06, "loss": 23.4062, "step": 12965 }, { "epoch": 0.12273643755738775, "grad_norm": 498.6728210449219, "learning_rate": 1.9552391284894834e-06, "loss": 50.1016, "step": 12966 }, { "epoch": 0.12274590357910281, "grad_norm": 295.5690002441406, "learning_rate": 1.955230058232313e-06, "loss": 23.7109, "step": 12967 }, { "epoch": 0.12275536960081787, "grad_norm": 260.24365234375, "learning_rate": 1.955220987077288e-06, "loss": 15.4805, "step": 12968 }, { "epoch": 0.12276483562253292, "grad_norm": 257.99053955078125, "learning_rate": 1.9552119150244175e-06, "loss": 25.5703, "step": 12969 }, { "epoch": 0.12277430164424798, "grad_norm": 3.412170886993408, "learning_rate": 1.9552028420737095e-06, "loss": 0.9312, "step": 12970 }, { "epoch": 0.12278376766596302, "grad_norm": 631.12646484375, "learning_rate": 1.9551937682251737e-06, "loss": 54.2578, "step": 12971 }, { "epoch": 0.12279323368767808, "grad_norm": 397.65838623046875, "learning_rate": 1.9551846934788173e-06, "loss": 38.6719, "step": 12972 }, { "epoch": 0.12280269970939313, "grad_norm": 707.7908325195312, "learning_rate": 1.9551756178346496e-06, "loss": 45.5312, "step": 12973 }, { "epoch": 0.12281216573110819, "grad_norm": 354.79876708984375, "learning_rate": 1.955166541292679e-06, "loss": 26.8125, "step": 12974 }, { "epoch": 0.12282163175282323, "grad_norm": 377.6138610839844, "learning_rate": 1.9551574638529142e-06, "loss": 40.1719, "step": 12975 }, { "epoch": 0.1228310977745383, "grad_norm": 223.18370056152344, "learning_rate": 1.9551483855153634e-06, "loss": 22.2422, "step": 12976 }, { "epoch": 0.12284056379625335, "grad_norm": 544.7758178710938, "learning_rate": 1.9551393062800352e-06, "loss": 21.3086, "step": 12977 }, { "epoch": 0.1228500298179684, "grad_norm": 279.2581481933594, "learning_rate": 1.955130226146938e-06, "loss": 14.6133, "step": 12978 }, { "epoch": 0.12285949583968346, "grad_norm": 492.7696533203125, "learning_rate": 1.955121145116081e-06, "loss": 37.4375, "step": 12979 }, { "epoch": 0.1228689618613985, "grad_norm": 514.2024536132812, "learning_rate": 1.9551120631874716e-06, "loss": 46.2031, "step": 12980 }, { "epoch": 0.12287842788311357, "grad_norm": 376.40789794921875, "learning_rate": 1.9551029803611197e-06, "loss": 31.25, "step": 12981 }, { "epoch": 0.12288789390482861, "grad_norm": 753.2674560546875, "learning_rate": 1.9550938966370327e-06, "loss": 48.6719, "step": 12982 }, { "epoch": 0.12289735992654367, "grad_norm": 324.7574157714844, "learning_rate": 1.9550848120152196e-06, "loss": 57.0312, "step": 12983 }, { "epoch": 0.12290682594825872, "grad_norm": 497.81683349609375, "learning_rate": 1.955075726495689e-06, "loss": 38.4922, "step": 12984 }, { "epoch": 0.12291629196997378, "grad_norm": 428.27984619140625, "learning_rate": 1.955066640078449e-06, "loss": 22.3281, "step": 12985 }, { "epoch": 0.12292575799168884, "grad_norm": 300.864013671875, "learning_rate": 1.955057552763509e-06, "loss": 20.5195, "step": 12986 }, { "epoch": 0.12293522401340388, "grad_norm": 234.44427490234375, "learning_rate": 1.9550484645508767e-06, "loss": 17.5469, "step": 12987 }, { "epoch": 0.12294469003511894, "grad_norm": 238.20758056640625, "learning_rate": 1.9550393754405614e-06, "loss": 17.5, "step": 12988 }, { "epoch": 0.12295415605683399, "grad_norm": 161.56642150878906, "learning_rate": 1.9550302854325712e-06, "loss": 14.7617, "step": 12989 }, { "epoch": 0.12296362207854905, "grad_norm": 3.208735227584839, "learning_rate": 1.955021194526914e-06, "loss": 0.9297, "step": 12990 }, { "epoch": 0.1229730881002641, "grad_norm": 1530.497802734375, "learning_rate": 1.9550121027236e-06, "loss": 61.7109, "step": 12991 }, { "epoch": 0.12298255412197916, "grad_norm": 352.1361389160156, "learning_rate": 1.9550030100226357e-06, "loss": 19.2031, "step": 12992 }, { "epoch": 0.1229920201436942, "grad_norm": 333.4760437011719, "learning_rate": 1.9549939164240313e-06, "loss": 9.8633, "step": 12993 }, { "epoch": 0.12300148616540926, "grad_norm": 213.24217224121094, "learning_rate": 1.9549848219277944e-06, "loss": 18.4766, "step": 12994 }, { "epoch": 0.12301095218712432, "grad_norm": 682.999267578125, "learning_rate": 1.954975726533934e-06, "loss": 35.375, "step": 12995 }, { "epoch": 0.12302041820883937, "grad_norm": 297.1202697753906, "learning_rate": 1.9549666302424587e-06, "loss": 33.625, "step": 12996 }, { "epoch": 0.12302988423055443, "grad_norm": 384.2682189941406, "learning_rate": 1.954957533053377e-06, "loss": 46.2188, "step": 12997 }, { "epoch": 0.12303935025226947, "grad_norm": 755.5827026367188, "learning_rate": 1.954948434966697e-06, "loss": 31.1719, "step": 12998 }, { "epoch": 0.12304881627398453, "grad_norm": 263.23236083984375, "learning_rate": 1.954939335982428e-06, "loss": 15.3867, "step": 12999 }, { "epoch": 0.12305828229569958, "grad_norm": 441.36810302734375, "learning_rate": 1.9549302361005777e-06, "loss": 19.5391, "step": 13000 }, { "epoch": 0.12306774831741464, "grad_norm": 580.060546875, "learning_rate": 1.9549211353211553e-06, "loss": 51.7969, "step": 13001 }, { "epoch": 0.1230772143391297, "grad_norm": 605.5339965820312, "learning_rate": 1.9549120336441694e-06, "loss": 18.9766, "step": 13002 }, { "epoch": 0.12308668036084475, "grad_norm": 288.1062316894531, "learning_rate": 1.954902931069628e-06, "loss": 18.1797, "step": 13003 }, { "epoch": 0.1230961463825598, "grad_norm": 633.3331909179688, "learning_rate": 1.95489382759754e-06, "loss": 68.6094, "step": 13004 }, { "epoch": 0.12310561240427485, "grad_norm": 827.410888671875, "learning_rate": 1.9548847232279136e-06, "loss": 36.3906, "step": 13005 }, { "epoch": 0.12311507842598991, "grad_norm": 263.91851806640625, "learning_rate": 1.9548756179607582e-06, "loss": 26.5625, "step": 13006 }, { "epoch": 0.12312454444770496, "grad_norm": 228.13125610351562, "learning_rate": 1.9548665117960817e-06, "loss": 14.3906, "step": 13007 }, { "epoch": 0.12313401046942002, "grad_norm": 262.12353515625, "learning_rate": 1.954857404733893e-06, "loss": 18.1016, "step": 13008 }, { "epoch": 0.12314347649113506, "grad_norm": 749.2021484375, "learning_rate": 1.9548482967742e-06, "loss": 45.5312, "step": 13009 }, { "epoch": 0.12315294251285012, "grad_norm": 3.2964768409729004, "learning_rate": 1.954839187917012e-06, "loss": 0.9126, "step": 13010 }, { "epoch": 0.12316240853456518, "grad_norm": 354.7107238769531, "learning_rate": 1.954830078162337e-06, "loss": 31.7188, "step": 13011 }, { "epoch": 0.12317187455628023, "grad_norm": 364.32916259765625, "learning_rate": 1.9548209675101843e-06, "loss": 20.2734, "step": 13012 }, { "epoch": 0.12318134057799529, "grad_norm": 206.02420043945312, "learning_rate": 1.954811855960562e-06, "loss": 18.0, "step": 13013 }, { "epoch": 0.12319080659971034, "grad_norm": 718.8907470703125, "learning_rate": 1.9548027435134784e-06, "loss": 46.9531, "step": 13014 }, { "epoch": 0.1232002726214254, "grad_norm": 286.7395935058594, "learning_rate": 1.9547936301689423e-06, "loss": 20.1875, "step": 13015 }, { "epoch": 0.12320973864314044, "grad_norm": 452.09747314453125, "learning_rate": 1.9547845159269625e-06, "loss": 35.4688, "step": 13016 }, { "epoch": 0.1232192046648555, "grad_norm": 220.58966064453125, "learning_rate": 1.954775400787547e-06, "loss": 20.8359, "step": 13017 }, { "epoch": 0.12322867068657055, "grad_norm": 775.610595703125, "learning_rate": 1.954766284750705e-06, "loss": 53.75, "step": 13018 }, { "epoch": 0.12323813670828561, "grad_norm": 352.4184265136719, "learning_rate": 1.954757167816445e-06, "loss": 18.9355, "step": 13019 }, { "epoch": 0.12324760273000067, "grad_norm": 180.41339111328125, "learning_rate": 1.954748049984775e-06, "loss": 14.3477, "step": 13020 }, { "epoch": 0.12325706875171571, "grad_norm": 394.61651611328125, "learning_rate": 1.954738931255704e-06, "loss": 35.875, "step": 13021 }, { "epoch": 0.12326653477343077, "grad_norm": 708.9653930664062, "learning_rate": 1.954729811629241e-06, "loss": 51.0977, "step": 13022 }, { "epoch": 0.12327600079514582, "grad_norm": 267.4477233886719, "learning_rate": 1.9547206911053937e-06, "loss": 14.2461, "step": 13023 }, { "epoch": 0.12328546681686088, "grad_norm": 789.2860717773438, "learning_rate": 1.954711569684171e-06, "loss": 19.5859, "step": 13024 }, { "epoch": 0.12329493283857593, "grad_norm": 500.17138671875, "learning_rate": 1.9547024473655817e-06, "loss": 47.3359, "step": 13025 }, { "epoch": 0.12330439886029099, "grad_norm": 290.8778076171875, "learning_rate": 1.954693324149634e-06, "loss": 31.6094, "step": 13026 }, { "epoch": 0.12331386488200603, "grad_norm": 248.3765869140625, "learning_rate": 1.9546842000363364e-06, "loss": 25.9062, "step": 13027 }, { "epoch": 0.12332333090372109, "grad_norm": 2.7818009853363037, "learning_rate": 1.9546750750256986e-06, "loss": 0.918, "step": 13028 }, { "epoch": 0.12333279692543615, "grad_norm": 559.4928588867188, "learning_rate": 1.9546659491177276e-06, "loss": 21.1875, "step": 13029 }, { "epoch": 0.1233422629471512, "grad_norm": 620.3953857421875, "learning_rate": 1.954656822312433e-06, "loss": 34.8125, "step": 13030 }, { "epoch": 0.12335172896886626, "grad_norm": 684.8478393554688, "learning_rate": 1.954647694609823e-06, "loss": 39.0312, "step": 13031 }, { "epoch": 0.1233611949905813, "grad_norm": 305.3449401855469, "learning_rate": 1.9546385660099067e-06, "loss": 17.9219, "step": 13032 }, { "epoch": 0.12337066101229636, "grad_norm": 379.7016296386719, "learning_rate": 1.954629436512692e-06, "loss": 17.3047, "step": 13033 }, { "epoch": 0.12338012703401141, "grad_norm": 307.5968933105469, "learning_rate": 1.9546203061181874e-06, "loss": 20.9219, "step": 13034 }, { "epoch": 0.12338959305572647, "grad_norm": 207.77284240722656, "learning_rate": 1.954611174826402e-06, "loss": 23.4844, "step": 13035 }, { "epoch": 0.12339905907744152, "grad_norm": 286.9342041015625, "learning_rate": 1.9546020426373443e-06, "loss": 21.6094, "step": 13036 }, { "epoch": 0.12340852509915658, "grad_norm": 340.11163330078125, "learning_rate": 1.9545929095510228e-06, "loss": 27.6875, "step": 13037 }, { "epoch": 0.12341799112087164, "grad_norm": 247.5115509033203, "learning_rate": 1.954583775567446e-06, "loss": 10.4414, "step": 13038 }, { "epoch": 0.12342745714258668, "grad_norm": 3.5297536849975586, "learning_rate": 1.9545746406866225e-06, "loss": 0.9082, "step": 13039 }, { "epoch": 0.12343692316430174, "grad_norm": 539.1509399414062, "learning_rate": 1.954565504908561e-06, "loss": 43.5781, "step": 13040 }, { "epoch": 0.12344638918601679, "grad_norm": 899.5726318359375, "learning_rate": 1.95455636823327e-06, "loss": 46.6016, "step": 13041 }, { "epoch": 0.12345585520773185, "grad_norm": 590.5903930664062, "learning_rate": 1.9545472306607582e-06, "loss": 31.6562, "step": 13042 }, { "epoch": 0.1234653212294469, "grad_norm": 584.5818481445312, "learning_rate": 1.9545380921910337e-06, "loss": 18.1758, "step": 13043 }, { "epoch": 0.12347478725116195, "grad_norm": 171.8601531982422, "learning_rate": 1.954528952824106e-06, "loss": 14.3125, "step": 13044 }, { "epoch": 0.12348425327287701, "grad_norm": 501.9474182128906, "learning_rate": 1.954519812559983e-06, "loss": 35.7812, "step": 13045 }, { "epoch": 0.12349371929459206, "grad_norm": 243.0185089111328, "learning_rate": 1.9545106713986734e-06, "loss": 16.6094, "step": 13046 }, { "epoch": 0.12350318531630712, "grad_norm": 703.3917846679688, "learning_rate": 1.954501529340186e-06, "loss": 44.5156, "step": 13047 }, { "epoch": 0.12351265133802217, "grad_norm": 261.31951904296875, "learning_rate": 1.9544923863845295e-06, "loss": 17.1875, "step": 13048 }, { "epoch": 0.12352211735973723, "grad_norm": 289.3937683105469, "learning_rate": 1.954483242531712e-06, "loss": 14.8516, "step": 13049 }, { "epoch": 0.12353158338145227, "grad_norm": 598.662841796875, "learning_rate": 1.954474097781742e-06, "loss": 26.7578, "step": 13050 }, { "epoch": 0.12354104940316733, "grad_norm": 297.1703796386719, "learning_rate": 1.9544649521346287e-06, "loss": 14.625, "step": 13051 }, { "epoch": 0.12355051542488238, "grad_norm": 252.27920532226562, "learning_rate": 1.9544558055903805e-06, "loss": 22.3672, "step": 13052 }, { "epoch": 0.12355998144659744, "grad_norm": 3.0631749629974365, "learning_rate": 1.9544466581490057e-06, "loss": 0.8076, "step": 13053 }, { "epoch": 0.1235694474683125, "grad_norm": 134.58245849609375, "learning_rate": 1.9544375098105134e-06, "loss": 19.7969, "step": 13054 }, { "epoch": 0.12357891349002754, "grad_norm": 427.4978332519531, "learning_rate": 1.954428360574912e-06, "loss": 49.6719, "step": 13055 }, { "epoch": 0.1235883795117426, "grad_norm": 248.6312255859375, "learning_rate": 1.9544192104422095e-06, "loss": 18.7969, "step": 13056 }, { "epoch": 0.12359784553345765, "grad_norm": 326.7376708984375, "learning_rate": 1.9544100594124157e-06, "loss": 32.0078, "step": 13057 }, { "epoch": 0.12360731155517271, "grad_norm": 279.945556640625, "learning_rate": 1.9544009074855382e-06, "loss": 21.7266, "step": 13058 }, { "epoch": 0.12361677757688776, "grad_norm": 421.259521484375, "learning_rate": 1.954391754661586e-06, "loss": 32.0312, "step": 13059 }, { "epoch": 0.12362624359860282, "grad_norm": 292.13714599609375, "learning_rate": 1.9543826009405677e-06, "loss": 23.7969, "step": 13060 }, { "epoch": 0.12363570962031786, "grad_norm": 829.6450805664062, "learning_rate": 1.9543734463224916e-06, "loss": 27.3594, "step": 13061 }, { "epoch": 0.12364517564203292, "grad_norm": 246.29617309570312, "learning_rate": 1.954364290807367e-06, "loss": 15.4844, "step": 13062 }, { "epoch": 0.12365464166374798, "grad_norm": 602.6741943359375, "learning_rate": 1.954355134395202e-06, "loss": 22.8359, "step": 13063 }, { "epoch": 0.12366410768546303, "grad_norm": 398.6720275878906, "learning_rate": 1.9543459770860046e-06, "loss": 40.3125, "step": 13064 }, { "epoch": 0.12367357370717809, "grad_norm": 288.4075622558594, "learning_rate": 1.9543368188797843e-06, "loss": 29.125, "step": 13065 }, { "epoch": 0.12368303972889313, "grad_norm": 243.93240356445312, "learning_rate": 1.9543276597765498e-06, "loss": 19.7344, "step": 13066 }, { "epoch": 0.1236925057506082, "grad_norm": 395.5603942871094, "learning_rate": 1.9543184997763095e-06, "loss": 23.0156, "step": 13067 }, { "epoch": 0.12370197177232324, "grad_norm": 249.8463134765625, "learning_rate": 1.9543093388790714e-06, "loss": 9.1562, "step": 13068 }, { "epoch": 0.1237114377940383, "grad_norm": 529.55859375, "learning_rate": 1.954300177084845e-06, "loss": 27.625, "step": 13069 }, { "epoch": 0.12372090381575335, "grad_norm": 405.7126159667969, "learning_rate": 1.954291014393638e-06, "loss": 40.8594, "step": 13070 }, { "epoch": 0.1237303698374684, "grad_norm": 509.0692443847656, "learning_rate": 1.9542818508054602e-06, "loss": 46.0, "step": 13071 }, { "epoch": 0.12373983585918347, "grad_norm": 301.10443115234375, "learning_rate": 1.9542726863203194e-06, "loss": 21.3203, "step": 13072 }, { "epoch": 0.12374930188089851, "grad_norm": 426.22998046875, "learning_rate": 1.954263520938224e-06, "loss": 39.6562, "step": 13073 }, { "epoch": 0.12375876790261357, "grad_norm": 393.5097351074219, "learning_rate": 1.954254354659183e-06, "loss": 19.4766, "step": 13074 }, { "epoch": 0.12376823392432862, "grad_norm": 590.9918212890625, "learning_rate": 1.954245187483205e-06, "loss": 42.0, "step": 13075 }, { "epoch": 0.12377769994604368, "grad_norm": 319.7924499511719, "learning_rate": 1.954236019410299e-06, "loss": 28.9062, "step": 13076 }, { "epoch": 0.12378716596775872, "grad_norm": 310.44000244140625, "learning_rate": 1.954226850440473e-06, "loss": 22.7031, "step": 13077 }, { "epoch": 0.12379663198947378, "grad_norm": 235.49778747558594, "learning_rate": 1.9542176805737357e-06, "loss": 21.6797, "step": 13078 }, { "epoch": 0.12380609801118883, "grad_norm": 237.10215759277344, "learning_rate": 1.954208509810096e-06, "loss": 23.7422, "step": 13079 }, { "epoch": 0.12381556403290389, "grad_norm": 624.5804443359375, "learning_rate": 1.954199338149562e-06, "loss": 40.8906, "step": 13080 }, { "epoch": 0.12382503005461895, "grad_norm": 293.75958251953125, "learning_rate": 1.9541901655921434e-06, "loss": 18.6953, "step": 13081 }, { "epoch": 0.123834496076334, "grad_norm": 590.0864868164062, "learning_rate": 1.9541809921378478e-06, "loss": 40.1719, "step": 13082 }, { "epoch": 0.12384396209804906, "grad_norm": 445.1694641113281, "learning_rate": 1.954171817786684e-06, "loss": 31.8438, "step": 13083 }, { "epoch": 0.1238534281197641, "grad_norm": 342.6171875, "learning_rate": 1.954162642538661e-06, "loss": 29.6641, "step": 13084 }, { "epoch": 0.12386289414147916, "grad_norm": 138.96507263183594, "learning_rate": 1.9541534663937867e-06, "loss": 15.0391, "step": 13085 }, { "epoch": 0.12387236016319421, "grad_norm": 581.2713012695312, "learning_rate": 1.9541442893520706e-06, "loss": 19.957, "step": 13086 }, { "epoch": 0.12388182618490927, "grad_norm": 339.0489807128906, "learning_rate": 1.954135111413521e-06, "loss": 16.3594, "step": 13087 }, { "epoch": 0.12389129220662433, "grad_norm": 408.33056640625, "learning_rate": 1.9541259325781463e-06, "loss": 49.1094, "step": 13088 }, { "epoch": 0.12390075822833937, "grad_norm": 237.13912963867188, "learning_rate": 1.9541167528459554e-06, "loss": 17.7852, "step": 13089 }, { "epoch": 0.12391022425005443, "grad_norm": 417.1427307128906, "learning_rate": 1.9541075722169565e-06, "loss": 28.2188, "step": 13090 }, { "epoch": 0.12391969027176948, "grad_norm": 462.71697998046875, "learning_rate": 1.954098390691159e-06, "loss": 31.8047, "step": 13091 }, { "epoch": 0.12392915629348454, "grad_norm": 320.5393371582031, "learning_rate": 1.9540892082685707e-06, "loss": 12.5352, "step": 13092 }, { "epoch": 0.12393862231519959, "grad_norm": 431.38067626953125, "learning_rate": 1.9540800249492012e-06, "loss": 43.625, "step": 13093 }, { "epoch": 0.12394808833691465, "grad_norm": 464.5538635253906, "learning_rate": 1.954070840733058e-06, "loss": 25.7344, "step": 13094 }, { "epoch": 0.12395755435862969, "grad_norm": 263.02667236328125, "learning_rate": 1.9540616556201505e-06, "loss": 8.4297, "step": 13095 }, { "epoch": 0.12396702038034475, "grad_norm": 514.6752319335938, "learning_rate": 1.954052469610487e-06, "loss": 46.1797, "step": 13096 }, { "epoch": 0.12397648640205981, "grad_norm": 3.259354591369629, "learning_rate": 1.954043282704076e-06, "loss": 0.9282, "step": 13097 }, { "epoch": 0.12398595242377486, "grad_norm": 805.36865234375, "learning_rate": 1.9540340949009268e-06, "loss": 57.1641, "step": 13098 }, { "epoch": 0.12399541844548992, "grad_norm": 438.97265625, "learning_rate": 1.9540249062010473e-06, "loss": 30.2031, "step": 13099 }, { "epoch": 0.12400488446720496, "grad_norm": 444.8312683105469, "learning_rate": 1.954015716604447e-06, "loss": 16.0391, "step": 13100 }, { "epoch": 0.12401435048892002, "grad_norm": 262.4571838378906, "learning_rate": 1.9540065261111337e-06, "loss": 30.125, "step": 13101 }, { "epoch": 0.12402381651063507, "grad_norm": 2.806392192840576, "learning_rate": 1.953997334721116e-06, "loss": 0.8291, "step": 13102 }, { "epoch": 0.12403328253235013, "grad_norm": 357.1257019042969, "learning_rate": 1.953988142434403e-06, "loss": 22.6328, "step": 13103 }, { "epoch": 0.12404274855406518, "grad_norm": 366.5189208984375, "learning_rate": 1.9539789492510034e-06, "loss": 10.5195, "step": 13104 }, { "epoch": 0.12405221457578024, "grad_norm": 201.60533142089844, "learning_rate": 1.9539697551709257e-06, "loss": 17.7812, "step": 13105 }, { "epoch": 0.1240616805974953, "grad_norm": 311.5675964355469, "learning_rate": 1.9539605601941784e-06, "loss": 21.707, "step": 13106 }, { "epoch": 0.12407114661921034, "grad_norm": 409.9303283691406, "learning_rate": 1.95395136432077e-06, "loss": 41.9062, "step": 13107 }, { "epoch": 0.1240806126409254, "grad_norm": 216.3126678466797, "learning_rate": 1.9539421675507097e-06, "loss": 20.8906, "step": 13108 }, { "epoch": 0.12409007866264045, "grad_norm": 442.1349182128906, "learning_rate": 1.9539329698840056e-06, "loss": 42.7812, "step": 13109 }, { "epoch": 0.12409954468435551, "grad_norm": 361.0172119140625, "learning_rate": 1.9539237713206668e-06, "loss": 22.7969, "step": 13110 }, { "epoch": 0.12410901070607055, "grad_norm": 233.58602905273438, "learning_rate": 1.9539145718607016e-06, "loss": 16.1289, "step": 13111 }, { "epoch": 0.12411847672778561, "grad_norm": 2.850527763366699, "learning_rate": 1.9539053715041186e-06, "loss": 0.8804, "step": 13112 }, { "epoch": 0.12412794274950066, "grad_norm": 601.7166137695312, "learning_rate": 1.953896170250927e-06, "loss": 57.2031, "step": 13113 }, { "epoch": 0.12413740877121572, "grad_norm": 215.30331420898438, "learning_rate": 1.9538869681011347e-06, "loss": 27.6562, "step": 13114 }, { "epoch": 0.12414687479293078, "grad_norm": 697.906982421875, "learning_rate": 1.9538777650547507e-06, "loss": 33.8203, "step": 13115 }, { "epoch": 0.12415634081464583, "grad_norm": 598.7413940429688, "learning_rate": 1.9538685611117836e-06, "loss": 36.5156, "step": 13116 }, { "epoch": 0.12416580683636089, "grad_norm": 382.5986022949219, "learning_rate": 1.9538593562722423e-06, "loss": 40.9219, "step": 13117 }, { "epoch": 0.12417527285807593, "grad_norm": 298.8688659667969, "learning_rate": 1.9538501505361353e-06, "loss": 18.2734, "step": 13118 }, { "epoch": 0.12418473887979099, "grad_norm": 326.5616455078125, "learning_rate": 1.953840943903471e-06, "loss": 25.9297, "step": 13119 }, { "epoch": 0.12419420490150604, "grad_norm": 251.8900909423828, "learning_rate": 1.9538317363742583e-06, "loss": 17.1641, "step": 13120 }, { "epoch": 0.1242036709232211, "grad_norm": 3.8504414558410645, "learning_rate": 1.953822527948506e-06, "loss": 1.0137, "step": 13121 }, { "epoch": 0.12421313694493614, "grad_norm": 357.004638671875, "learning_rate": 1.9538133186262225e-06, "loss": 25.8359, "step": 13122 }, { "epoch": 0.1242226029666512, "grad_norm": 462.01605224609375, "learning_rate": 1.9538041084074164e-06, "loss": 36.2344, "step": 13123 }, { "epoch": 0.12423206898836626, "grad_norm": 196.34217834472656, "learning_rate": 1.9537948972920966e-06, "loss": 10.0977, "step": 13124 }, { "epoch": 0.12424153501008131, "grad_norm": 239.2568359375, "learning_rate": 1.953785685280272e-06, "loss": 23.75, "step": 13125 }, { "epoch": 0.12425100103179637, "grad_norm": 299.870849609375, "learning_rate": 1.9537764723719504e-06, "loss": 22.9219, "step": 13126 }, { "epoch": 0.12426046705351142, "grad_norm": 783.8718872070312, "learning_rate": 1.953767258567141e-06, "loss": 43.1406, "step": 13127 }, { "epoch": 0.12426993307522648, "grad_norm": 382.0538635253906, "learning_rate": 1.9537580438658525e-06, "loss": 22.5, "step": 13128 }, { "epoch": 0.12427939909694152, "grad_norm": 3.5628857612609863, "learning_rate": 1.9537488282680937e-06, "loss": 0.9429, "step": 13129 }, { "epoch": 0.12428886511865658, "grad_norm": 183.29832458496094, "learning_rate": 1.953739611773873e-06, "loss": 19.0391, "step": 13130 }, { "epoch": 0.12429833114037164, "grad_norm": 1296.6287841796875, "learning_rate": 1.9537303943831985e-06, "loss": 8.3789, "step": 13131 }, { "epoch": 0.12430779716208669, "grad_norm": 263.0072021484375, "learning_rate": 1.95372117609608e-06, "loss": 31.4062, "step": 13132 }, { "epoch": 0.12431726318380175, "grad_norm": 284.3736572265625, "learning_rate": 1.9537119569125257e-06, "loss": 20.7812, "step": 13133 }, { "epoch": 0.1243267292055168, "grad_norm": 531.0763549804688, "learning_rate": 1.9537027368325442e-06, "loss": 35.5703, "step": 13134 }, { "epoch": 0.12433619522723185, "grad_norm": 234.2234649658203, "learning_rate": 1.953693515856144e-06, "loss": 11.1992, "step": 13135 }, { "epoch": 0.1243456612489469, "grad_norm": 180.91871643066406, "learning_rate": 1.953684293983334e-06, "loss": 20.0, "step": 13136 }, { "epoch": 0.12435512727066196, "grad_norm": 536.62548828125, "learning_rate": 1.9536750712141226e-06, "loss": 59.8906, "step": 13137 }, { "epoch": 0.12436459329237701, "grad_norm": 221.06919860839844, "learning_rate": 1.953665847548519e-06, "loss": 16.7812, "step": 13138 }, { "epoch": 0.12437405931409207, "grad_norm": 510.7951354980469, "learning_rate": 1.9536566229865312e-06, "loss": 64.0078, "step": 13139 }, { "epoch": 0.12438352533580713, "grad_norm": 480.8776550292969, "learning_rate": 1.9536473975281688e-06, "loss": 24.6367, "step": 13140 }, { "epoch": 0.12439299135752217, "grad_norm": 452.4254150390625, "learning_rate": 1.9536381711734396e-06, "loss": 14.4883, "step": 13141 }, { "epoch": 0.12440245737923723, "grad_norm": 203.0320587158203, "learning_rate": 1.9536289439223523e-06, "loss": 18.6016, "step": 13142 }, { "epoch": 0.12441192340095228, "grad_norm": 231.77450561523438, "learning_rate": 1.953619715774916e-06, "loss": 14.3828, "step": 13143 }, { "epoch": 0.12442138942266734, "grad_norm": 451.0575866699219, "learning_rate": 1.9536104867311393e-06, "loss": 31.1094, "step": 13144 }, { "epoch": 0.12443085544438238, "grad_norm": 557.9024047851562, "learning_rate": 1.9536012567910306e-06, "loss": 22.5625, "step": 13145 }, { "epoch": 0.12444032146609744, "grad_norm": 415.41339111328125, "learning_rate": 1.953592025954599e-06, "loss": 38.4297, "step": 13146 }, { "epoch": 0.12444978748781249, "grad_norm": 718.8209228515625, "learning_rate": 1.9535827942218526e-06, "loss": 28.0703, "step": 13147 }, { "epoch": 0.12445925350952755, "grad_norm": 453.6223449707031, "learning_rate": 1.9535735615928006e-06, "loss": 34.3281, "step": 13148 }, { "epoch": 0.12446871953124261, "grad_norm": 822.9364013671875, "learning_rate": 1.9535643280674514e-06, "loss": 34.8516, "step": 13149 }, { "epoch": 0.12447818555295766, "grad_norm": 2.9270713329315186, "learning_rate": 1.9535550936458138e-06, "loss": 0.8555, "step": 13150 }, { "epoch": 0.12448765157467272, "grad_norm": 448.3418884277344, "learning_rate": 1.9535458583278963e-06, "loss": 34.4141, "step": 13151 }, { "epoch": 0.12449711759638776, "grad_norm": 310.74713134765625, "learning_rate": 1.9535366221137083e-06, "loss": 17.8594, "step": 13152 }, { "epoch": 0.12450658361810282, "grad_norm": 267.63763427734375, "learning_rate": 1.9535273850032574e-06, "loss": 19.8516, "step": 13153 }, { "epoch": 0.12451604963981787, "grad_norm": 564.1155395507812, "learning_rate": 1.953518146996553e-06, "loss": 61.4531, "step": 13154 }, { "epoch": 0.12452551566153293, "grad_norm": 401.83380126953125, "learning_rate": 1.9535089080936032e-06, "loss": 20.9297, "step": 13155 }, { "epoch": 0.12453498168324798, "grad_norm": 3.130861759185791, "learning_rate": 1.9534996682944173e-06, "loss": 0.9634, "step": 13156 }, { "epoch": 0.12454444770496303, "grad_norm": 347.2401428222656, "learning_rate": 1.953490427599004e-06, "loss": 21.5312, "step": 13157 }, { "epoch": 0.1245539137266781, "grad_norm": 356.8335876464844, "learning_rate": 1.953481186007371e-06, "loss": 33.8594, "step": 13158 }, { "epoch": 0.12456337974839314, "grad_norm": 355.0369567871094, "learning_rate": 1.9534719435195285e-06, "loss": 34.1445, "step": 13159 }, { "epoch": 0.1245728457701082, "grad_norm": 2.941953182220459, "learning_rate": 1.953462700135484e-06, "loss": 0.7041, "step": 13160 }, { "epoch": 0.12458231179182325, "grad_norm": 310.69134521484375, "learning_rate": 1.953453455855247e-06, "loss": 29.4062, "step": 13161 }, { "epoch": 0.12459177781353831, "grad_norm": 277.0561218261719, "learning_rate": 1.953444210678825e-06, "loss": 22.1094, "step": 13162 }, { "epoch": 0.12460124383525335, "grad_norm": 381.04534912109375, "learning_rate": 1.9534349646062283e-06, "loss": 18.7734, "step": 13163 }, { "epoch": 0.12461070985696841, "grad_norm": 519.9454345703125, "learning_rate": 1.953425717637464e-06, "loss": 42.3438, "step": 13164 }, { "epoch": 0.12462017587868346, "grad_norm": 311.6893615722656, "learning_rate": 1.9534164697725425e-06, "loss": 23.7617, "step": 13165 }, { "epoch": 0.12462964190039852, "grad_norm": 199.52073669433594, "learning_rate": 1.9534072210114706e-06, "loss": 23.375, "step": 13166 }, { "epoch": 0.12463910792211358, "grad_norm": 219.6476287841797, "learning_rate": 1.9533979713542586e-06, "loss": 15.0078, "step": 13167 }, { "epoch": 0.12464857394382862, "grad_norm": 565.2183227539062, "learning_rate": 1.9533887208009145e-06, "loss": 39.8125, "step": 13168 }, { "epoch": 0.12465803996554368, "grad_norm": 166.05911254882812, "learning_rate": 1.9533794693514464e-06, "loss": 13.543, "step": 13169 }, { "epoch": 0.12466750598725873, "grad_norm": 427.6209411621094, "learning_rate": 1.953370217005864e-06, "loss": 24.4688, "step": 13170 }, { "epoch": 0.12467697200897379, "grad_norm": 434.6620788574219, "learning_rate": 1.9533609637641755e-06, "loss": 22.6641, "step": 13171 }, { "epoch": 0.12468643803068884, "grad_norm": 186.1191864013672, "learning_rate": 1.95335170962639e-06, "loss": 23.2734, "step": 13172 }, { "epoch": 0.1246959040524039, "grad_norm": 571.7578125, "learning_rate": 1.9533424545925157e-06, "loss": 35.25, "step": 13173 }, { "epoch": 0.12470537007411896, "grad_norm": 995.1732788085938, "learning_rate": 1.953333198662562e-06, "loss": 50.3125, "step": 13174 }, { "epoch": 0.124714836095834, "grad_norm": 341.9959411621094, "learning_rate": 1.9533239418365364e-06, "loss": 22.125, "step": 13175 }, { "epoch": 0.12472430211754906, "grad_norm": 452.4346923828125, "learning_rate": 1.9533146841144485e-06, "loss": 25.2656, "step": 13176 }, { "epoch": 0.12473376813926411, "grad_norm": 457.28985595703125, "learning_rate": 1.9533054254963068e-06, "loss": 44.7188, "step": 13177 }, { "epoch": 0.12474323416097917, "grad_norm": 354.29058837890625, "learning_rate": 1.9532961659821203e-06, "loss": 42.75, "step": 13178 }, { "epoch": 0.12475270018269422, "grad_norm": 2.9133503437042236, "learning_rate": 1.9532869055718974e-06, "loss": 0.9468, "step": 13179 }, { "epoch": 0.12476216620440927, "grad_norm": 1407.248779296875, "learning_rate": 1.9532776442656463e-06, "loss": 17.1562, "step": 13180 }, { "epoch": 0.12477163222612432, "grad_norm": 439.2509460449219, "learning_rate": 1.9532683820633766e-06, "loss": 32.8281, "step": 13181 }, { "epoch": 0.12478109824783938, "grad_norm": 357.13507080078125, "learning_rate": 1.9532591189650968e-06, "loss": 33.7109, "step": 13182 }, { "epoch": 0.12479056426955444, "grad_norm": 430.0354309082031, "learning_rate": 1.9532498549708154e-06, "loss": 22.3984, "step": 13183 }, { "epoch": 0.12480003029126949, "grad_norm": 861.280029296875, "learning_rate": 1.953240590080541e-06, "loss": 22.7578, "step": 13184 }, { "epoch": 0.12480949631298455, "grad_norm": 374.7713928222656, "learning_rate": 1.953231324294282e-06, "loss": 21.1172, "step": 13185 }, { "epoch": 0.12481896233469959, "grad_norm": 262.70770263671875, "learning_rate": 1.9532220576120483e-06, "loss": 8.6133, "step": 13186 }, { "epoch": 0.12482842835641465, "grad_norm": 555.9517822265625, "learning_rate": 1.9532127900338477e-06, "loss": 16.627, "step": 13187 }, { "epoch": 0.1248378943781297, "grad_norm": 593.6358032226562, "learning_rate": 1.953203521559689e-06, "loss": 17.0703, "step": 13188 }, { "epoch": 0.12484736039984476, "grad_norm": 560.9789428710938, "learning_rate": 1.953194252189581e-06, "loss": 46.0, "step": 13189 }, { "epoch": 0.1248568264215598, "grad_norm": 309.37506103515625, "learning_rate": 1.9531849819235326e-06, "loss": 31.2812, "step": 13190 }, { "epoch": 0.12486629244327486, "grad_norm": 297.4377136230469, "learning_rate": 1.953175710761552e-06, "loss": 19.9766, "step": 13191 }, { "epoch": 0.12487575846498992, "grad_norm": 1354.6513671875, "learning_rate": 1.9531664387036483e-06, "loss": 33.0586, "step": 13192 }, { "epoch": 0.12488522448670497, "grad_norm": 239.4543914794922, "learning_rate": 1.95315716574983e-06, "loss": 18.7578, "step": 13193 }, { "epoch": 0.12489469050842003, "grad_norm": 285.770751953125, "learning_rate": 1.9531478919001065e-06, "loss": 17.5312, "step": 13194 }, { "epoch": 0.12490415653013508, "grad_norm": 322.55169677734375, "learning_rate": 1.953138617154486e-06, "loss": 25.4922, "step": 13195 }, { "epoch": 0.12491362255185014, "grad_norm": 275.6960754394531, "learning_rate": 1.9531293415129767e-06, "loss": 16.9375, "step": 13196 }, { "epoch": 0.12492308857356518, "grad_norm": 672.6968383789062, "learning_rate": 1.9531200649755875e-06, "loss": 26.8984, "step": 13197 }, { "epoch": 0.12493255459528024, "grad_norm": 446.4208984375, "learning_rate": 1.9531107875423283e-06, "loss": 28.0547, "step": 13198 }, { "epoch": 0.12494202061699529, "grad_norm": 480.544189453125, "learning_rate": 1.9531015092132065e-06, "loss": 31.6172, "step": 13199 }, { "epoch": 0.12495148663871035, "grad_norm": 209.5228271484375, "learning_rate": 1.9530922299882313e-06, "loss": 16.125, "step": 13200 }, { "epoch": 0.12496095266042541, "grad_norm": 3.470914125442505, "learning_rate": 1.9530829498674116e-06, "loss": 1.1973, "step": 13201 }, { "epoch": 0.12497041868214046, "grad_norm": 3.2747113704681396, "learning_rate": 1.953073668850756e-06, "loss": 0.8635, "step": 13202 }, { "epoch": 0.12497988470385551, "grad_norm": 387.27508544921875, "learning_rate": 1.953064386938273e-06, "loss": 8.3945, "step": 13203 }, { "epoch": 0.12498935072557056, "grad_norm": 178.3929901123047, "learning_rate": 1.9530551041299715e-06, "loss": 15.1719, "step": 13204 }, { "epoch": 0.12499881674728562, "grad_norm": 293.6584167480469, "learning_rate": 1.95304582042586e-06, "loss": 20.2578, "step": 13205 }, { "epoch": 0.12500828276900067, "grad_norm": 564.47216796875, "learning_rate": 1.9530365358259476e-06, "loss": 39.2656, "step": 13206 }, { "epoch": 0.12501774879071573, "grad_norm": 195.0304412841797, "learning_rate": 1.953027250330243e-06, "loss": 23.3359, "step": 13207 }, { "epoch": 0.1250272148124308, "grad_norm": 310.2759094238281, "learning_rate": 1.9530179639387546e-06, "loss": 21.0391, "step": 13208 }, { "epoch": 0.12503668083414585, "grad_norm": 450.90380859375, "learning_rate": 1.9530086766514915e-06, "loss": 38.5625, "step": 13209 }, { "epoch": 0.12504614685586088, "grad_norm": 363.09210205078125, "learning_rate": 1.952999388468462e-06, "loss": 25.8281, "step": 13210 }, { "epoch": 0.12505561287757594, "grad_norm": 334.7377014160156, "learning_rate": 1.9529900993896753e-06, "loss": 48.3594, "step": 13211 }, { "epoch": 0.125065078899291, "grad_norm": 171.18077087402344, "learning_rate": 1.9529808094151397e-06, "loss": 19.7852, "step": 13212 }, { "epoch": 0.12507454492100606, "grad_norm": 2.8484442234039307, "learning_rate": 1.952971518544864e-06, "loss": 0.9966, "step": 13213 }, { "epoch": 0.1250840109427211, "grad_norm": 186.64215087890625, "learning_rate": 1.9529622267788576e-06, "loss": 17.0078, "step": 13214 }, { "epoch": 0.12509347696443615, "grad_norm": 245.75340270996094, "learning_rate": 1.952952934117128e-06, "loss": 18.3281, "step": 13215 }, { "epoch": 0.1251029429861512, "grad_norm": 367.38629150390625, "learning_rate": 1.952943640559685e-06, "loss": 42.8594, "step": 13216 }, { "epoch": 0.12511240900786627, "grad_norm": 400.872802734375, "learning_rate": 1.952934346106537e-06, "loss": 38.0312, "step": 13217 }, { "epoch": 0.12512187502958133, "grad_norm": 405.500244140625, "learning_rate": 1.952925050757693e-06, "loss": 32.3594, "step": 13218 }, { "epoch": 0.12513134105129636, "grad_norm": 644.8778686523438, "learning_rate": 1.952915754513161e-06, "loss": 24.1016, "step": 13219 }, { "epoch": 0.12514080707301142, "grad_norm": 983.4671630859375, "learning_rate": 1.9529064573729503e-06, "loss": 66.1719, "step": 13220 }, { "epoch": 0.12515027309472648, "grad_norm": 1231.219482421875, "learning_rate": 1.95289715933707e-06, "loss": 80.8125, "step": 13221 }, { "epoch": 0.12515973911644154, "grad_norm": 287.96124267578125, "learning_rate": 1.9528878604055276e-06, "loss": 18.0547, "step": 13222 }, { "epoch": 0.12516920513815658, "grad_norm": 180.53236389160156, "learning_rate": 1.9528785605783326e-06, "loss": 19.2891, "step": 13223 }, { "epoch": 0.12517867115987164, "grad_norm": 301.969970703125, "learning_rate": 1.952869259855494e-06, "loss": 21.9688, "step": 13224 }, { "epoch": 0.1251881371815867, "grad_norm": 835.1847534179688, "learning_rate": 1.9528599582370208e-06, "loss": 27.1445, "step": 13225 }, { "epoch": 0.12519760320330175, "grad_norm": 730.84716796875, "learning_rate": 1.9528506557229204e-06, "loss": 16.6406, "step": 13226 }, { "epoch": 0.12520706922501681, "grad_norm": 849.4359130859375, "learning_rate": 1.952841352313203e-06, "loss": 57.7188, "step": 13227 }, { "epoch": 0.12521653524673185, "grad_norm": 181.75157165527344, "learning_rate": 1.952832048007876e-06, "loss": 17.2031, "step": 13228 }, { "epoch": 0.1252260012684469, "grad_norm": 495.04486083984375, "learning_rate": 1.9528227428069495e-06, "loss": 35.5781, "step": 13229 }, { "epoch": 0.12523546729016197, "grad_norm": 220.57504272460938, "learning_rate": 1.9528134367104317e-06, "loss": 26.3711, "step": 13230 }, { "epoch": 0.12524493331187703, "grad_norm": 3.7046878337860107, "learning_rate": 1.9528041297183308e-06, "loss": 1.0625, "step": 13231 }, { "epoch": 0.12525439933359206, "grad_norm": 862.0830688476562, "learning_rate": 1.9527948218306563e-06, "loss": 61.2188, "step": 13232 }, { "epoch": 0.12526386535530712, "grad_norm": 209.0033416748047, "learning_rate": 1.9527855130474165e-06, "loss": 15.0312, "step": 13233 }, { "epoch": 0.12527333137702218, "grad_norm": 167.66302490234375, "learning_rate": 1.95277620336862e-06, "loss": 18.0781, "step": 13234 }, { "epoch": 0.12528279739873724, "grad_norm": 303.3922424316406, "learning_rate": 1.9527668927942763e-06, "loss": 17.6016, "step": 13235 }, { "epoch": 0.1252922634204523, "grad_norm": 443.21124267578125, "learning_rate": 1.9527575813243936e-06, "loss": 19.0, "step": 13236 }, { "epoch": 0.12530172944216733, "grad_norm": 228.22808837890625, "learning_rate": 1.9527482689589808e-06, "loss": 17.7891, "step": 13237 }, { "epoch": 0.1253111954638824, "grad_norm": 472.8309020996094, "learning_rate": 1.9527389556980465e-06, "loss": 45.6562, "step": 13238 }, { "epoch": 0.12532066148559745, "grad_norm": 204.62933349609375, "learning_rate": 1.9527296415415994e-06, "loss": 29.7969, "step": 13239 }, { "epoch": 0.1253301275073125, "grad_norm": 3.2004945278167725, "learning_rate": 1.9527203264896488e-06, "loss": 1.0107, "step": 13240 }, { "epoch": 0.12533959352902754, "grad_norm": 387.3117370605469, "learning_rate": 1.952711010542203e-06, "loss": 24.5625, "step": 13241 }, { "epoch": 0.1253490595507426, "grad_norm": 460.4173583984375, "learning_rate": 1.9527016936992707e-06, "loss": 33.1953, "step": 13242 }, { "epoch": 0.12535852557245766, "grad_norm": 992.9196166992188, "learning_rate": 1.9526923759608606e-06, "loss": 45.3594, "step": 13243 }, { "epoch": 0.12536799159417272, "grad_norm": 271.3359069824219, "learning_rate": 1.952683057326982e-06, "loss": 16.0352, "step": 13244 }, { "epoch": 0.12537745761588778, "grad_norm": 494.3929443359375, "learning_rate": 1.952673737797643e-06, "loss": 45.0938, "step": 13245 }, { "epoch": 0.12538692363760282, "grad_norm": 323.263427734375, "learning_rate": 1.952664417372853e-06, "loss": 15.6172, "step": 13246 }, { "epoch": 0.12539638965931788, "grad_norm": 562.8699340820312, "learning_rate": 1.95265509605262e-06, "loss": 46.75, "step": 13247 }, { "epoch": 0.12540585568103294, "grad_norm": 631.8035278320312, "learning_rate": 1.9526457738369533e-06, "loss": 56.6172, "step": 13248 }, { "epoch": 0.125415321702748, "grad_norm": 700.7542114257812, "learning_rate": 1.952636450725862e-06, "loss": 32.7344, "step": 13249 }, { "epoch": 0.12542478772446303, "grad_norm": 367.641845703125, "learning_rate": 1.952627126719354e-06, "loss": 41.625, "step": 13250 }, { "epoch": 0.1254342537461781, "grad_norm": 568.1749267578125, "learning_rate": 1.9526178018174387e-06, "loss": 14.9805, "step": 13251 }, { "epoch": 0.12544371976789315, "grad_norm": 556.494873046875, "learning_rate": 1.9526084760201242e-06, "loss": 34.2656, "step": 13252 }, { "epoch": 0.1254531857896082, "grad_norm": 3.4260456562042236, "learning_rate": 1.9525991493274197e-06, "loss": 1.0464, "step": 13253 }, { "epoch": 0.12546265181132327, "grad_norm": 725.4529418945312, "learning_rate": 1.9525898217393344e-06, "loss": 17.2891, "step": 13254 }, { "epoch": 0.1254721178330383, "grad_norm": 737.802978515625, "learning_rate": 1.952580493255877e-06, "loss": 44.0938, "step": 13255 }, { "epoch": 0.12548158385475336, "grad_norm": 648.1580810546875, "learning_rate": 1.952571163877055e-06, "loss": 42.9531, "step": 13256 }, { "epoch": 0.12549104987646842, "grad_norm": 272.7605895996094, "learning_rate": 1.9525618336028783e-06, "loss": 22.125, "step": 13257 }, { "epoch": 0.12550051589818348, "grad_norm": 288.2515869140625, "learning_rate": 1.9525525024333556e-06, "loss": 15.4375, "step": 13258 }, { "epoch": 0.1255099819198985, "grad_norm": 3.5435352325439453, "learning_rate": 1.952543170368496e-06, "loss": 0.9263, "step": 13259 }, { "epoch": 0.12551944794161357, "grad_norm": 252.89366149902344, "learning_rate": 1.9525338374083073e-06, "loss": 17.5938, "step": 13260 }, { "epoch": 0.12552891396332863, "grad_norm": 605.7354736328125, "learning_rate": 1.9525245035527983e-06, "loss": 15.3242, "step": 13261 }, { "epoch": 0.1255383799850437, "grad_norm": 565.1336669921875, "learning_rate": 1.952515168801979e-06, "loss": 25.9453, "step": 13262 }, { "epoch": 0.12554784600675875, "grad_norm": 297.6282043457031, "learning_rate": 1.952505833155857e-06, "loss": 18.1094, "step": 13263 }, { "epoch": 0.12555731202847378, "grad_norm": 508.16650390625, "learning_rate": 1.952496496614442e-06, "loss": 19.3906, "step": 13264 }, { "epoch": 0.12556677805018884, "grad_norm": 359.8111877441406, "learning_rate": 1.9524871591777414e-06, "loss": 21.6641, "step": 13265 }, { "epoch": 0.1255762440719039, "grad_norm": 480.2612609863281, "learning_rate": 1.9524778208457653e-06, "loss": 20.4062, "step": 13266 }, { "epoch": 0.12558571009361896, "grad_norm": 567.8670043945312, "learning_rate": 1.952468481618522e-06, "loss": 32.5508, "step": 13267 }, { "epoch": 0.125595176115334, "grad_norm": 517.4393310546875, "learning_rate": 1.9524591414960202e-06, "loss": 29.75, "step": 13268 }, { "epoch": 0.12560464213704906, "grad_norm": 363.524658203125, "learning_rate": 1.952449800478269e-06, "loss": 19.7227, "step": 13269 }, { "epoch": 0.12561410815876412, "grad_norm": 568.1856689453125, "learning_rate": 1.952440458565277e-06, "loss": 18.8359, "step": 13270 }, { "epoch": 0.12562357418047918, "grad_norm": 223.76426696777344, "learning_rate": 1.9524311157570527e-06, "loss": 10.7188, "step": 13271 }, { "epoch": 0.12563304020219423, "grad_norm": 904.0764770507812, "learning_rate": 1.9524217720536053e-06, "loss": 47.5781, "step": 13272 }, { "epoch": 0.12564250622390927, "grad_norm": 217.19467163085938, "learning_rate": 1.952412427454943e-06, "loss": 13.7695, "step": 13273 }, { "epoch": 0.12565197224562433, "grad_norm": 266.0425109863281, "learning_rate": 1.952403081961075e-06, "loss": 30.375, "step": 13274 }, { "epoch": 0.1256614382673394, "grad_norm": 570.1168212890625, "learning_rate": 1.9523937355720106e-06, "loss": 18.4844, "step": 13275 }, { "epoch": 0.12567090428905445, "grad_norm": 342.38433837890625, "learning_rate": 1.9523843882877577e-06, "loss": 41.4844, "step": 13276 }, { "epoch": 0.12568037031076948, "grad_norm": 733.8587036132812, "learning_rate": 1.9523750401083256e-06, "loss": 49.7812, "step": 13277 }, { "epoch": 0.12568983633248454, "grad_norm": 375.4632873535156, "learning_rate": 1.9523656910337224e-06, "loss": 17.6719, "step": 13278 }, { "epoch": 0.1256993023541996, "grad_norm": 460.0033874511719, "learning_rate": 1.952356341063958e-06, "loss": 62.5, "step": 13279 }, { "epoch": 0.12570876837591466, "grad_norm": 457.8906555175781, "learning_rate": 1.9523469901990404e-06, "loss": 29.0938, "step": 13280 }, { "epoch": 0.12571823439762972, "grad_norm": 241.46829223632812, "learning_rate": 1.9523376384389785e-06, "loss": 21.9375, "step": 13281 }, { "epoch": 0.12572770041934475, "grad_norm": 218.89822387695312, "learning_rate": 1.952328285783781e-06, "loss": 10.332, "step": 13282 }, { "epoch": 0.1257371664410598, "grad_norm": 486.468017578125, "learning_rate": 1.952318932233457e-06, "loss": 41.6094, "step": 13283 }, { "epoch": 0.12574663246277487, "grad_norm": 161.72080993652344, "learning_rate": 1.9523095777880158e-06, "loss": 19.9844, "step": 13284 }, { "epoch": 0.12575609848448993, "grad_norm": 649.1815795898438, "learning_rate": 1.9523002224474647e-06, "loss": 24.2734, "step": 13285 }, { "epoch": 0.125765564506205, "grad_norm": 273.2587585449219, "learning_rate": 1.9522908662118138e-06, "loss": 27.8281, "step": 13286 }, { "epoch": 0.12577503052792002, "grad_norm": 292.28741455078125, "learning_rate": 1.9522815090810713e-06, "loss": 10.3672, "step": 13287 }, { "epoch": 0.12578449654963508, "grad_norm": 217.14840698242188, "learning_rate": 1.952272151055246e-06, "loss": 20.7188, "step": 13288 }, { "epoch": 0.12579396257135014, "grad_norm": 441.19140625, "learning_rate": 1.9522627921343467e-06, "loss": 36.6406, "step": 13289 }, { "epoch": 0.1258034285930652, "grad_norm": 289.10833740234375, "learning_rate": 1.9522534323183827e-06, "loss": 15.8203, "step": 13290 }, { "epoch": 0.12581289461478024, "grad_norm": 212.0904541015625, "learning_rate": 1.9522440716073624e-06, "loss": 22.5, "step": 13291 }, { "epoch": 0.1258223606364953, "grad_norm": 464.1608581542969, "learning_rate": 1.9522347100012943e-06, "loss": 35.5625, "step": 13292 }, { "epoch": 0.12583182665821036, "grad_norm": 370.44012451171875, "learning_rate": 1.952225347500188e-06, "loss": 44.3984, "step": 13293 }, { "epoch": 0.12584129267992542, "grad_norm": 233.53030395507812, "learning_rate": 1.9522159841040513e-06, "loss": 21.4609, "step": 13294 }, { "epoch": 0.12585075870164047, "grad_norm": 362.10498046875, "learning_rate": 1.952206619812894e-06, "loss": 32.8359, "step": 13295 }, { "epoch": 0.1258602247233555, "grad_norm": 332.592529296875, "learning_rate": 1.952197254626724e-06, "loss": 35.1016, "step": 13296 }, { "epoch": 0.12586969074507057, "grad_norm": 468.2342834472656, "learning_rate": 1.952187888545551e-06, "loss": 27.0781, "step": 13297 }, { "epoch": 0.12587915676678563, "grad_norm": 3.282233715057373, "learning_rate": 1.9521785215693828e-06, "loss": 0.9531, "step": 13298 }, { "epoch": 0.1258886227885007, "grad_norm": 875.9244384765625, "learning_rate": 1.9521691536982287e-06, "loss": 57.5312, "step": 13299 }, { "epoch": 0.12589808881021572, "grad_norm": 569.4166259765625, "learning_rate": 1.952159784932098e-06, "loss": 41.2188, "step": 13300 }, { "epoch": 0.12590755483193078, "grad_norm": 251.54425048828125, "learning_rate": 1.952150415270999e-06, "loss": 21.4414, "step": 13301 }, { "epoch": 0.12591702085364584, "grad_norm": 657.74267578125, "learning_rate": 1.9521410447149404e-06, "loss": 38.8906, "step": 13302 }, { "epoch": 0.1259264868753609, "grad_norm": 878.3411865234375, "learning_rate": 1.952131673263931e-06, "loss": 68.6406, "step": 13303 }, { "epoch": 0.12593595289707596, "grad_norm": 3.410975933074951, "learning_rate": 1.95212230091798e-06, "loss": 0.9141, "step": 13304 }, { "epoch": 0.125945418918791, "grad_norm": 3.7468132972717285, "learning_rate": 1.952112927677096e-06, "loss": 1.1133, "step": 13305 }, { "epoch": 0.12595488494050605, "grad_norm": 545.3897705078125, "learning_rate": 1.9521035535412875e-06, "loss": 8.6055, "step": 13306 }, { "epoch": 0.1259643509622211, "grad_norm": 446.0579528808594, "learning_rate": 1.9520941785105637e-06, "loss": 37.4609, "step": 13307 }, { "epoch": 0.12597381698393617, "grad_norm": 409.2267761230469, "learning_rate": 1.952084802584934e-06, "loss": 57.4062, "step": 13308 }, { "epoch": 0.1259832830056512, "grad_norm": 339.2450866699219, "learning_rate": 1.9520754257644058e-06, "loss": 24.375, "step": 13309 }, { "epoch": 0.12599274902736626, "grad_norm": 424.84552001953125, "learning_rate": 1.9520660480489886e-06, "loss": 26.5859, "step": 13310 }, { "epoch": 0.12600221504908132, "grad_norm": 359.7779846191406, "learning_rate": 1.9520566694386912e-06, "loss": 37.4844, "step": 13311 }, { "epoch": 0.12601168107079638, "grad_norm": 896.7194213867188, "learning_rate": 1.9520472899335227e-06, "loss": 45.2695, "step": 13312 }, { "epoch": 0.12602114709251144, "grad_norm": 422.99725341796875, "learning_rate": 1.9520379095334917e-06, "loss": 20.2734, "step": 13313 }, { "epoch": 0.12603061311422648, "grad_norm": 305.79986572265625, "learning_rate": 1.952028528238607e-06, "loss": 24.4375, "step": 13314 }, { "epoch": 0.12604007913594154, "grad_norm": 349.8025207519531, "learning_rate": 1.952019146048877e-06, "loss": 22.0312, "step": 13315 }, { "epoch": 0.1260495451576566, "grad_norm": 389.9148254394531, "learning_rate": 1.9520097629643116e-06, "loss": 16.8125, "step": 13316 }, { "epoch": 0.12605901117937166, "grad_norm": 310.0596618652344, "learning_rate": 1.952000378984919e-06, "loss": 32.9531, "step": 13317 }, { "epoch": 0.1260684772010867, "grad_norm": 200.6229705810547, "learning_rate": 1.9519909941107075e-06, "loss": 9.4102, "step": 13318 }, { "epoch": 0.12607794322280175, "grad_norm": 878.3117065429688, "learning_rate": 1.9519816083416866e-06, "loss": 31.3672, "step": 13319 }, { "epoch": 0.1260874092445168, "grad_norm": 449.05242919921875, "learning_rate": 1.951972221677865e-06, "loss": 45.8125, "step": 13320 }, { "epoch": 0.12609687526623187, "grad_norm": 420.2989196777344, "learning_rate": 1.951962834119251e-06, "loss": 53.5, "step": 13321 }, { "epoch": 0.12610634128794693, "grad_norm": 262.5774841308594, "learning_rate": 1.951953445665854e-06, "loss": 14.0586, "step": 13322 }, { "epoch": 0.12611580730966196, "grad_norm": 3.17242693901062, "learning_rate": 1.951944056317683e-06, "loss": 0.8926, "step": 13323 }, { "epoch": 0.12612527333137702, "grad_norm": 433.0061950683594, "learning_rate": 1.9519346660747465e-06, "loss": 49.8281, "step": 13324 }, { "epoch": 0.12613473935309208, "grad_norm": 219.4008026123047, "learning_rate": 1.9519252749370534e-06, "loss": 29.5391, "step": 13325 }, { "epoch": 0.12614420537480714, "grad_norm": 428.17291259765625, "learning_rate": 1.9519158829046123e-06, "loss": 47.8984, "step": 13326 }, { "epoch": 0.12615367139652217, "grad_norm": 484.4280090332031, "learning_rate": 1.951906489977432e-06, "loss": 29.2109, "step": 13327 }, { "epoch": 0.12616313741823723, "grad_norm": 340.69158935546875, "learning_rate": 1.9518970961555216e-06, "loss": 21.7734, "step": 13328 }, { "epoch": 0.1261726034399523, "grad_norm": 585.2223510742188, "learning_rate": 1.9518877014388898e-06, "loss": 32.6953, "step": 13329 }, { "epoch": 0.12618206946166735, "grad_norm": 372.00482177734375, "learning_rate": 1.9518783058275455e-06, "loss": 19.6641, "step": 13330 }, { "epoch": 0.1261915354833824, "grad_norm": 3.1769609451293945, "learning_rate": 1.951868909321498e-06, "loss": 0.8889, "step": 13331 }, { "epoch": 0.12620100150509744, "grad_norm": 386.7494812011719, "learning_rate": 1.9518595119207554e-06, "loss": 28.2188, "step": 13332 }, { "epoch": 0.1262104675268125, "grad_norm": 774.1046752929688, "learning_rate": 1.9518501136253264e-06, "loss": 39.9219, "step": 13333 }, { "epoch": 0.12621993354852756, "grad_norm": 352.9222717285156, "learning_rate": 1.9518407144352205e-06, "loss": 15.5859, "step": 13334 }, { "epoch": 0.12622939957024262, "grad_norm": 294.5710754394531, "learning_rate": 1.9518313143504463e-06, "loss": 10.3398, "step": 13335 }, { "epoch": 0.12623886559195766, "grad_norm": 348.34466552734375, "learning_rate": 1.9518219133710126e-06, "loss": 17.7266, "step": 13336 }, { "epoch": 0.12624833161367272, "grad_norm": 326.5757751464844, "learning_rate": 1.9518125114969282e-06, "loss": 8.9531, "step": 13337 }, { "epoch": 0.12625779763538778, "grad_norm": 534.9884033203125, "learning_rate": 1.9518031087282018e-06, "loss": 41.3906, "step": 13338 }, { "epoch": 0.12626726365710284, "grad_norm": 651.5226440429688, "learning_rate": 1.951793705064843e-06, "loss": 41.9844, "step": 13339 }, { "epoch": 0.1262767296788179, "grad_norm": 534.2345581054688, "learning_rate": 1.9517843005068593e-06, "loss": 43.5078, "step": 13340 }, { "epoch": 0.12628619570053293, "grad_norm": 508.1007995605469, "learning_rate": 1.9517748950542603e-06, "loss": 46.8516, "step": 13341 }, { "epoch": 0.126295661722248, "grad_norm": 264.42730712890625, "learning_rate": 1.951765488707055e-06, "loss": 18.5312, "step": 13342 }, { "epoch": 0.12630512774396305, "grad_norm": 266.14459228515625, "learning_rate": 1.9517560814652522e-06, "loss": 25.4062, "step": 13343 }, { "epoch": 0.1263145937656781, "grad_norm": 487.6197814941406, "learning_rate": 1.9517466733288606e-06, "loss": 33.0234, "step": 13344 }, { "epoch": 0.12632405978739314, "grad_norm": 3.205129384994507, "learning_rate": 1.9517372642978887e-06, "loss": 0.8643, "step": 13345 }, { "epoch": 0.1263335258091082, "grad_norm": 382.375, "learning_rate": 1.9517278543723463e-06, "loss": 32.9375, "step": 13346 }, { "epoch": 0.12634299183082326, "grad_norm": 422.7896423339844, "learning_rate": 1.9517184435522413e-06, "loss": 40.8125, "step": 13347 }, { "epoch": 0.12635245785253832, "grad_norm": 377.9723815917969, "learning_rate": 1.9517090318375827e-06, "loss": 20.6016, "step": 13348 }, { "epoch": 0.12636192387425338, "grad_norm": 363.46612548828125, "learning_rate": 1.95169961922838e-06, "loss": 31.1562, "step": 13349 }, { "epoch": 0.1263713898959684, "grad_norm": 754.9600219726562, "learning_rate": 1.9516902057246407e-06, "loss": 25.6367, "step": 13350 }, { "epoch": 0.12638085591768347, "grad_norm": 488.4947814941406, "learning_rate": 1.951680791326375e-06, "loss": 61.3047, "step": 13351 }, { "epoch": 0.12639032193939853, "grad_norm": 4.382075786590576, "learning_rate": 1.9516713760335914e-06, "loss": 0.9065, "step": 13352 }, { "epoch": 0.1263997879611136, "grad_norm": 207.63162231445312, "learning_rate": 1.9516619598462986e-06, "loss": 18.3672, "step": 13353 }, { "epoch": 0.12640925398282862, "grad_norm": 1005.9324951171875, "learning_rate": 1.9516525427645056e-06, "loss": 44.3281, "step": 13354 }, { "epoch": 0.12641872000454368, "grad_norm": 695.0509033203125, "learning_rate": 1.9516431247882207e-06, "loss": 31.2812, "step": 13355 }, { "epoch": 0.12642818602625874, "grad_norm": 756.3140258789062, "learning_rate": 1.9516337059174534e-06, "loss": 17.2148, "step": 13356 }, { "epoch": 0.1264376520479738, "grad_norm": 599.3627319335938, "learning_rate": 1.951624286152212e-06, "loss": 25.3906, "step": 13357 }, { "epoch": 0.12644711806968886, "grad_norm": 282.9970397949219, "learning_rate": 1.951614865492506e-06, "loss": 28.375, "step": 13358 }, { "epoch": 0.1264565840914039, "grad_norm": 294.9689025878906, "learning_rate": 1.951605443938344e-06, "loss": 16.2266, "step": 13359 }, { "epoch": 0.12646605011311896, "grad_norm": 197.6996307373047, "learning_rate": 1.951596021489735e-06, "loss": 23.625, "step": 13360 }, { "epoch": 0.12647551613483402, "grad_norm": 180.89208984375, "learning_rate": 1.9515865981466874e-06, "loss": 15.0938, "step": 13361 }, { "epoch": 0.12648498215654908, "grad_norm": 328.6690979003906, "learning_rate": 1.95157717390921e-06, "loss": 23.9609, "step": 13362 }, { "epoch": 0.1264944481782641, "grad_norm": 298.8408203125, "learning_rate": 1.9515677487773124e-06, "loss": 39.5469, "step": 13363 }, { "epoch": 0.12650391419997917, "grad_norm": 3.003593921661377, "learning_rate": 1.9515583227510026e-06, "loss": 0.9004, "step": 13364 }, { "epoch": 0.12651338022169423, "grad_norm": 865.0154418945312, "learning_rate": 1.95154889583029e-06, "loss": 85.8242, "step": 13365 }, { "epoch": 0.1265228462434093, "grad_norm": 292.0408020019531, "learning_rate": 1.951539468015184e-06, "loss": 22.1953, "step": 13366 }, { "epoch": 0.12653231226512435, "grad_norm": 1150.379150390625, "learning_rate": 1.9515300393056917e-06, "loss": 34.1094, "step": 13367 }, { "epoch": 0.12654177828683938, "grad_norm": 964.8262329101562, "learning_rate": 1.951520609701824e-06, "loss": 16.2305, "step": 13368 }, { "epoch": 0.12655124430855444, "grad_norm": 446.04620361328125, "learning_rate": 1.9515111792035885e-06, "loss": 46.0312, "step": 13369 }, { "epoch": 0.1265607103302695, "grad_norm": 396.1007995605469, "learning_rate": 1.951501747810994e-06, "loss": 9.375, "step": 13370 }, { "epoch": 0.12657017635198456, "grad_norm": 2.9439613819122314, "learning_rate": 1.9514923155240504e-06, "loss": 1.0103, "step": 13371 }, { "epoch": 0.12657964237369962, "grad_norm": 430.84954833984375, "learning_rate": 1.9514828823427652e-06, "loss": 24.3047, "step": 13372 }, { "epoch": 0.12658910839541465, "grad_norm": 1150.6832275390625, "learning_rate": 1.951473448267149e-06, "loss": 38.0703, "step": 13373 }, { "epoch": 0.1265985744171297, "grad_norm": 295.4119567871094, "learning_rate": 1.9514640132972087e-06, "loss": 12.6797, "step": 13374 }, { "epoch": 0.12660804043884477, "grad_norm": 202.11000061035156, "learning_rate": 1.9514545774329547e-06, "loss": 19.0781, "step": 13375 }, { "epoch": 0.12661750646055983, "grad_norm": 333.80206298828125, "learning_rate": 1.951445140674395e-06, "loss": 25.8203, "step": 13376 }, { "epoch": 0.12662697248227486, "grad_norm": 376.2691650390625, "learning_rate": 1.951435703021539e-06, "loss": 18.7891, "step": 13377 }, { "epoch": 0.12663643850398992, "grad_norm": 409.1540222167969, "learning_rate": 1.9514262644743953e-06, "loss": 36.1367, "step": 13378 }, { "epoch": 0.12664590452570498, "grad_norm": 312.83770751953125, "learning_rate": 1.9514168250329725e-06, "loss": 23.2578, "step": 13379 }, { "epoch": 0.12665537054742004, "grad_norm": 452.0498352050781, "learning_rate": 1.95140738469728e-06, "loss": 52.7969, "step": 13380 }, { "epoch": 0.1266648365691351, "grad_norm": 323.39434814453125, "learning_rate": 1.9513979434673266e-06, "loss": 20.1641, "step": 13381 }, { "epoch": 0.12667430259085014, "grad_norm": 237.63470458984375, "learning_rate": 1.9513885013431205e-06, "loss": 18.4922, "step": 13382 }, { "epoch": 0.1266837686125652, "grad_norm": 263.7359619140625, "learning_rate": 1.9513790583246712e-06, "loss": 17.9219, "step": 13383 }, { "epoch": 0.12669323463428026, "grad_norm": 337.0799560546875, "learning_rate": 1.951369614411988e-06, "loss": 44.1719, "step": 13384 }, { "epoch": 0.12670270065599532, "grad_norm": 440.5296936035156, "learning_rate": 1.951360169605079e-06, "loss": 33.8438, "step": 13385 }, { "epoch": 0.12671216667771035, "grad_norm": 184.5572052001953, "learning_rate": 1.9513507239039533e-06, "loss": 20.875, "step": 13386 }, { "epoch": 0.1267216326994254, "grad_norm": 732.8403930664062, "learning_rate": 1.95134127730862e-06, "loss": 22.3711, "step": 13387 }, { "epoch": 0.12673109872114047, "grad_norm": 3.4270551204681396, "learning_rate": 1.9513318298190872e-06, "loss": 1.1289, "step": 13388 }, { "epoch": 0.12674056474285553, "grad_norm": 447.01361083984375, "learning_rate": 1.951322381435365e-06, "loss": 37.1016, "step": 13389 }, { "epoch": 0.1267500307645706, "grad_norm": 834.6939086914062, "learning_rate": 1.9513129321574615e-06, "loss": 61.5391, "step": 13390 }, { "epoch": 0.12675949678628562, "grad_norm": 701.85498046875, "learning_rate": 1.9513034819853857e-06, "loss": 30.5703, "step": 13391 }, { "epoch": 0.12676896280800068, "grad_norm": 501.9645080566406, "learning_rate": 1.951294030919146e-06, "loss": 27.5078, "step": 13392 }, { "epoch": 0.12677842882971574, "grad_norm": 479.9990539550781, "learning_rate": 1.951284578958753e-06, "loss": 48.8594, "step": 13393 }, { "epoch": 0.1267878948514308, "grad_norm": 585.630126953125, "learning_rate": 1.951275126104213e-06, "loss": 53.3125, "step": 13394 }, { "epoch": 0.12679736087314583, "grad_norm": 303.25579833984375, "learning_rate": 1.9512656723555373e-06, "loss": 33.4766, "step": 13395 }, { "epoch": 0.1268068268948609, "grad_norm": 897.2579345703125, "learning_rate": 1.951256217712733e-06, "loss": 27.2188, "step": 13396 }, { "epoch": 0.12681629291657595, "grad_norm": 670.3174438476562, "learning_rate": 1.9512467621758102e-06, "loss": 40.5, "step": 13397 }, { "epoch": 0.126825758938291, "grad_norm": 209.9985809326172, "learning_rate": 1.951237305744777e-06, "loss": 8.6992, "step": 13398 }, { "epoch": 0.12683522496000607, "grad_norm": 385.1164245605469, "learning_rate": 1.9512278484196433e-06, "loss": 22.3594, "step": 13399 }, { "epoch": 0.1268446909817211, "grad_norm": 488.499267578125, "learning_rate": 1.9512183902004165e-06, "loss": 38.9336, "step": 13400 }, { "epoch": 0.12685415700343616, "grad_norm": 180.74627685546875, "learning_rate": 1.9512089310871067e-06, "loss": 22.1797, "step": 13401 }, { "epoch": 0.12686362302515122, "grad_norm": 823.2357788085938, "learning_rate": 1.9511994710797223e-06, "loss": 32.6797, "step": 13402 }, { "epoch": 0.12687308904686628, "grad_norm": 480.73052978515625, "learning_rate": 1.9511900101782725e-06, "loss": 42.2344, "step": 13403 }, { "epoch": 0.12688255506858132, "grad_norm": 630.1935424804688, "learning_rate": 1.9511805483827655e-06, "loss": 9.0156, "step": 13404 }, { "epoch": 0.12689202109029638, "grad_norm": 218.1253662109375, "learning_rate": 1.951171085693211e-06, "loss": 17.625, "step": 13405 }, { "epoch": 0.12690148711201144, "grad_norm": 610.4765014648438, "learning_rate": 1.9511616221096175e-06, "loss": 38.1875, "step": 13406 }, { "epoch": 0.1269109531337265, "grad_norm": 300.9467468261719, "learning_rate": 1.951152157631994e-06, "loss": 21.2109, "step": 13407 }, { "epoch": 0.12692041915544156, "grad_norm": 346.6846008300781, "learning_rate": 1.9511426922603492e-06, "loss": 30.8594, "step": 13408 }, { "epoch": 0.1269298851771566, "grad_norm": 4.109763145446777, "learning_rate": 1.9511332259946924e-06, "loss": 1.1196, "step": 13409 }, { "epoch": 0.12693935119887165, "grad_norm": 464.15228271484375, "learning_rate": 1.951123758835032e-06, "loss": 24.2031, "step": 13410 }, { "epoch": 0.1269488172205867, "grad_norm": 316.1590270996094, "learning_rate": 1.951114290781377e-06, "loss": 20.1641, "step": 13411 }, { "epoch": 0.12695828324230177, "grad_norm": 584.0719604492188, "learning_rate": 1.951104821833737e-06, "loss": 36.4219, "step": 13412 }, { "epoch": 0.1269677492640168, "grad_norm": 2.997528314590454, "learning_rate": 1.95109535199212e-06, "loss": 0.9878, "step": 13413 }, { "epoch": 0.12697721528573186, "grad_norm": 497.2213439941406, "learning_rate": 1.951085881256535e-06, "loss": 22.3438, "step": 13414 }, { "epoch": 0.12698668130744692, "grad_norm": 310.8911437988281, "learning_rate": 1.9510764096269913e-06, "loss": 16.5078, "step": 13415 }, { "epoch": 0.12699614732916198, "grad_norm": 246.42660522460938, "learning_rate": 1.951066937103498e-06, "loss": 31.5, "step": 13416 }, { "epoch": 0.12700561335087704, "grad_norm": 821.3759155273438, "learning_rate": 1.9510574636860635e-06, "loss": 47.5, "step": 13417 }, { "epoch": 0.12701507937259207, "grad_norm": 823.7952270507812, "learning_rate": 1.951047989374697e-06, "loss": 48.5469, "step": 13418 }, { "epoch": 0.12702454539430713, "grad_norm": 289.78515625, "learning_rate": 1.9510385141694064e-06, "loss": 22.7578, "step": 13419 }, { "epoch": 0.1270340114160222, "grad_norm": 212.28097534179688, "learning_rate": 1.9510290380702022e-06, "loss": 7.916, "step": 13420 }, { "epoch": 0.12704347743773725, "grad_norm": 3.089507818222046, "learning_rate": 1.9510195610770924e-06, "loss": 0.9253, "step": 13421 }, { "epoch": 0.12705294345945228, "grad_norm": 510.2340393066406, "learning_rate": 1.9510100831900864e-06, "loss": 52.3906, "step": 13422 }, { "epoch": 0.12706240948116734, "grad_norm": 220.24234008789062, "learning_rate": 1.9510006044091924e-06, "loss": 14.0625, "step": 13423 }, { "epoch": 0.1270718755028824, "grad_norm": 457.9831848144531, "learning_rate": 1.95099112473442e-06, "loss": 37.1562, "step": 13424 }, { "epoch": 0.12708134152459746, "grad_norm": 264.0520935058594, "learning_rate": 1.9509816441657774e-06, "loss": 19.9609, "step": 13425 }, { "epoch": 0.12709080754631252, "grad_norm": 358.56756591796875, "learning_rate": 1.950972162703274e-06, "loss": 28.5625, "step": 13426 }, { "epoch": 0.12710027356802756, "grad_norm": 504.8966979980469, "learning_rate": 1.9509626803469186e-06, "loss": 50.4219, "step": 13427 }, { "epoch": 0.12710973958974262, "grad_norm": 200.09573364257812, "learning_rate": 1.9509531970967206e-06, "loss": 16.9219, "step": 13428 }, { "epoch": 0.12711920561145768, "grad_norm": 197.83074951171875, "learning_rate": 1.950943712952688e-06, "loss": 15.1328, "step": 13429 }, { "epoch": 0.12712867163317274, "grad_norm": 479.802978515625, "learning_rate": 1.9509342279148303e-06, "loss": 55.9688, "step": 13430 }, { "epoch": 0.12713813765488777, "grad_norm": 455.4777526855469, "learning_rate": 1.9509247419831563e-06, "loss": 36.1406, "step": 13431 }, { "epoch": 0.12714760367660283, "grad_norm": 334.0008850097656, "learning_rate": 1.950915255157675e-06, "loss": 41.0, "step": 13432 }, { "epoch": 0.1271570696983179, "grad_norm": 407.646240234375, "learning_rate": 1.950905767438395e-06, "loss": 16.9688, "step": 13433 }, { "epoch": 0.12716653572003295, "grad_norm": 452.0950927734375, "learning_rate": 1.9508962788253257e-06, "loss": 21.8047, "step": 13434 }, { "epoch": 0.127176001741748, "grad_norm": 391.02606201171875, "learning_rate": 1.950886789318476e-06, "loss": 10.6523, "step": 13435 }, { "epoch": 0.12718546776346304, "grad_norm": 656.3055419921875, "learning_rate": 1.950877298917854e-06, "loss": 55.1406, "step": 13436 }, { "epoch": 0.1271949337851781, "grad_norm": 556.2755737304688, "learning_rate": 1.9508678076234696e-06, "loss": 44.4766, "step": 13437 }, { "epoch": 0.12720439980689316, "grad_norm": 686.4541015625, "learning_rate": 1.950858315435331e-06, "loss": 37.0156, "step": 13438 }, { "epoch": 0.12721386582860822, "grad_norm": 231.46356201171875, "learning_rate": 1.9508488223534477e-06, "loss": 22.2188, "step": 13439 }, { "epoch": 0.12722333185032325, "grad_norm": 598.1219482421875, "learning_rate": 1.9508393283778283e-06, "loss": 49.0469, "step": 13440 }, { "epoch": 0.1272327978720383, "grad_norm": 547.489501953125, "learning_rate": 1.9508298335084816e-06, "loss": 22.6328, "step": 13441 }, { "epoch": 0.12724226389375337, "grad_norm": 381.3787841796875, "learning_rate": 1.950820337745417e-06, "loss": 46.8281, "step": 13442 }, { "epoch": 0.12725172991546843, "grad_norm": 838.1392211914062, "learning_rate": 1.9508108410886427e-06, "loss": 50.2969, "step": 13443 }, { "epoch": 0.1272611959371835, "grad_norm": 471.1757507324219, "learning_rate": 1.9508013435381687e-06, "loss": 42.0391, "step": 13444 }, { "epoch": 0.12727066195889852, "grad_norm": 200.93865966796875, "learning_rate": 1.950791845094003e-06, "loss": 15.9922, "step": 13445 }, { "epoch": 0.12728012798061358, "grad_norm": 1862.4908447265625, "learning_rate": 1.9507823457561546e-06, "loss": 32.1953, "step": 13446 }, { "epoch": 0.12728959400232864, "grad_norm": 745.8229370117188, "learning_rate": 1.950772845524633e-06, "loss": 8.7266, "step": 13447 }, { "epoch": 0.1272990600240437, "grad_norm": 179.55271911621094, "learning_rate": 1.9507633443994464e-06, "loss": 22.25, "step": 13448 }, { "epoch": 0.12730852604575874, "grad_norm": 243.8939208984375, "learning_rate": 1.950753842380604e-06, "loss": 31.7969, "step": 13449 }, { "epoch": 0.1273179920674738, "grad_norm": 2.7916910648345947, "learning_rate": 1.950744339468115e-06, "loss": 0.8521, "step": 13450 }, { "epoch": 0.12732745808918886, "grad_norm": 626.037353515625, "learning_rate": 1.9507348356619885e-06, "loss": 53.0234, "step": 13451 }, { "epoch": 0.12733692411090392, "grad_norm": 307.92022705078125, "learning_rate": 1.950725330962233e-06, "loss": 22.7266, "step": 13452 }, { "epoch": 0.12734639013261898, "grad_norm": 362.7856750488281, "learning_rate": 1.9507158253688574e-06, "loss": 28.4766, "step": 13453 }, { "epoch": 0.127355856154334, "grad_norm": 286.762451171875, "learning_rate": 1.950706318881871e-06, "loss": 20.9844, "step": 13454 }, { "epoch": 0.12736532217604907, "grad_norm": 328.85614013671875, "learning_rate": 1.9506968115012823e-06, "loss": 34.8906, "step": 13455 }, { "epoch": 0.12737478819776413, "grad_norm": 665.508056640625, "learning_rate": 1.9506873032271e-06, "loss": 64.8359, "step": 13456 }, { "epoch": 0.1273842542194792, "grad_norm": 1009.60498046875, "learning_rate": 1.9506777940593346e-06, "loss": 69.3867, "step": 13457 }, { "epoch": 0.12739372024119425, "grad_norm": 422.6332702636719, "learning_rate": 1.950668283997993e-06, "loss": 36.6875, "step": 13458 }, { "epoch": 0.12740318626290928, "grad_norm": 172.31216430664062, "learning_rate": 1.9506587730430853e-06, "loss": 28.2031, "step": 13459 }, { "epoch": 0.12741265228462434, "grad_norm": 527.895751953125, "learning_rate": 1.95064926119462e-06, "loss": 30.9141, "step": 13460 }, { "epoch": 0.1274221183063394, "grad_norm": 497.868896484375, "learning_rate": 1.9506397484526067e-06, "loss": 46.4062, "step": 13461 }, { "epoch": 0.12743158432805446, "grad_norm": 1072.8255615234375, "learning_rate": 1.9506302348170537e-06, "loss": 49.125, "step": 13462 }, { "epoch": 0.1274410503497695, "grad_norm": 434.7898864746094, "learning_rate": 1.95062072028797e-06, "loss": 37.1328, "step": 13463 }, { "epoch": 0.12745051637148455, "grad_norm": 333.2665710449219, "learning_rate": 1.950611204865365e-06, "loss": 21.2422, "step": 13464 }, { "epoch": 0.1274599823931996, "grad_norm": 297.4764099121094, "learning_rate": 1.950601688549247e-06, "loss": 18.8359, "step": 13465 }, { "epoch": 0.12746944841491467, "grad_norm": 1034.263671875, "learning_rate": 1.950592171339625e-06, "loss": 39.25, "step": 13466 }, { "epoch": 0.12747891443662973, "grad_norm": 3.4627208709716797, "learning_rate": 1.9505826532365086e-06, "loss": 0.9766, "step": 13467 }, { "epoch": 0.12748838045834476, "grad_norm": 349.0146789550781, "learning_rate": 1.950573134239906e-06, "loss": 41.625, "step": 13468 }, { "epoch": 0.12749784648005982, "grad_norm": 365.6046447753906, "learning_rate": 1.9505636143498267e-06, "loss": 30.1406, "step": 13469 }, { "epoch": 0.12750731250177488, "grad_norm": 273.8702697753906, "learning_rate": 1.9505540935662795e-06, "loss": 16.5977, "step": 13470 }, { "epoch": 0.12751677852348994, "grad_norm": 511.4125061035156, "learning_rate": 1.9505445718892734e-06, "loss": 46.75, "step": 13471 }, { "epoch": 0.12752624454520498, "grad_norm": 3.337028980255127, "learning_rate": 1.950535049318817e-06, "loss": 0.9497, "step": 13472 }, { "epoch": 0.12753571056692004, "grad_norm": 238.80531311035156, "learning_rate": 1.9505255258549196e-06, "loss": 10.6016, "step": 13473 }, { "epoch": 0.1275451765886351, "grad_norm": 1093.4476318359375, "learning_rate": 1.95051600149759e-06, "loss": 56.6094, "step": 13474 }, { "epoch": 0.12755464261035016, "grad_norm": 937.4810180664062, "learning_rate": 1.9505064762468372e-06, "loss": 64.2969, "step": 13475 }, { "epoch": 0.12756410863206522, "grad_norm": 2.885780096054077, "learning_rate": 1.9504969501026705e-06, "loss": 1.0278, "step": 13476 }, { "epoch": 0.12757357465378025, "grad_norm": 3.2552037239074707, "learning_rate": 1.950487423065098e-06, "loss": 0.959, "step": 13477 }, { "epoch": 0.1275830406754953, "grad_norm": 224.10659790039062, "learning_rate": 1.950477895134129e-06, "loss": 22.1562, "step": 13478 }, { "epoch": 0.12759250669721037, "grad_norm": 532.96337890625, "learning_rate": 1.9504683663097735e-06, "loss": 43.0625, "step": 13479 }, { "epoch": 0.12760197271892543, "grad_norm": 743.4229125976562, "learning_rate": 1.9504588365920387e-06, "loss": 48.5586, "step": 13480 }, { "epoch": 0.12761143874064046, "grad_norm": 700.3218994140625, "learning_rate": 1.950449305980935e-06, "loss": 30.9219, "step": 13481 }, { "epoch": 0.12762090476235552, "grad_norm": 1029.405517578125, "learning_rate": 1.95043977447647e-06, "loss": 37.7109, "step": 13482 }, { "epoch": 0.12763037078407058, "grad_norm": 240.9330291748047, "learning_rate": 1.9504302420786544e-06, "loss": 19.8086, "step": 13483 }, { "epoch": 0.12763983680578564, "grad_norm": 284.7814025878906, "learning_rate": 1.9504207087874956e-06, "loss": 19.1328, "step": 13484 }, { "epoch": 0.1276493028275007, "grad_norm": 243.15878295898438, "learning_rate": 1.9504111746030033e-06, "loss": 17.7656, "step": 13485 }, { "epoch": 0.12765876884921573, "grad_norm": 521.3405151367188, "learning_rate": 1.9504016395251864e-06, "loss": 20.375, "step": 13486 }, { "epoch": 0.1276682348709308, "grad_norm": 667.3245849609375, "learning_rate": 1.9503921035540534e-06, "loss": 70.5469, "step": 13487 }, { "epoch": 0.12767770089264585, "grad_norm": 659.7975463867188, "learning_rate": 1.9503825666896145e-06, "loss": 49.5078, "step": 13488 }, { "epoch": 0.1276871669143609, "grad_norm": 253.30731201171875, "learning_rate": 1.9503730289318773e-06, "loss": 19.1641, "step": 13489 }, { "epoch": 0.12769663293607594, "grad_norm": 2.864840507507324, "learning_rate": 1.950363490280851e-06, "loss": 0.9688, "step": 13490 }, { "epoch": 0.127706098957791, "grad_norm": 3.092064380645752, "learning_rate": 1.950353950736545e-06, "loss": 0.957, "step": 13491 }, { "epoch": 0.12771556497950606, "grad_norm": 443.0016174316406, "learning_rate": 1.9503444102989685e-06, "loss": 9.1094, "step": 13492 }, { "epoch": 0.12772503100122112, "grad_norm": 539.2684326171875, "learning_rate": 1.9503348689681295e-06, "loss": 48.2969, "step": 13493 }, { "epoch": 0.12773449702293618, "grad_norm": 324.83251953125, "learning_rate": 1.950325326744038e-06, "loss": 21.5312, "step": 13494 }, { "epoch": 0.12774396304465122, "grad_norm": 247.7030029296875, "learning_rate": 1.9503157836267023e-06, "loss": 14.9297, "step": 13495 }, { "epoch": 0.12775342906636628, "grad_norm": 633.1151123046875, "learning_rate": 1.950306239616132e-06, "loss": 24.1094, "step": 13496 }, { "epoch": 0.12776289508808134, "grad_norm": 670.89892578125, "learning_rate": 1.950296694712335e-06, "loss": 50.7539, "step": 13497 }, { "epoch": 0.1277723611097964, "grad_norm": 488.889404296875, "learning_rate": 1.9502871489153213e-06, "loss": 27.5938, "step": 13498 }, { "epoch": 0.12778182713151143, "grad_norm": 469.6299133300781, "learning_rate": 1.9502776022250995e-06, "loss": 19.5625, "step": 13499 }, { "epoch": 0.1277912931532265, "grad_norm": 551.5736694335938, "learning_rate": 1.9502680546416784e-06, "loss": 32.875, "step": 13500 }, { "epoch": 0.12780075917494155, "grad_norm": 529.877197265625, "learning_rate": 1.9502585061650675e-06, "loss": 28.75, "step": 13501 }, { "epoch": 0.1278102251966566, "grad_norm": 462.35205078125, "learning_rate": 1.950248956795275e-06, "loss": 44.3125, "step": 13502 }, { "epoch": 0.12781969121837167, "grad_norm": 342.6333312988281, "learning_rate": 1.9502394065323106e-06, "loss": 19.1094, "step": 13503 }, { "epoch": 0.1278291572400867, "grad_norm": 283.75848388671875, "learning_rate": 1.950229855376183e-06, "loss": 18.7109, "step": 13504 }, { "epoch": 0.12783862326180176, "grad_norm": 882.9357299804688, "learning_rate": 1.9502203033269006e-06, "loss": 9.2539, "step": 13505 }, { "epoch": 0.12784808928351682, "grad_norm": 268.98297119140625, "learning_rate": 1.9502107503844736e-06, "loss": 19.9531, "step": 13506 }, { "epoch": 0.12785755530523188, "grad_norm": 538.264892578125, "learning_rate": 1.95020119654891e-06, "loss": 53.8125, "step": 13507 }, { "epoch": 0.1278670213269469, "grad_norm": 319.18878173828125, "learning_rate": 1.9501916418202193e-06, "loss": 27.9023, "step": 13508 }, { "epoch": 0.12787648734866197, "grad_norm": 305.1922607421875, "learning_rate": 1.95018208619841e-06, "loss": 14.4141, "step": 13509 }, { "epoch": 0.12788595337037703, "grad_norm": 243.99099731445312, "learning_rate": 1.9501725296834914e-06, "loss": 27.7812, "step": 13510 }, { "epoch": 0.1278954193920921, "grad_norm": 2.8619587421417236, "learning_rate": 1.950162972275473e-06, "loss": 0.9468, "step": 13511 }, { "epoch": 0.12790488541380715, "grad_norm": 690.0322265625, "learning_rate": 1.950153413974362e-06, "loss": 35.3047, "step": 13512 }, { "epoch": 0.12791435143552218, "grad_norm": 3.724895715713501, "learning_rate": 1.9501438547801692e-06, "loss": 0.8164, "step": 13513 }, { "epoch": 0.12792381745723724, "grad_norm": 904.3302612304688, "learning_rate": 1.9501342946929037e-06, "loss": 40.7109, "step": 13514 }, { "epoch": 0.1279332834789523, "grad_norm": 169.0417022705078, "learning_rate": 1.950124733712573e-06, "loss": 21.5312, "step": 13515 }, { "epoch": 0.12794274950066736, "grad_norm": 883.15771484375, "learning_rate": 1.950115171839187e-06, "loss": 41.875, "step": 13516 }, { "epoch": 0.1279522155223824, "grad_norm": 836.0418701171875, "learning_rate": 1.9501056090727547e-06, "loss": 24.8984, "step": 13517 }, { "epoch": 0.12796168154409746, "grad_norm": 3.4537808895111084, "learning_rate": 1.9500960454132844e-06, "loss": 0.8965, "step": 13518 }, { "epoch": 0.12797114756581252, "grad_norm": 760.0667114257812, "learning_rate": 1.9500864808607865e-06, "loss": 46.7812, "step": 13519 }, { "epoch": 0.12798061358752758, "grad_norm": 559.0947265625, "learning_rate": 1.9500769154152685e-06, "loss": 44.3828, "step": 13520 }, { "epoch": 0.12799007960924264, "grad_norm": 405.2442626953125, "learning_rate": 1.95006734907674e-06, "loss": 31.5391, "step": 13521 }, { "epoch": 0.12799954563095767, "grad_norm": 532.521728515625, "learning_rate": 1.9500577818452104e-06, "loss": 59.0859, "step": 13522 }, { "epoch": 0.12800901165267273, "grad_norm": 146.82122802734375, "learning_rate": 1.950048213720688e-06, "loss": 17.9375, "step": 13523 }, { "epoch": 0.1280184776743878, "grad_norm": 256.892822265625, "learning_rate": 1.950038644703182e-06, "loss": 23.6484, "step": 13524 }, { "epoch": 0.12802794369610285, "grad_norm": 300.7062683105469, "learning_rate": 1.950029074792702e-06, "loss": 19.3984, "step": 13525 }, { "epoch": 0.12803740971781788, "grad_norm": 244.6529083251953, "learning_rate": 1.950019503989256e-06, "loss": 21.5312, "step": 13526 }, { "epoch": 0.12804687573953294, "grad_norm": 308.4326171875, "learning_rate": 1.950009932292853e-06, "loss": 8.0156, "step": 13527 }, { "epoch": 0.128056341761248, "grad_norm": 399.4506530761719, "learning_rate": 1.9500003597035033e-06, "loss": 35.9062, "step": 13528 }, { "epoch": 0.12806580778296306, "grad_norm": 260.57513427734375, "learning_rate": 1.9499907862212147e-06, "loss": 17.6328, "step": 13529 }, { "epoch": 0.12807527380467812, "grad_norm": 511.81597900390625, "learning_rate": 1.9499812118459967e-06, "loss": 22.1641, "step": 13530 }, { "epoch": 0.12808473982639315, "grad_norm": 333.87744140625, "learning_rate": 1.9499716365778586e-06, "loss": 31.1719, "step": 13531 }, { "epoch": 0.1280942058481082, "grad_norm": 290.04736328125, "learning_rate": 1.949962060416808e-06, "loss": 11.2305, "step": 13532 }, { "epoch": 0.12810367186982327, "grad_norm": 392.07940673828125, "learning_rate": 1.9499524833628554e-06, "loss": 28.1641, "step": 13533 }, { "epoch": 0.12811313789153833, "grad_norm": 263.4495849609375, "learning_rate": 1.949942905416009e-06, "loss": 21.9766, "step": 13534 }, { "epoch": 0.12812260391325336, "grad_norm": 757.2841186523438, "learning_rate": 1.9499333265762786e-06, "loss": 62.4805, "step": 13535 }, { "epoch": 0.12813206993496842, "grad_norm": 204.74998474121094, "learning_rate": 1.949923746843672e-06, "loss": 20.4219, "step": 13536 }, { "epoch": 0.12814153595668348, "grad_norm": 296.0246887207031, "learning_rate": 1.9499141662181996e-06, "loss": 16.4844, "step": 13537 }, { "epoch": 0.12815100197839854, "grad_norm": 356.9522705078125, "learning_rate": 1.9499045846998693e-06, "loss": 11.5703, "step": 13538 }, { "epoch": 0.1281604680001136, "grad_norm": 245.13720703125, "learning_rate": 1.9498950022886905e-06, "loss": 22.0781, "step": 13539 }, { "epoch": 0.12816993402182864, "grad_norm": 441.192626953125, "learning_rate": 1.949885418984672e-06, "loss": 36.4062, "step": 13540 }, { "epoch": 0.1281794000435437, "grad_norm": 478.0263366699219, "learning_rate": 1.949875834787823e-06, "loss": 31.0156, "step": 13541 }, { "epoch": 0.12818886606525876, "grad_norm": 298.23956298828125, "learning_rate": 1.949866249698153e-06, "loss": 22.3594, "step": 13542 }, { "epoch": 0.12819833208697382, "grad_norm": 224.57899475097656, "learning_rate": 1.94985666371567e-06, "loss": 19.5469, "step": 13543 }, { "epoch": 0.12820779810868888, "grad_norm": 333.1011047363281, "learning_rate": 1.9498470768403838e-06, "loss": 30.5781, "step": 13544 }, { "epoch": 0.1282172641304039, "grad_norm": 298.4156494140625, "learning_rate": 1.949837489072303e-06, "loss": 23.8125, "step": 13545 }, { "epoch": 0.12822673015211897, "grad_norm": 174.7779541015625, "learning_rate": 1.949827900411437e-06, "loss": 13.8242, "step": 13546 }, { "epoch": 0.12823619617383403, "grad_norm": 812.0833740234375, "learning_rate": 1.949818310857794e-06, "loss": 35.9844, "step": 13547 }, { "epoch": 0.1282456621955491, "grad_norm": 403.6700439453125, "learning_rate": 1.9498087204113842e-06, "loss": 36.375, "step": 13548 }, { "epoch": 0.12825512821726412, "grad_norm": 3.0747480392456055, "learning_rate": 1.949799129072216e-06, "loss": 0.9087, "step": 13549 }, { "epoch": 0.12826459423897918, "grad_norm": 150.5733642578125, "learning_rate": 1.949789536840298e-06, "loss": 14.5625, "step": 13550 }, { "epoch": 0.12827406026069424, "grad_norm": 583.00537109375, "learning_rate": 1.9497799437156396e-06, "loss": 13.4297, "step": 13551 }, { "epoch": 0.1282835262824093, "grad_norm": 3.7243523597717285, "learning_rate": 1.94977034969825e-06, "loss": 0.9746, "step": 13552 }, { "epoch": 0.12829299230412436, "grad_norm": 378.07427978515625, "learning_rate": 1.9497607547881384e-06, "loss": 18.5469, "step": 13553 }, { "epoch": 0.1283024583258394, "grad_norm": 222.22792053222656, "learning_rate": 1.949751158985313e-06, "loss": 18.1094, "step": 13554 }, { "epoch": 0.12831192434755445, "grad_norm": 223.33486938476562, "learning_rate": 1.9497415622897835e-06, "loss": 25.8047, "step": 13555 }, { "epoch": 0.1283213903692695, "grad_norm": 929.2744140625, "learning_rate": 1.9497319647015585e-06, "loss": 55.5, "step": 13556 }, { "epoch": 0.12833085639098457, "grad_norm": 159.63307189941406, "learning_rate": 1.9497223662206476e-06, "loss": 16.3672, "step": 13557 }, { "epoch": 0.1283403224126996, "grad_norm": 494.2107849121094, "learning_rate": 1.9497127668470593e-06, "loss": 65.3281, "step": 13558 }, { "epoch": 0.12834978843441466, "grad_norm": 572.326416015625, "learning_rate": 1.949703166580803e-06, "loss": 42.3438, "step": 13559 }, { "epoch": 0.12835925445612972, "grad_norm": 204.17323303222656, "learning_rate": 1.9496935654218868e-06, "loss": 17.3281, "step": 13560 }, { "epoch": 0.12836872047784478, "grad_norm": 198.6623077392578, "learning_rate": 1.9496839633703212e-06, "loss": 7.9375, "step": 13561 }, { "epoch": 0.12837818649955984, "grad_norm": 4.00417423248291, "learning_rate": 1.9496743604261142e-06, "loss": 0.918, "step": 13562 }, { "epoch": 0.12838765252127488, "grad_norm": 376.5587463378906, "learning_rate": 1.9496647565892747e-06, "loss": 23.2266, "step": 13563 }, { "epoch": 0.12839711854298994, "grad_norm": 3.1182098388671875, "learning_rate": 1.9496551518598128e-06, "loss": 0.9751, "step": 13564 }, { "epoch": 0.128406584564705, "grad_norm": 629.1312255859375, "learning_rate": 1.949645546237736e-06, "loss": 42.8555, "step": 13565 }, { "epoch": 0.12841605058642006, "grad_norm": 448.35333251953125, "learning_rate": 1.949635939723055e-06, "loss": 45.0938, "step": 13566 }, { "epoch": 0.1284255166081351, "grad_norm": 743.1513061523438, "learning_rate": 1.9496263323157774e-06, "loss": 55.5625, "step": 13567 }, { "epoch": 0.12843498262985015, "grad_norm": 404.2366943359375, "learning_rate": 1.9496167240159132e-06, "loss": 45.125, "step": 13568 }, { "epoch": 0.1284444486515652, "grad_norm": 149.61697387695312, "learning_rate": 1.949607114823471e-06, "loss": 21.3047, "step": 13569 }, { "epoch": 0.12845391467328027, "grad_norm": 305.4753723144531, "learning_rate": 1.9495975047384596e-06, "loss": 20.3438, "step": 13570 }, { "epoch": 0.12846338069499533, "grad_norm": 881.8472290039062, "learning_rate": 1.9495878937608884e-06, "loss": 38.1484, "step": 13571 }, { "epoch": 0.12847284671671036, "grad_norm": 2.763662099838257, "learning_rate": 1.9495782818907663e-06, "loss": 0.8853, "step": 13572 }, { "epoch": 0.12848231273842542, "grad_norm": 623.84716796875, "learning_rate": 1.949568669128103e-06, "loss": 22.8906, "step": 13573 }, { "epoch": 0.12849177876014048, "grad_norm": 203.0635528564453, "learning_rate": 1.9495590554729063e-06, "loss": 23.5547, "step": 13574 }, { "epoch": 0.12850124478185554, "grad_norm": 410.5312805175781, "learning_rate": 1.949549440925186e-06, "loss": 43.8125, "step": 13575 }, { "epoch": 0.12851071080357057, "grad_norm": 541.30029296875, "learning_rate": 1.949539825484951e-06, "loss": 33.2188, "step": 13576 }, { "epoch": 0.12852017682528563, "grad_norm": 4.006409168243408, "learning_rate": 1.9495302091522105e-06, "loss": 0.9697, "step": 13577 }, { "epoch": 0.1285296428470007, "grad_norm": 554.4622192382812, "learning_rate": 1.9495205919269734e-06, "loss": 30.0078, "step": 13578 }, { "epoch": 0.12853910886871575, "grad_norm": 336.1119079589844, "learning_rate": 1.9495109738092485e-06, "loss": 16.6875, "step": 13579 }, { "epoch": 0.1285485748904308, "grad_norm": 311.0564270019531, "learning_rate": 1.949501354799045e-06, "loss": 21.9844, "step": 13580 }, { "epoch": 0.12855804091214584, "grad_norm": 134.73875427246094, "learning_rate": 1.949491734896372e-06, "loss": 17.3242, "step": 13581 }, { "epoch": 0.1285675069338609, "grad_norm": 388.9271240234375, "learning_rate": 1.949482114101239e-06, "loss": 52.6719, "step": 13582 }, { "epoch": 0.12857697295557596, "grad_norm": 3.2687807083129883, "learning_rate": 1.949472492413654e-06, "loss": 0.8657, "step": 13583 }, { "epoch": 0.12858643897729102, "grad_norm": 251.8640594482422, "learning_rate": 1.949462869833627e-06, "loss": 20.332, "step": 13584 }, { "epoch": 0.12859590499900606, "grad_norm": 505.8257141113281, "learning_rate": 1.9494532463611665e-06, "loss": 39.8203, "step": 13585 }, { "epoch": 0.12860537102072112, "grad_norm": 318.7044677734375, "learning_rate": 1.9494436219962815e-06, "loss": 24.3828, "step": 13586 }, { "epoch": 0.12861483704243618, "grad_norm": 514.1201782226562, "learning_rate": 1.9494339967389816e-06, "loss": 59.1875, "step": 13587 }, { "epoch": 0.12862430306415124, "grad_norm": 313.0684509277344, "learning_rate": 1.9494243705892758e-06, "loss": 20.9062, "step": 13588 }, { "epoch": 0.1286337690858663, "grad_norm": 230.2190399169922, "learning_rate": 1.949414743547172e-06, "loss": 21.4453, "step": 13589 }, { "epoch": 0.12864323510758133, "grad_norm": 434.331787109375, "learning_rate": 1.949405115612681e-06, "loss": 43.2969, "step": 13590 }, { "epoch": 0.1286527011292964, "grad_norm": 270.87353515625, "learning_rate": 1.94939548678581e-06, "loss": 34.1875, "step": 13591 }, { "epoch": 0.12866216715101145, "grad_norm": 1360.00439453125, "learning_rate": 1.9493858570665698e-06, "loss": 21.6445, "step": 13592 }, { "epoch": 0.1286716331727265, "grad_norm": 450.4429626464844, "learning_rate": 1.9493762264549684e-06, "loss": 62.3906, "step": 13593 }, { "epoch": 0.12868109919444154, "grad_norm": 309.5561218261719, "learning_rate": 1.949366594951015e-06, "loss": 24.4844, "step": 13594 }, { "epoch": 0.1286905652161566, "grad_norm": 379.7567138671875, "learning_rate": 1.9493569625547188e-06, "loss": 19.3984, "step": 13595 }, { "epoch": 0.12870003123787166, "grad_norm": 3.0304882526397705, "learning_rate": 1.949347329266089e-06, "loss": 0.8657, "step": 13596 }, { "epoch": 0.12870949725958672, "grad_norm": 364.9585876464844, "learning_rate": 1.949337695085134e-06, "loss": 19.8281, "step": 13597 }, { "epoch": 0.12871896328130178, "grad_norm": 910.9575805664062, "learning_rate": 1.949328060011864e-06, "loss": 47.4531, "step": 13598 }, { "epoch": 0.1287284293030168, "grad_norm": 3.238677978515625, "learning_rate": 1.949318424046287e-06, "loss": 0.8199, "step": 13599 }, { "epoch": 0.12873789532473187, "grad_norm": 214.78121948242188, "learning_rate": 1.9493087871884122e-06, "loss": 22.0078, "step": 13600 }, { "epoch": 0.12874736134644693, "grad_norm": 316.46038818359375, "learning_rate": 1.9492991494382497e-06, "loss": 21.2422, "step": 13601 }, { "epoch": 0.128756827368162, "grad_norm": 319.5392761230469, "learning_rate": 1.9492895107958073e-06, "loss": 22.3984, "step": 13602 }, { "epoch": 0.12876629338987702, "grad_norm": 310.3583984375, "learning_rate": 1.949279871261094e-06, "loss": 32.125, "step": 13603 }, { "epoch": 0.12877575941159208, "grad_norm": 334.4086608886719, "learning_rate": 1.94927023083412e-06, "loss": 20.2266, "step": 13604 }, { "epoch": 0.12878522543330714, "grad_norm": 418.4665222167969, "learning_rate": 1.9492605895148937e-06, "loss": 45.0938, "step": 13605 }, { "epoch": 0.1287946914550222, "grad_norm": 2.9726314544677734, "learning_rate": 1.949250947303424e-06, "loss": 0.8276, "step": 13606 }, { "epoch": 0.12880415747673726, "grad_norm": 3.337656021118164, "learning_rate": 1.94924130419972e-06, "loss": 0.9199, "step": 13607 }, { "epoch": 0.1288136234984523, "grad_norm": 379.2497253417969, "learning_rate": 1.9492316602037914e-06, "loss": 15.1953, "step": 13608 }, { "epoch": 0.12882308952016736, "grad_norm": 200.9125213623047, "learning_rate": 1.9492220153156467e-06, "loss": 18.1641, "step": 13609 }, { "epoch": 0.12883255554188242, "grad_norm": 1189.7049560546875, "learning_rate": 1.949212369535295e-06, "loss": 52.6719, "step": 13610 }, { "epoch": 0.12884202156359748, "grad_norm": 425.1764831542969, "learning_rate": 1.9492027228627452e-06, "loss": 53.6875, "step": 13611 }, { "epoch": 0.1288514875853125, "grad_norm": 662.7320556640625, "learning_rate": 1.9491930752980064e-06, "loss": 32.3594, "step": 13612 }, { "epoch": 0.12886095360702757, "grad_norm": 191.12802124023438, "learning_rate": 1.949183426841088e-06, "loss": 18.6562, "step": 13613 }, { "epoch": 0.12887041962874263, "grad_norm": 339.3927917480469, "learning_rate": 1.9491737774919995e-06, "loss": 25.3984, "step": 13614 }, { "epoch": 0.1288798856504577, "grad_norm": 419.8470458984375, "learning_rate": 1.949164127250749e-06, "loss": 33.5078, "step": 13615 }, { "epoch": 0.12888935167217275, "grad_norm": 903.5218505859375, "learning_rate": 1.9491544761173458e-06, "loss": 69.3984, "step": 13616 }, { "epoch": 0.12889881769388778, "grad_norm": 315.34613037109375, "learning_rate": 1.9491448240917994e-06, "loss": 29.4062, "step": 13617 }, { "epoch": 0.12890828371560284, "grad_norm": 444.4063415527344, "learning_rate": 1.9491351711741185e-06, "loss": 21.9062, "step": 13618 }, { "epoch": 0.1289177497373179, "grad_norm": 346.6132507324219, "learning_rate": 1.9491255173643125e-06, "loss": 20.7188, "step": 13619 }, { "epoch": 0.12892721575903296, "grad_norm": 453.7321472167969, "learning_rate": 1.9491158626623898e-06, "loss": 43.875, "step": 13620 }, { "epoch": 0.128936681780748, "grad_norm": 3.2497897148132324, "learning_rate": 1.94910620706836e-06, "loss": 0.8289, "step": 13621 }, { "epoch": 0.12894614780246305, "grad_norm": 422.89678955078125, "learning_rate": 1.9490965505822327e-06, "loss": 19.1484, "step": 13622 }, { "epoch": 0.1289556138241781, "grad_norm": 458.1832580566406, "learning_rate": 1.949086893204016e-06, "loss": 55.9531, "step": 13623 }, { "epoch": 0.12896507984589317, "grad_norm": 233.1451873779297, "learning_rate": 1.949077234933719e-06, "loss": 17.2969, "step": 13624 }, { "epoch": 0.12897454586760823, "grad_norm": 246.15724182128906, "learning_rate": 1.9490675757713517e-06, "loss": 23.2578, "step": 13625 }, { "epoch": 0.12898401188932326, "grad_norm": 228.79901123046875, "learning_rate": 1.9490579157169224e-06, "loss": 23.1641, "step": 13626 }, { "epoch": 0.12899347791103832, "grad_norm": 428.5798034667969, "learning_rate": 1.9490482547704406e-06, "loss": 17.3945, "step": 13627 }, { "epoch": 0.12900294393275338, "grad_norm": 731.0869750976562, "learning_rate": 1.9490385929319147e-06, "loss": 62.2656, "step": 13628 }, { "epoch": 0.12901240995446844, "grad_norm": 367.5238952636719, "learning_rate": 1.9490289302013546e-06, "loss": 30.3594, "step": 13629 }, { "epoch": 0.1290218759761835, "grad_norm": 3.461519479751587, "learning_rate": 1.949019266578769e-06, "loss": 0.8206, "step": 13630 }, { "epoch": 0.12903134199789854, "grad_norm": 688.2706298828125, "learning_rate": 1.949009602064167e-06, "loss": 15.168, "step": 13631 }, { "epoch": 0.1290408080196136, "grad_norm": 1077.6392822265625, "learning_rate": 1.9489999366575577e-06, "loss": 48.8281, "step": 13632 }, { "epoch": 0.12905027404132866, "grad_norm": 886.3075561523438, "learning_rate": 1.9489902703589505e-06, "loss": 30.6758, "step": 13633 }, { "epoch": 0.12905974006304372, "grad_norm": 499.2532653808594, "learning_rate": 1.9489806031683537e-06, "loss": 37.1484, "step": 13634 }, { "epoch": 0.12906920608475875, "grad_norm": 618.4593505859375, "learning_rate": 1.9489709350857774e-06, "loss": 22.875, "step": 13635 }, { "epoch": 0.1290786721064738, "grad_norm": 854.0451049804688, "learning_rate": 1.9489612661112296e-06, "loss": 57.6562, "step": 13636 }, { "epoch": 0.12908813812818887, "grad_norm": 273.0581970214844, "learning_rate": 1.94895159624472e-06, "loss": 35.3359, "step": 13637 }, { "epoch": 0.12909760414990393, "grad_norm": 3.0462872982025146, "learning_rate": 1.948941925486258e-06, "loss": 0.9614, "step": 13638 }, { "epoch": 0.129107070171619, "grad_norm": 368.6888427734375, "learning_rate": 1.948932253835852e-06, "loss": 33.6797, "step": 13639 }, { "epoch": 0.12911653619333402, "grad_norm": 1057.0565185546875, "learning_rate": 1.948922581293512e-06, "loss": 43.7578, "step": 13640 }, { "epoch": 0.12912600221504908, "grad_norm": 3.7020790576934814, "learning_rate": 1.948912907859246e-06, "loss": 1.0762, "step": 13641 }, { "epoch": 0.12913546823676414, "grad_norm": 211.9989013671875, "learning_rate": 1.9489032335330635e-06, "loss": 17.8594, "step": 13642 }, { "epoch": 0.1291449342584792, "grad_norm": 401.5303955078125, "learning_rate": 1.9488935583149737e-06, "loss": 26.6328, "step": 13643 }, { "epoch": 0.12915440028019423, "grad_norm": 547.540771484375, "learning_rate": 1.9488838822049856e-06, "loss": 33.7656, "step": 13644 }, { "epoch": 0.1291638663019093, "grad_norm": 178.40289306640625, "learning_rate": 1.9488742052031088e-06, "loss": 19.4688, "step": 13645 }, { "epoch": 0.12917333232362435, "grad_norm": 239.01910400390625, "learning_rate": 1.948864527309352e-06, "loss": 20.6914, "step": 13646 }, { "epoch": 0.1291827983453394, "grad_norm": 322.00537109375, "learning_rate": 1.948854848523724e-06, "loss": 22.5117, "step": 13647 }, { "epoch": 0.12919226436705447, "grad_norm": 369.0542297363281, "learning_rate": 1.948845168846234e-06, "loss": 8.6113, "step": 13648 }, { "epoch": 0.1292017303887695, "grad_norm": 433.33465576171875, "learning_rate": 1.9488354882768915e-06, "loss": 16.7344, "step": 13649 }, { "epoch": 0.12921119641048456, "grad_norm": 367.48516845703125, "learning_rate": 1.9488258068157055e-06, "loss": 25.7734, "step": 13650 }, { "epoch": 0.12922066243219962, "grad_norm": 220.44580078125, "learning_rate": 1.948816124462685e-06, "loss": 21.0312, "step": 13651 }, { "epoch": 0.12923012845391468, "grad_norm": 382.9694519042969, "learning_rate": 1.9488064412178385e-06, "loss": 42.0391, "step": 13652 }, { "epoch": 0.12923959447562972, "grad_norm": 181.05711364746094, "learning_rate": 1.948796757081176e-06, "loss": 21.9531, "step": 13653 }, { "epoch": 0.12924906049734478, "grad_norm": 512.38037109375, "learning_rate": 1.9487870720527066e-06, "loss": 24.4453, "step": 13654 }, { "epoch": 0.12925852651905984, "grad_norm": 154.94015502929688, "learning_rate": 1.948777386132439e-06, "loss": 9.6094, "step": 13655 }, { "epoch": 0.1292679925407749, "grad_norm": 260.02386474609375, "learning_rate": 1.9487676993203823e-06, "loss": 19.5312, "step": 13656 }, { "epoch": 0.12927745856248996, "grad_norm": 760.7427368164062, "learning_rate": 1.9487580116165452e-06, "loss": 45.6719, "step": 13657 }, { "epoch": 0.129286924584205, "grad_norm": 195.93162536621094, "learning_rate": 1.9487483230209375e-06, "loss": 17.7969, "step": 13658 }, { "epoch": 0.12929639060592005, "grad_norm": 361.14410400390625, "learning_rate": 1.9487386335335684e-06, "loss": 35.5938, "step": 13659 }, { "epoch": 0.1293058566276351, "grad_norm": 1141.0758056640625, "learning_rate": 1.948728943154447e-06, "loss": 51.4531, "step": 13660 }, { "epoch": 0.12931532264935017, "grad_norm": 407.14886474609375, "learning_rate": 1.9487192518835813e-06, "loss": 20.4297, "step": 13661 }, { "epoch": 0.1293247886710652, "grad_norm": 750.693115234375, "learning_rate": 1.9487095597209814e-06, "loss": 52.8125, "step": 13662 }, { "epoch": 0.12933425469278026, "grad_norm": 339.9523010253906, "learning_rate": 1.9486998666666567e-06, "loss": 27.0156, "step": 13663 }, { "epoch": 0.12934372071449532, "grad_norm": 503.78594970703125, "learning_rate": 1.9486901727206154e-06, "loss": 26.4141, "step": 13664 }, { "epoch": 0.12935318673621038, "grad_norm": 372.2261047363281, "learning_rate": 1.9486804778828674e-06, "loss": 23.5391, "step": 13665 }, { "epoch": 0.12936265275792544, "grad_norm": 312.7173156738281, "learning_rate": 1.9486707821534216e-06, "loss": 24.2383, "step": 13666 }, { "epoch": 0.12937211877964047, "grad_norm": 3.214834213256836, "learning_rate": 1.9486610855322864e-06, "loss": 0.8867, "step": 13667 }, { "epoch": 0.12938158480135553, "grad_norm": 754.0043334960938, "learning_rate": 1.9486513880194715e-06, "loss": 28.0312, "step": 13668 }, { "epoch": 0.1293910508230706, "grad_norm": 429.3705749511719, "learning_rate": 1.9486416896149864e-06, "loss": 28.9453, "step": 13669 }, { "epoch": 0.12940051684478565, "grad_norm": 495.70599365234375, "learning_rate": 1.94863199031884e-06, "loss": 46.5312, "step": 13670 }, { "epoch": 0.12940998286650068, "grad_norm": 243.4773712158203, "learning_rate": 1.9486222901310407e-06, "loss": 12.1914, "step": 13671 }, { "epoch": 0.12941944888821574, "grad_norm": 195.5849151611328, "learning_rate": 1.9486125890515984e-06, "loss": 13.582, "step": 13672 }, { "epoch": 0.1294289149099308, "grad_norm": 478.37744140625, "learning_rate": 1.948602887080522e-06, "loss": 40.5938, "step": 13673 }, { "epoch": 0.12943838093164586, "grad_norm": 563.712158203125, "learning_rate": 1.9485931842178206e-06, "loss": 47.8047, "step": 13674 }, { "epoch": 0.12944784695336092, "grad_norm": 308.5523376464844, "learning_rate": 1.9485834804635033e-06, "loss": 28.4531, "step": 13675 }, { "epoch": 0.12945731297507596, "grad_norm": 486.74371337890625, "learning_rate": 1.948573775817579e-06, "loss": 20.6016, "step": 13676 }, { "epoch": 0.12946677899679102, "grad_norm": 1015.3807373046875, "learning_rate": 1.9485640702800572e-06, "loss": 52.4844, "step": 13677 }, { "epoch": 0.12947624501850608, "grad_norm": 426.529296875, "learning_rate": 1.948554363850947e-06, "loss": 32.4375, "step": 13678 }, { "epoch": 0.12948571104022114, "grad_norm": 3.178098440170288, "learning_rate": 1.9485446565302577e-06, "loss": 0.9868, "step": 13679 }, { "epoch": 0.12949517706193617, "grad_norm": 397.0461120605469, "learning_rate": 1.948534948317998e-06, "loss": 46.6797, "step": 13680 }, { "epoch": 0.12950464308365123, "grad_norm": 357.3697204589844, "learning_rate": 1.9485252392141768e-06, "loss": 14.3008, "step": 13681 }, { "epoch": 0.1295141091053663, "grad_norm": 461.2657470703125, "learning_rate": 1.9485155292188034e-06, "loss": 32.7891, "step": 13682 }, { "epoch": 0.12952357512708135, "grad_norm": 5.678762435913086, "learning_rate": 1.9485058183318876e-06, "loss": 0.9321, "step": 13683 }, { "epoch": 0.1295330411487964, "grad_norm": 513.2185668945312, "learning_rate": 1.948496106553438e-06, "loss": 26.625, "step": 13684 }, { "epoch": 0.12954250717051144, "grad_norm": 232.52882385253906, "learning_rate": 1.9484863938834636e-06, "loss": 23.8047, "step": 13685 }, { "epoch": 0.1295519731922265, "grad_norm": 325.9921569824219, "learning_rate": 1.9484766803219735e-06, "loss": 16.6406, "step": 13686 }, { "epoch": 0.12956143921394156, "grad_norm": 1387.1519775390625, "learning_rate": 1.9484669658689774e-06, "loss": 24.4648, "step": 13687 }, { "epoch": 0.12957090523565662, "grad_norm": 305.33843994140625, "learning_rate": 1.948457250524484e-06, "loss": 19.332, "step": 13688 }, { "epoch": 0.12958037125737165, "grad_norm": 188.04489135742188, "learning_rate": 1.9484475342885025e-06, "loss": 19.3047, "step": 13689 }, { "epoch": 0.1295898372790867, "grad_norm": 197.13885498046875, "learning_rate": 1.948437817161042e-06, "loss": 19.9141, "step": 13690 }, { "epoch": 0.12959930330080177, "grad_norm": 395.3216857910156, "learning_rate": 1.948428099142111e-06, "loss": 7.3438, "step": 13691 }, { "epoch": 0.12960876932251683, "grad_norm": 408.4391784667969, "learning_rate": 1.9484183802317203e-06, "loss": 25.3398, "step": 13692 }, { "epoch": 0.1296182353442319, "grad_norm": 248.34498596191406, "learning_rate": 1.9484086604298776e-06, "loss": 19.1172, "step": 13693 }, { "epoch": 0.12962770136594692, "grad_norm": 564.5699462890625, "learning_rate": 1.9483989397365925e-06, "loss": 36.7188, "step": 13694 }, { "epoch": 0.12963716738766198, "grad_norm": 454.06182861328125, "learning_rate": 1.9483892181518738e-06, "loss": 60.6094, "step": 13695 }, { "epoch": 0.12964663340937704, "grad_norm": 702.4337768554688, "learning_rate": 1.9483794956757312e-06, "loss": 45.6094, "step": 13696 }, { "epoch": 0.1296560994310921, "grad_norm": 234.65603637695312, "learning_rate": 1.9483697723081738e-06, "loss": 17.2344, "step": 13697 }, { "epoch": 0.12966556545280714, "grad_norm": 460.9067687988281, "learning_rate": 1.94836004804921e-06, "loss": 30.1562, "step": 13698 }, { "epoch": 0.1296750314745222, "grad_norm": 262.3610534667969, "learning_rate": 1.9483503228988496e-06, "loss": 20.3672, "step": 13699 }, { "epoch": 0.12968449749623726, "grad_norm": 3.4084267616271973, "learning_rate": 1.948340596857102e-06, "loss": 0.8286, "step": 13700 }, { "epoch": 0.12969396351795232, "grad_norm": 1155.7418212890625, "learning_rate": 1.9483308699239754e-06, "loss": 61.7188, "step": 13701 }, { "epoch": 0.12970342953966738, "grad_norm": 3.316438913345337, "learning_rate": 1.9483211420994797e-06, "loss": 0.8977, "step": 13702 }, { "epoch": 0.1297128955613824, "grad_norm": 290.56732177734375, "learning_rate": 1.948311413383624e-06, "loss": 21.5625, "step": 13703 }, { "epoch": 0.12972236158309747, "grad_norm": 367.9682922363281, "learning_rate": 1.948301683776417e-06, "loss": 25.1172, "step": 13704 }, { "epoch": 0.12973182760481253, "grad_norm": 712.9367065429688, "learning_rate": 1.9482919532778683e-06, "loss": 27.9219, "step": 13705 }, { "epoch": 0.1297412936265276, "grad_norm": 587.9979248046875, "learning_rate": 1.9482822218879864e-06, "loss": 57.3281, "step": 13706 }, { "epoch": 0.12975075964824262, "grad_norm": 405.4967956542969, "learning_rate": 1.9482724896067815e-06, "loss": 29.1797, "step": 13707 }, { "epoch": 0.12976022566995768, "grad_norm": 311.7378845214844, "learning_rate": 1.9482627564342616e-06, "loss": 14.957, "step": 13708 }, { "epoch": 0.12976969169167274, "grad_norm": 286.0611267089844, "learning_rate": 1.948253022370437e-06, "loss": 15.7695, "step": 13709 }, { "epoch": 0.1297791577133878, "grad_norm": 497.6700134277344, "learning_rate": 1.9482432874153158e-06, "loss": 19.5859, "step": 13710 }, { "epoch": 0.12978862373510286, "grad_norm": 793.4006958007812, "learning_rate": 1.948233551568908e-06, "loss": 25.125, "step": 13711 }, { "epoch": 0.1297980897568179, "grad_norm": 384.1619567871094, "learning_rate": 1.948223814831222e-06, "loss": 35.5078, "step": 13712 }, { "epoch": 0.12980755577853295, "grad_norm": 331.5847473144531, "learning_rate": 1.948214077202267e-06, "loss": 24.5703, "step": 13713 }, { "epoch": 0.129817021800248, "grad_norm": 521.7432861328125, "learning_rate": 1.948204338682053e-06, "loss": 33.2031, "step": 13714 }, { "epoch": 0.12982648782196307, "grad_norm": 235.61404418945312, "learning_rate": 1.9481945992705887e-06, "loss": 17.9062, "step": 13715 }, { "epoch": 0.1298359538436781, "grad_norm": 502.8157958984375, "learning_rate": 1.9481848589678827e-06, "loss": 53.8906, "step": 13716 }, { "epoch": 0.12984541986539316, "grad_norm": 148.44696044921875, "learning_rate": 1.9481751177739447e-06, "loss": 18.6719, "step": 13717 }, { "epoch": 0.12985488588710822, "grad_norm": 501.31103515625, "learning_rate": 1.948165375688784e-06, "loss": 10.7539, "step": 13718 }, { "epoch": 0.12986435190882328, "grad_norm": 553.6865844726562, "learning_rate": 1.948155632712409e-06, "loss": 28.75, "step": 13719 }, { "epoch": 0.12987381793053834, "grad_norm": 3.2859911918640137, "learning_rate": 1.9481458888448303e-06, "loss": 0.9316, "step": 13720 }, { "epoch": 0.12988328395225338, "grad_norm": 2.6172268390655518, "learning_rate": 1.9481361440860554e-06, "loss": 0.8486, "step": 13721 }, { "epoch": 0.12989274997396844, "grad_norm": 374.82977294921875, "learning_rate": 1.9481263984360944e-06, "loss": 19.3281, "step": 13722 }, { "epoch": 0.1299022159956835, "grad_norm": 300.0610046386719, "learning_rate": 1.9481166518949564e-06, "loss": 15.9297, "step": 13723 }, { "epoch": 0.12991168201739856, "grad_norm": 363.79937744140625, "learning_rate": 1.9481069044626508e-06, "loss": 21.3438, "step": 13724 }, { "epoch": 0.12992114803911362, "grad_norm": 265.367431640625, "learning_rate": 1.9480971561391856e-06, "loss": 30.625, "step": 13725 }, { "epoch": 0.12993061406082865, "grad_norm": 354.7070007324219, "learning_rate": 1.948087406924571e-06, "loss": 35.4297, "step": 13726 }, { "epoch": 0.1299400800825437, "grad_norm": 212.59730529785156, "learning_rate": 1.948077656818816e-06, "loss": 16.5078, "step": 13727 }, { "epoch": 0.12994954610425877, "grad_norm": 1030.2716064453125, "learning_rate": 1.94806790582193e-06, "loss": 76.4766, "step": 13728 }, { "epoch": 0.12995901212597383, "grad_norm": 395.3304443359375, "learning_rate": 1.9480581539339213e-06, "loss": 20.4219, "step": 13729 }, { "epoch": 0.12996847814768886, "grad_norm": 431.3189392089844, "learning_rate": 1.9480484011548e-06, "loss": 19.9844, "step": 13730 }, { "epoch": 0.12997794416940392, "grad_norm": 237.58885192871094, "learning_rate": 1.9480386474845743e-06, "loss": 25.9531, "step": 13731 }, { "epoch": 0.12998741019111898, "grad_norm": 432.94049072265625, "learning_rate": 1.9480288929232545e-06, "loss": 32.0, "step": 13732 }, { "epoch": 0.12999687621283404, "grad_norm": 515.9557495117188, "learning_rate": 1.948019137470849e-06, "loss": 29.6797, "step": 13733 }, { "epoch": 0.1300063422345491, "grad_norm": 309.56378173828125, "learning_rate": 1.9480093811273674e-06, "loss": 25.5156, "step": 13734 }, { "epoch": 0.13001580825626413, "grad_norm": 526.5560913085938, "learning_rate": 1.9479996238928183e-06, "loss": 59.0781, "step": 13735 }, { "epoch": 0.1300252742779792, "grad_norm": 370.5536804199219, "learning_rate": 1.9479898657672117e-06, "loss": 18.3672, "step": 13736 }, { "epoch": 0.13003474029969425, "grad_norm": 711.9592895507812, "learning_rate": 1.9479801067505558e-06, "loss": 25.4609, "step": 13737 }, { "epoch": 0.1300442063214093, "grad_norm": 281.97222900390625, "learning_rate": 1.947970346842861e-06, "loss": 16.4375, "step": 13738 }, { "epoch": 0.13005367234312434, "grad_norm": 686.52294921875, "learning_rate": 1.947960586044135e-06, "loss": 22.6094, "step": 13739 }, { "epoch": 0.1300631383648394, "grad_norm": 215.41891479492188, "learning_rate": 1.9479508243543877e-06, "loss": 19.1953, "step": 13740 }, { "epoch": 0.13007260438655446, "grad_norm": 1099.165283203125, "learning_rate": 1.947941061773629e-06, "loss": 24.9062, "step": 13741 }, { "epoch": 0.13008207040826952, "grad_norm": 175.18055725097656, "learning_rate": 1.9479312983018667e-06, "loss": 18.4062, "step": 13742 }, { "epoch": 0.13009153642998458, "grad_norm": 721.9402465820312, "learning_rate": 1.9479215339391106e-06, "loss": 52.1719, "step": 13743 }, { "epoch": 0.13010100245169962, "grad_norm": 308.0867004394531, "learning_rate": 1.9479117686853703e-06, "loss": 13.4062, "step": 13744 }, { "epoch": 0.13011046847341468, "grad_norm": 808.90771484375, "learning_rate": 1.9479020025406546e-06, "loss": 64.4922, "step": 13745 }, { "epoch": 0.13011993449512974, "grad_norm": 281.0667724609375, "learning_rate": 1.9478922355049726e-06, "loss": 21.2109, "step": 13746 }, { "epoch": 0.1301294005168448, "grad_norm": 512.1596069335938, "learning_rate": 1.947882467578334e-06, "loss": 30.25, "step": 13747 }, { "epoch": 0.13013886653855983, "grad_norm": 170.10411071777344, "learning_rate": 1.9478726987607464e-06, "loss": 16.0469, "step": 13748 }, { "epoch": 0.1301483325602749, "grad_norm": 300.6065368652344, "learning_rate": 1.947862929052221e-06, "loss": 35.1562, "step": 13749 }, { "epoch": 0.13015779858198995, "grad_norm": 429.9144592285156, "learning_rate": 1.947853158452766e-06, "loss": 31.3125, "step": 13750 }, { "epoch": 0.130167264603705, "grad_norm": 429.28594970703125, "learning_rate": 1.9478433869623906e-06, "loss": 16.0625, "step": 13751 }, { "epoch": 0.13017673062542007, "grad_norm": 386.48052978515625, "learning_rate": 1.947833614581104e-06, "loss": 26.0547, "step": 13752 }, { "epoch": 0.1301861966471351, "grad_norm": 328.0283203125, "learning_rate": 1.9478238413089152e-06, "loss": 16.2031, "step": 13753 }, { "epoch": 0.13019566266885016, "grad_norm": 235.47731018066406, "learning_rate": 1.947814067145834e-06, "loss": 17.7344, "step": 13754 }, { "epoch": 0.13020512869056522, "grad_norm": 870.3026733398438, "learning_rate": 1.9478042920918697e-06, "loss": 77.7656, "step": 13755 }, { "epoch": 0.13021459471228028, "grad_norm": 431.19635009765625, "learning_rate": 1.9477945161470303e-06, "loss": 22.4844, "step": 13756 }, { "epoch": 0.1302240607339953, "grad_norm": 384.0390625, "learning_rate": 1.947784739311326e-06, "loss": 27.3984, "step": 13757 }, { "epoch": 0.13023352675571037, "grad_norm": 265.17242431640625, "learning_rate": 1.9477749615847656e-06, "loss": 19.3984, "step": 13758 }, { "epoch": 0.13024299277742543, "grad_norm": 316.0339050292969, "learning_rate": 1.9477651829673586e-06, "loss": 36.6328, "step": 13759 }, { "epoch": 0.1302524587991405, "grad_norm": 506.13958740234375, "learning_rate": 1.947755403459114e-06, "loss": 21.3906, "step": 13760 }, { "epoch": 0.13026192482085555, "grad_norm": 715.7947387695312, "learning_rate": 1.9477456230600407e-06, "loss": 9.4766, "step": 13761 }, { "epoch": 0.13027139084257058, "grad_norm": 230.68238830566406, "learning_rate": 1.9477358417701485e-06, "loss": 9.6836, "step": 13762 }, { "epoch": 0.13028085686428564, "grad_norm": 457.3471984863281, "learning_rate": 1.947726059589446e-06, "loss": 46.1562, "step": 13763 }, { "epoch": 0.1302903228860007, "grad_norm": 320.5682067871094, "learning_rate": 1.9477162765179426e-06, "loss": 16.0156, "step": 13764 }, { "epoch": 0.13029978890771576, "grad_norm": 254.42092895507812, "learning_rate": 1.947706492555648e-06, "loss": 20.9297, "step": 13765 }, { "epoch": 0.1303092549294308, "grad_norm": 434.3868103027344, "learning_rate": 1.9476967077025702e-06, "loss": 30.9141, "step": 13766 }, { "epoch": 0.13031872095114586, "grad_norm": 230.12210083007812, "learning_rate": 1.9476869219587196e-06, "loss": 30.7812, "step": 13767 }, { "epoch": 0.13032818697286092, "grad_norm": 176.3850860595703, "learning_rate": 1.947677135324105e-06, "loss": 17.793, "step": 13768 }, { "epoch": 0.13033765299457598, "grad_norm": 3.206979274749756, "learning_rate": 1.9476673477987354e-06, "loss": 0.9517, "step": 13769 }, { "epoch": 0.13034711901629104, "grad_norm": 192.40838623046875, "learning_rate": 1.9476575593826203e-06, "loss": 19.0, "step": 13770 }, { "epoch": 0.13035658503800607, "grad_norm": 268.4371337890625, "learning_rate": 1.947647770075769e-06, "loss": 10.7734, "step": 13771 }, { "epoch": 0.13036605105972113, "grad_norm": 618.6311645507812, "learning_rate": 1.94763797987819e-06, "loss": 19.1836, "step": 13772 }, { "epoch": 0.1303755170814362, "grad_norm": 292.6095275878906, "learning_rate": 1.947628188789893e-06, "loss": 17.0352, "step": 13773 }, { "epoch": 0.13038498310315125, "grad_norm": 149.7078399658203, "learning_rate": 1.947618396810887e-06, "loss": 18.1641, "step": 13774 }, { "epoch": 0.13039444912486628, "grad_norm": 328.9084777832031, "learning_rate": 1.9476086039411817e-06, "loss": 17.4453, "step": 13775 }, { "epoch": 0.13040391514658134, "grad_norm": 323.46832275390625, "learning_rate": 1.9475988101807855e-06, "loss": 30.1875, "step": 13776 }, { "epoch": 0.1304133811682964, "grad_norm": 398.6884765625, "learning_rate": 1.9475890155297084e-06, "loss": 46.5156, "step": 13777 }, { "epoch": 0.13042284719001146, "grad_norm": 486.7673645019531, "learning_rate": 1.9475792199879596e-06, "loss": 39.75, "step": 13778 }, { "epoch": 0.13043231321172652, "grad_norm": 311.9058837890625, "learning_rate": 1.9475694235555477e-06, "loss": 16.3828, "step": 13779 }, { "epoch": 0.13044177923344155, "grad_norm": 677.853759765625, "learning_rate": 1.947559626232482e-06, "loss": 31.0078, "step": 13780 }, { "epoch": 0.1304512452551566, "grad_norm": 524.8007202148438, "learning_rate": 1.947549828018772e-06, "loss": 53.3125, "step": 13781 }, { "epoch": 0.13046071127687167, "grad_norm": 214.2718048095703, "learning_rate": 1.9475400289144266e-06, "loss": 21.5, "step": 13782 }, { "epoch": 0.13047017729858673, "grad_norm": 209.82102966308594, "learning_rate": 1.9475302289194555e-06, "loss": 7.957, "step": 13783 }, { "epoch": 0.13047964332030176, "grad_norm": 3.1465718746185303, "learning_rate": 1.9475204280338673e-06, "loss": 0.894, "step": 13784 }, { "epoch": 0.13048910934201682, "grad_norm": 568.29248046875, "learning_rate": 1.947510626257672e-06, "loss": 31.3203, "step": 13785 }, { "epoch": 0.13049857536373188, "grad_norm": 267.66357421875, "learning_rate": 1.947500823590878e-06, "loss": 34.875, "step": 13786 }, { "epoch": 0.13050804138544694, "grad_norm": 281.4431457519531, "learning_rate": 1.947491020033495e-06, "loss": 37.0938, "step": 13787 }, { "epoch": 0.130517507407162, "grad_norm": 2.999124765396118, "learning_rate": 1.9474812155855324e-06, "loss": 0.8262, "step": 13788 }, { "epoch": 0.13052697342887704, "grad_norm": 309.942626953125, "learning_rate": 1.9474714102469984e-06, "loss": 18.9609, "step": 13789 }, { "epoch": 0.1305364394505921, "grad_norm": 960.1173706054688, "learning_rate": 1.9474616040179034e-06, "loss": 32.1094, "step": 13790 }, { "epoch": 0.13054590547230716, "grad_norm": 193.498046875, "learning_rate": 1.9474517968982556e-06, "loss": 16.2109, "step": 13791 }, { "epoch": 0.13055537149402222, "grad_norm": 3.676231861114502, "learning_rate": 1.9474419888880654e-06, "loss": 0.8496, "step": 13792 }, { "epoch": 0.13056483751573725, "grad_norm": 258.36383056640625, "learning_rate": 1.947432179987341e-06, "loss": 40.0938, "step": 13793 }, { "epoch": 0.1305743035374523, "grad_norm": 302.2572021484375, "learning_rate": 1.947422370196092e-06, "loss": 17.3086, "step": 13794 }, { "epoch": 0.13058376955916737, "grad_norm": 852.1910400390625, "learning_rate": 1.947412559514328e-06, "loss": 71.7344, "step": 13795 }, { "epoch": 0.13059323558088243, "grad_norm": 183.38291931152344, "learning_rate": 1.9474027479420574e-06, "loss": 20.7891, "step": 13796 }, { "epoch": 0.1306027016025975, "grad_norm": 222.1390838623047, "learning_rate": 1.9473929354792903e-06, "loss": 18.7031, "step": 13797 }, { "epoch": 0.13061216762431252, "grad_norm": 172.75775146484375, "learning_rate": 1.947383122126035e-06, "loss": 18.5703, "step": 13798 }, { "epoch": 0.13062163364602758, "grad_norm": 561.1577758789062, "learning_rate": 1.9473733078823013e-06, "loss": 26.6172, "step": 13799 }, { "epoch": 0.13063109966774264, "grad_norm": 379.38970947265625, "learning_rate": 1.9473634927480985e-06, "loss": 13.2695, "step": 13800 }, { "epoch": 0.1306405656894577, "grad_norm": 322.15472412109375, "learning_rate": 1.9473536767234355e-06, "loss": 19.9062, "step": 13801 }, { "epoch": 0.13065003171117273, "grad_norm": 352.7821350097656, "learning_rate": 1.9473438598083218e-06, "loss": 22.0312, "step": 13802 }, { "epoch": 0.1306594977328878, "grad_norm": 389.6063537597656, "learning_rate": 1.947334042002766e-06, "loss": 46.7266, "step": 13803 }, { "epoch": 0.13066896375460285, "grad_norm": 214.15542602539062, "learning_rate": 1.9473242233067782e-06, "loss": 14.2227, "step": 13804 }, { "epoch": 0.1306784297763179, "grad_norm": 650.0516967773438, "learning_rate": 1.9473144037203675e-06, "loss": 84.2188, "step": 13805 }, { "epoch": 0.13068789579803297, "grad_norm": 369.3839111328125, "learning_rate": 1.9473045832435425e-06, "loss": 18.7969, "step": 13806 }, { "epoch": 0.130697361819748, "grad_norm": 740.2755126953125, "learning_rate": 1.9472947618763135e-06, "loss": 53.0, "step": 13807 }, { "epoch": 0.13070682784146306, "grad_norm": 525.0451049804688, "learning_rate": 1.947284939618688e-06, "loss": 39.9141, "step": 13808 }, { "epoch": 0.13071629386317812, "grad_norm": 441.5844421386719, "learning_rate": 1.947275116470677e-06, "loss": 39.8125, "step": 13809 }, { "epoch": 0.13072575988489318, "grad_norm": 240.27952575683594, "learning_rate": 1.9472652924322886e-06, "loss": 16.7734, "step": 13810 }, { "epoch": 0.13073522590660824, "grad_norm": 318.4177551269531, "learning_rate": 1.947255467503533e-06, "loss": 9.2539, "step": 13811 }, { "epoch": 0.13074469192832328, "grad_norm": 433.6068115234375, "learning_rate": 1.9472456416844183e-06, "loss": 36.3281, "step": 13812 }, { "epoch": 0.13075415795003834, "grad_norm": 700.8360595703125, "learning_rate": 1.9472358149749547e-06, "loss": 20.3242, "step": 13813 }, { "epoch": 0.1307636239717534, "grad_norm": 228.82296752929688, "learning_rate": 1.947225987375151e-06, "loss": 21.7969, "step": 13814 }, { "epoch": 0.13077308999346846, "grad_norm": 328.58575439453125, "learning_rate": 1.9472161588850166e-06, "loss": 20.4297, "step": 13815 }, { "epoch": 0.1307825560151835, "grad_norm": 362.2098083496094, "learning_rate": 1.9472063295045603e-06, "loss": 43.0, "step": 13816 }, { "epoch": 0.13079202203689855, "grad_norm": 606.9033203125, "learning_rate": 1.947196499233792e-06, "loss": 20.6133, "step": 13817 }, { "epoch": 0.1308014880586136, "grad_norm": 442.9356384277344, "learning_rate": 1.9471866680727204e-06, "loss": 46.1406, "step": 13818 }, { "epoch": 0.13081095408032867, "grad_norm": 422.4750671386719, "learning_rate": 1.947176836021355e-06, "loss": 18.7109, "step": 13819 }, { "epoch": 0.13082042010204373, "grad_norm": 688.1630249023438, "learning_rate": 1.947167003079705e-06, "loss": 40.9453, "step": 13820 }, { "epoch": 0.13082988612375876, "grad_norm": 251.02310180664062, "learning_rate": 1.9471571692477797e-06, "loss": 19.4883, "step": 13821 }, { "epoch": 0.13083935214547382, "grad_norm": 390.55474853515625, "learning_rate": 1.947147334525588e-06, "loss": 42.2031, "step": 13822 }, { "epoch": 0.13084881816718888, "grad_norm": 3.8216464519500732, "learning_rate": 1.94713749891314e-06, "loss": 0.936, "step": 13823 }, { "epoch": 0.13085828418890394, "grad_norm": 2.6980273723602295, "learning_rate": 1.947127662410444e-06, "loss": 0.7969, "step": 13824 }, { "epoch": 0.13086775021061897, "grad_norm": 3.501498222351074, "learning_rate": 1.947117825017509e-06, "loss": 1.0415, "step": 13825 }, { "epoch": 0.13087721623233403, "grad_norm": 3.0993666648864746, "learning_rate": 1.9471079867343457e-06, "loss": 0.9392, "step": 13826 }, { "epoch": 0.1308866822540491, "grad_norm": 478.1569519042969, "learning_rate": 1.9470981475609625e-06, "loss": 29.4375, "step": 13827 }, { "epoch": 0.13089614827576415, "grad_norm": 547.1470947265625, "learning_rate": 1.9470883074973684e-06, "loss": 35.4453, "step": 13828 }, { "epoch": 0.1309056142974792, "grad_norm": 208.38470458984375, "learning_rate": 1.947078466543573e-06, "loss": 21.4531, "step": 13829 }, { "epoch": 0.13091508031919424, "grad_norm": 283.24127197265625, "learning_rate": 1.947068624699585e-06, "loss": 16.5352, "step": 13830 }, { "epoch": 0.1309245463409093, "grad_norm": 694.0178833007812, "learning_rate": 1.9470587819654145e-06, "loss": 32.25, "step": 13831 }, { "epoch": 0.13093401236262436, "grad_norm": 776.2769165039062, "learning_rate": 1.9470489383410705e-06, "loss": 48.9688, "step": 13832 }, { "epoch": 0.13094347838433942, "grad_norm": 199.5000762939453, "learning_rate": 1.947039093826562e-06, "loss": 28.3398, "step": 13833 }, { "epoch": 0.13095294440605446, "grad_norm": 518.2146606445312, "learning_rate": 1.947029248421898e-06, "loss": 54.6094, "step": 13834 }, { "epoch": 0.13096241042776952, "grad_norm": 271.1068420410156, "learning_rate": 1.9470194021270884e-06, "loss": 20.2188, "step": 13835 }, { "epoch": 0.13097187644948458, "grad_norm": 304.0566711425781, "learning_rate": 1.9470095549421423e-06, "loss": 24.3438, "step": 13836 }, { "epoch": 0.13098134247119964, "grad_norm": 951.1771240234375, "learning_rate": 1.946999706867069e-06, "loss": 29.7969, "step": 13837 }, { "epoch": 0.1309908084929147, "grad_norm": 262.57733154296875, "learning_rate": 1.946989857901877e-06, "loss": 20.2266, "step": 13838 }, { "epoch": 0.13100027451462973, "grad_norm": 690.8037719726562, "learning_rate": 1.9469800080465767e-06, "loss": 50.7344, "step": 13839 }, { "epoch": 0.1310097405363448, "grad_norm": 600.9609985351562, "learning_rate": 1.9469701573011763e-06, "loss": 17.6016, "step": 13840 }, { "epoch": 0.13101920655805985, "grad_norm": 1386.604736328125, "learning_rate": 1.946960305665686e-06, "loss": 9.4297, "step": 13841 }, { "epoch": 0.1310286725797749, "grad_norm": 299.7195129394531, "learning_rate": 1.9469504531401144e-06, "loss": 31.9609, "step": 13842 }, { "epoch": 0.13103813860148994, "grad_norm": 425.6907958984375, "learning_rate": 1.946940599724471e-06, "loss": 29.5625, "step": 13843 }, { "epoch": 0.131047604623205, "grad_norm": 546.6639404296875, "learning_rate": 1.946930745418765e-06, "loss": 53.0781, "step": 13844 }, { "epoch": 0.13105707064492006, "grad_norm": 120.85346984863281, "learning_rate": 1.9469208902230053e-06, "loss": 16.6328, "step": 13845 }, { "epoch": 0.13106653666663512, "grad_norm": 395.3578186035156, "learning_rate": 1.9469110341372023e-06, "loss": 50.4688, "step": 13846 }, { "epoch": 0.13107600268835018, "grad_norm": 279.63812255859375, "learning_rate": 1.9469011771613643e-06, "loss": 17.6328, "step": 13847 }, { "epoch": 0.1310854687100652, "grad_norm": 315.69476318359375, "learning_rate": 1.9468913192955005e-06, "loss": 17.5156, "step": 13848 }, { "epoch": 0.13109493473178027, "grad_norm": 239.50277709960938, "learning_rate": 1.946881460539621e-06, "loss": 12.1914, "step": 13849 }, { "epoch": 0.13110440075349533, "grad_norm": 235.89039611816406, "learning_rate": 1.946871600893734e-06, "loss": 19.6914, "step": 13850 }, { "epoch": 0.1311138667752104, "grad_norm": 281.43389892578125, "learning_rate": 1.9468617403578495e-06, "loss": 32.0938, "step": 13851 }, { "epoch": 0.13112333279692542, "grad_norm": 194.07205200195312, "learning_rate": 1.9468518789319765e-06, "loss": 16.6484, "step": 13852 }, { "epoch": 0.13113279881864048, "grad_norm": 406.2918701171875, "learning_rate": 1.9468420166161242e-06, "loss": 45.4219, "step": 13853 }, { "epoch": 0.13114226484035554, "grad_norm": 433.3636474609375, "learning_rate": 1.946832153410302e-06, "loss": 38.5312, "step": 13854 }, { "epoch": 0.1311517308620706, "grad_norm": 273.7806396484375, "learning_rate": 1.9468222893145194e-06, "loss": 17.5547, "step": 13855 }, { "epoch": 0.13116119688378566, "grad_norm": 133.23446655273438, "learning_rate": 1.946812424328786e-06, "loss": 12.4102, "step": 13856 }, { "epoch": 0.1311706629055007, "grad_norm": 242.21112060546875, "learning_rate": 1.9468025584531096e-06, "loss": 21.7656, "step": 13857 }, { "epoch": 0.13118012892721576, "grad_norm": 403.0055847167969, "learning_rate": 1.9467926916875006e-06, "loss": 28.8516, "step": 13858 }, { "epoch": 0.13118959494893082, "grad_norm": 377.13922119140625, "learning_rate": 1.946782824031968e-06, "loss": 29.5547, "step": 13859 }, { "epoch": 0.13119906097064588, "grad_norm": 501.4432067871094, "learning_rate": 1.9467729554865217e-06, "loss": 46.8672, "step": 13860 }, { "epoch": 0.1312085269923609, "grad_norm": 266.2618103027344, "learning_rate": 1.94676308605117e-06, "loss": 14.0469, "step": 13861 }, { "epoch": 0.13121799301407597, "grad_norm": 434.32379150390625, "learning_rate": 1.9467532157259224e-06, "loss": 33.6016, "step": 13862 }, { "epoch": 0.13122745903579103, "grad_norm": 459.33984375, "learning_rate": 1.9467433445107884e-06, "loss": 19.1641, "step": 13863 }, { "epoch": 0.1312369250575061, "grad_norm": 150.46087646484375, "learning_rate": 1.9467334724057777e-06, "loss": 18.3086, "step": 13864 }, { "epoch": 0.13124639107922115, "grad_norm": 382.0835876464844, "learning_rate": 1.9467235994108987e-06, "loss": 19.5078, "step": 13865 }, { "epoch": 0.13125585710093618, "grad_norm": 3.1390624046325684, "learning_rate": 1.9467137255261613e-06, "loss": 0.9512, "step": 13866 }, { "epoch": 0.13126532312265124, "grad_norm": 278.6279296875, "learning_rate": 1.9467038507515746e-06, "loss": 16.3047, "step": 13867 }, { "epoch": 0.1312747891443663, "grad_norm": 259.8035888671875, "learning_rate": 1.9466939750871476e-06, "loss": 18.4531, "step": 13868 }, { "epoch": 0.13128425516608136, "grad_norm": 542.2779541015625, "learning_rate": 1.94668409853289e-06, "loss": 34.8438, "step": 13869 }, { "epoch": 0.1312937211877964, "grad_norm": 532.5531005859375, "learning_rate": 1.9466742210888113e-06, "loss": 40.8281, "step": 13870 }, { "epoch": 0.13130318720951145, "grad_norm": 213.27151489257812, "learning_rate": 1.94666434275492e-06, "loss": 22.0859, "step": 13871 }, { "epoch": 0.1313126532312265, "grad_norm": 424.1556701660156, "learning_rate": 1.946654463531226e-06, "loss": 11.7266, "step": 13872 }, { "epoch": 0.13132211925294157, "grad_norm": 355.64404296875, "learning_rate": 1.946644583417738e-06, "loss": 34.375, "step": 13873 }, { "epoch": 0.13133158527465663, "grad_norm": 266.498046875, "learning_rate": 1.946634702414466e-06, "loss": 31.168, "step": 13874 }, { "epoch": 0.13134105129637166, "grad_norm": 482.2874755859375, "learning_rate": 1.946624820521419e-06, "loss": 30.8125, "step": 13875 }, { "epoch": 0.13135051731808672, "grad_norm": 311.1220703125, "learning_rate": 1.9466149377386064e-06, "loss": 21.4297, "step": 13876 }, { "epoch": 0.13135998333980178, "grad_norm": 452.15447998046875, "learning_rate": 1.946605054066037e-06, "loss": 45.1719, "step": 13877 }, { "epoch": 0.13136944936151684, "grad_norm": 602.1753540039062, "learning_rate": 1.9465951695037206e-06, "loss": 53.5156, "step": 13878 }, { "epoch": 0.13137891538323188, "grad_norm": 223.1750946044922, "learning_rate": 1.946585284051666e-06, "loss": 21.4922, "step": 13879 }, { "epoch": 0.13138838140494694, "grad_norm": 393.846923828125, "learning_rate": 1.946575397709883e-06, "loss": 19.3359, "step": 13880 }, { "epoch": 0.131397847426662, "grad_norm": 395.7481384277344, "learning_rate": 1.946565510478381e-06, "loss": 20.9375, "step": 13881 }, { "epoch": 0.13140731344837706, "grad_norm": 537.3142700195312, "learning_rate": 1.9465556223571687e-06, "loss": 49.875, "step": 13882 }, { "epoch": 0.13141677947009212, "grad_norm": 355.25189208984375, "learning_rate": 1.946545733346256e-06, "loss": 24.6094, "step": 13883 }, { "epoch": 0.13142624549180715, "grad_norm": 3.3584041595458984, "learning_rate": 1.9465358434456516e-06, "loss": 1.1641, "step": 13884 }, { "epoch": 0.1314357115135222, "grad_norm": 291.0155944824219, "learning_rate": 1.946525952655365e-06, "loss": 24.8516, "step": 13885 }, { "epoch": 0.13144517753523727, "grad_norm": 3.346717357635498, "learning_rate": 1.946516060975406e-06, "loss": 0.9624, "step": 13886 }, { "epoch": 0.13145464355695233, "grad_norm": 884.9251098632812, "learning_rate": 1.9465061684057833e-06, "loss": 46.75, "step": 13887 }, { "epoch": 0.13146410957866736, "grad_norm": 2.367349863052368, "learning_rate": 1.946496274946506e-06, "loss": 0.7676, "step": 13888 }, { "epoch": 0.13147357560038242, "grad_norm": 719.207275390625, "learning_rate": 1.9464863805975843e-06, "loss": 60.5312, "step": 13889 }, { "epoch": 0.13148304162209748, "grad_norm": 8.703301429748535, "learning_rate": 1.9464764853590264e-06, "loss": 0.8872, "step": 13890 }, { "epoch": 0.13149250764381254, "grad_norm": 159.3852081298828, "learning_rate": 1.946466589230843e-06, "loss": 14.2422, "step": 13891 }, { "epoch": 0.1315019736655276, "grad_norm": 174.36770629882812, "learning_rate": 1.946456692213042e-06, "loss": 16.4062, "step": 13892 }, { "epoch": 0.13151143968724263, "grad_norm": 3.0013692378997803, "learning_rate": 1.946446794305633e-06, "loss": 0.9751, "step": 13893 }, { "epoch": 0.1315209057089577, "grad_norm": 3.134794235229492, "learning_rate": 1.946436895508626e-06, "loss": 0.7861, "step": 13894 }, { "epoch": 0.13153037173067275, "grad_norm": 3.127345085144043, "learning_rate": 1.9464269958220297e-06, "loss": 0.9312, "step": 13895 }, { "epoch": 0.1315398377523878, "grad_norm": 224.1414337158203, "learning_rate": 1.9464170952458543e-06, "loss": 16.5156, "step": 13896 }, { "epoch": 0.13154930377410287, "grad_norm": 590.51708984375, "learning_rate": 1.9464071937801074e-06, "loss": 46.4531, "step": 13897 }, { "epoch": 0.1315587697958179, "grad_norm": 343.673095703125, "learning_rate": 1.9463972914248e-06, "loss": 42.0312, "step": 13898 }, { "epoch": 0.13156823581753296, "grad_norm": 434.4111022949219, "learning_rate": 1.9463873881799404e-06, "loss": 37.7656, "step": 13899 }, { "epoch": 0.13157770183924802, "grad_norm": 225.1030731201172, "learning_rate": 1.9463774840455384e-06, "loss": 10.1367, "step": 13900 }, { "epoch": 0.13158716786096308, "grad_norm": 530.70654296875, "learning_rate": 1.9463675790216027e-06, "loss": 40.1133, "step": 13901 }, { "epoch": 0.13159663388267812, "grad_norm": 945.7662353515625, "learning_rate": 1.9463576731081434e-06, "loss": 47.4141, "step": 13902 }, { "epoch": 0.13160609990439318, "grad_norm": 166.4092559814453, "learning_rate": 1.946347766305169e-06, "loss": 20.8984, "step": 13903 }, { "epoch": 0.13161556592610824, "grad_norm": 261.262939453125, "learning_rate": 1.94633785861269e-06, "loss": 19.9609, "step": 13904 }, { "epoch": 0.1316250319478233, "grad_norm": 216.20059204101562, "learning_rate": 1.9463279500307145e-06, "loss": 18.3125, "step": 13905 }, { "epoch": 0.13163449796953836, "grad_norm": 432.1065368652344, "learning_rate": 1.9463180405592523e-06, "loss": 48.0078, "step": 13906 }, { "epoch": 0.1316439639912534, "grad_norm": 515.6941528320312, "learning_rate": 1.9463081301983132e-06, "loss": 21.8906, "step": 13907 }, { "epoch": 0.13165343001296845, "grad_norm": 257.58551025390625, "learning_rate": 1.9462982189479057e-06, "loss": 17.6172, "step": 13908 }, { "epoch": 0.1316628960346835, "grad_norm": 364.2214660644531, "learning_rate": 1.9462883068080394e-06, "loss": 21.7656, "step": 13909 }, { "epoch": 0.13167236205639857, "grad_norm": 483.4699401855469, "learning_rate": 1.9462783937787233e-06, "loss": 45.5, "step": 13910 }, { "epoch": 0.1316818280781136, "grad_norm": 223.99476623535156, "learning_rate": 1.9462684798599676e-06, "loss": 19.1484, "step": 13911 }, { "epoch": 0.13169129409982866, "grad_norm": 2.883776903152466, "learning_rate": 1.946258565051781e-06, "loss": 0.9331, "step": 13912 }, { "epoch": 0.13170076012154372, "grad_norm": 2.6931827068328857, "learning_rate": 1.9462486493541727e-06, "loss": 0.854, "step": 13913 }, { "epoch": 0.13171022614325878, "grad_norm": 259.9792785644531, "learning_rate": 1.9462387327671522e-06, "loss": 19.1875, "step": 13914 }, { "epoch": 0.13171969216497384, "grad_norm": 437.6722412109375, "learning_rate": 1.946228815290729e-06, "loss": 32.2734, "step": 13915 }, { "epoch": 0.13172915818668887, "grad_norm": 1036.3748779296875, "learning_rate": 1.946218896924912e-06, "loss": 9.6602, "step": 13916 }, { "epoch": 0.13173862420840393, "grad_norm": 1362.1121826171875, "learning_rate": 1.946208977669711e-06, "loss": 29.8125, "step": 13917 }, { "epoch": 0.131748090230119, "grad_norm": 255.0959014892578, "learning_rate": 1.9461990575251354e-06, "loss": 39.3438, "step": 13918 }, { "epoch": 0.13175755625183405, "grad_norm": 510.7918701171875, "learning_rate": 1.946189136491194e-06, "loss": 10.2227, "step": 13919 }, { "epoch": 0.13176702227354908, "grad_norm": 390.7464599609375, "learning_rate": 1.9461792145678965e-06, "loss": 44.4219, "step": 13920 }, { "epoch": 0.13177648829526414, "grad_norm": 381.519287109375, "learning_rate": 1.946169291755252e-06, "loss": 19.0625, "step": 13921 }, { "epoch": 0.1317859543169792, "grad_norm": 201.6819610595703, "learning_rate": 1.9461593680532696e-06, "loss": 20.6055, "step": 13922 }, { "epoch": 0.13179542033869426, "grad_norm": 197.1574249267578, "learning_rate": 1.9461494434619593e-06, "loss": 14.5859, "step": 13923 }, { "epoch": 0.13180488636040932, "grad_norm": 894.3849487304688, "learning_rate": 1.94613951798133e-06, "loss": 61.9844, "step": 13924 }, { "epoch": 0.13181435238212436, "grad_norm": 287.45428466796875, "learning_rate": 1.946129591611391e-06, "loss": 22.5, "step": 13925 }, { "epoch": 0.13182381840383942, "grad_norm": 384.4999084472656, "learning_rate": 1.946119664352152e-06, "loss": 21.6172, "step": 13926 }, { "epoch": 0.13183328442555448, "grad_norm": 328.90313720703125, "learning_rate": 1.9461097362036217e-06, "loss": 35.3906, "step": 13927 }, { "epoch": 0.13184275044726954, "grad_norm": 239.62335205078125, "learning_rate": 1.9460998071658102e-06, "loss": 18.7891, "step": 13928 }, { "epoch": 0.13185221646898457, "grad_norm": 1101.528564453125, "learning_rate": 1.9460898772387264e-06, "loss": 16.1914, "step": 13929 }, { "epoch": 0.13186168249069963, "grad_norm": 249.6175537109375, "learning_rate": 1.946079946422379e-06, "loss": 19.0312, "step": 13930 }, { "epoch": 0.1318711485124147, "grad_norm": 636.847412109375, "learning_rate": 1.9460700147167785e-06, "loss": 33.7031, "step": 13931 }, { "epoch": 0.13188061453412975, "grad_norm": 738.2593383789062, "learning_rate": 1.946060082121934e-06, "loss": 62.1562, "step": 13932 }, { "epoch": 0.1318900805558448, "grad_norm": 2.869182586669922, "learning_rate": 1.9460501486378542e-06, "loss": 0.9712, "step": 13933 }, { "epoch": 0.13189954657755984, "grad_norm": 401.5758056640625, "learning_rate": 1.9460402142645488e-06, "loss": 42.0312, "step": 13934 }, { "epoch": 0.1319090125992749, "grad_norm": 574.9983520507812, "learning_rate": 1.9460302790020273e-06, "loss": 66.2188, "step": 13935 }, { "epoch": 0.13191847862098996, "grad_norm": 347.1741638183594, "learning_rate": 1.9460203428502987e-06, "loss": 22.2891, "step": 13936 }, { "epoch": 0.13192794464270502, "grad_norm": 270.5704040527344, "learning_rate": 1.9460104058093726e-06, "loss": 19.4375, "step": 13937 }, { "epoch": 0.13193741066442005, "grad_norm": 571.4815673828125, "learning_rate": 1.9460004678792585e-06, "loss": 61.625, "step": 13938 }, { "epoch": 0.1319468766861351, "grad_norm": 474.87506103515625, "learning_rate": 1.945990529059965e-06, "loss": 42.7969, "step": 13939 }, { "epoch": 0.13195634270785017, "grad_norm": 526.8367309570312, "learning_rate": 1.9459805893515025e-06, "loss": 46.3594, "step": 13940 }, { "epoch": 0.13196580872956523, "grad_norm": 349.48089599609375, "learning_rate": 1.9459706487538796e-06, "loss": 24.9453, "step": 13941 }, { "epoch": 0.1319752747512803, "grad_norm": 696.0598754882812, "learning_rate": 1.9459607072671054e-06, "loss": 29.4258, "step": 13942 }, { "epoch": 0.13198474077299532, "grad_norm": 321.1485595703125, "learning_rate": 1.9459507648911903e-06, "loss": 29.6094, "step": 13943 }, { "epoch": 0.13199420679471038, "grad_norm": 459.7760009765625, "learning_rate": 1.9459408216261425e-06, "loss": 23.625, "step": 13944 }, { "epoch": 0.13200367281642544, "grad_norm": 424.45111083984375, "learning_rate": 1.9459308774719722e-06, "loss": 47.7344, "step": 13945 }, { "epoch": 0.1320131388381405, "grad_norm": 728.9364013671875, "learning_rate": 1.9459209324286884e-06, "loss": 10.5703, "step": 13946 }, { "epoch": 0.13202260485985554, "grad_norm": 3.3510043621063232, "learning_rate": 1.9459109864963004e-06, "loss": 1.0066, "step": 13947 }, { "epoch": 0.1320320708815706, "grad_norm": 488.26507568359375, "learning_rate": 1.9459010396748175e-06, "loss": 39.5859, "step": 13948 }, { "epoch": 0.13204153690328566, "grad_norm": 251.10491943359375, "learning_rate": 1.9458910919642494e-06, "loss": 20.9688, "step": 13949 }, { "epoch": 0.13205100292500072, "grad_norm": 437.98095703125, "learning_rate": 1.945881143364605e-06, "loss": 57.7461, "step": 13950 }, { "epoch": 0.13206046894671578, "grad_norm": 166.7730255126953, "learning_rate": 1.9458711938758938e-06, "loss": 24.9141, "step": 13951 }, { "epoch": 0.1320699349684308, "grad_norm": 644.92822265625, "learning_rate": 1.9458612434981252e-06, "loss": 42.0469, "step": 13952 }, { "epoch": 0.13207940099014587, "grad_norm": 311.0833740234375, "learning_rate": 1.945851292231309e-06, "loss": 10.1172, "step": 13953 }, { "epoch": 0.13208886701186093, "grad_norm": 413.9958801269531, "learning_rate": 1.945841340075454e-06, "loss": 27.5625, "step": 13954 }, { "epoch": 0.132098333033576, "grad_norm": 3320.8720703125, "learning_rate": 1.945831387030569e-06, "loss": 36.25, "step": 13955 }, { "epoch": 0.13210779905529102, "grad_norm": 185.9911651611328, "learning_rate": 1.945821433096665e-06, "loss": 18.5859, "step": 13956 }, { "epoch": 0.13211726507700608, "grad_norm": 317.9609680175781, "learning_rate": 1.945811478273749e-06, "loss": 29.6328, "step": 13957 }, { "epoch": 0.13212673109872114, "grad_norm": 696.7667846679688, "learning_rate": 1.945801522561833e-06, "loss": 33.2812, "step": 13958 }, { "epoch": 0.1321361971204362, "grad_norm": 211.266845703125, "learning_rate": 1.945791565960925e-06, "loss": 16.1641, "step": 13959 }, { "epoch": 0.13214566314215126, "grad_norm": 233.521240234375, "learning_rate": 1.945781608471034e-06, "loss": 20.3125, "step": 13960 }, { "epoch": 0.1321551291638663, "grad_norm": 414.9678955078125, "learning_rate": 1.94577165009217e-06, "loss": 40.7812, "step": 13961 }, { "epoch": 0.13216459518558135, "grad_norm": 440.48492431640625, "learning_rate": 1.945761690824342e-06, "loss": 8.0391, "step": 13962 }, { "epoch": 0.1321740612072964, "grad_norm": 2.8680191040039062, "learning_rate": 1.94575173066756e-06, "loss": 0.9751, "step": 13963 }, { "epoch": 0.13218352722901147, "grad_norm": 532.6820068359375, "learning_rate": 1.9457417696218324e-06, "loss": 51.7656, "step": 13964 }, { "epoch": 0.1321929932507265, "grad_norm": 572.3438110351562, "learning_rate": 1.945731807687169e-06, "loss": 36.6016, "step": 13965 }, { "epoch": 0.13220245927244156, "grad_norm": 595.00341796875, "learning_rate": 1.9457218448635796e-06, "loss": 20.7891, "step": 13966 }, { "epoch": 0.13221192529415662, "grad_norm": 144.9382781982422, "learning_rate": 1.945711881151073e-06, "loss": 18.875, "step": 13967 }, { "epoch": 0.13222139131587168, "grad_norm": 265.1666564941406, "learning_rate": 1.945701916549659e-06, "loss": 21.1094, "step": 13968 }, { "epoch": 0.13223085733758674, "grad_norm": 526.660400390625, "learning_rate": 1.9456919510593465e-06, "loss": 19.6602, "step": 13969 }, { "epoch": 0.13224032335930178, "grad_norm": 128.69468688964844, "learning_rate": 1.945681984680145e-06, "loss": 19.7891, "step": 13970 }, { "epoch": 0.13224978938101684, "grad_norm": 595.4284057617188, "learning_rate": 1.945672017412064e-06, "loss": 46.6797, "step": 13971 }, { "epoch": 0.1322592554027319, "grad_norm": 396.3382263183594, "learning_rate": 1.9456620492551133e-06, "loss": 39.7188, "step": 13972 }, { "epoch": 0.13226872142444696, "grad_norm": 182.8799285888672, "learning_rate": 1.945652080209301e-06, "loss": 22.6328, "step": 13973 }, { "epoch": 0.132278187446162, "grad_norm": 485.30859375, "learning_rate": 1.9456421102746383e-06, "loss": 56.1094, "step": 13974 }, { "epoch": 0.13228765346787705, "grad_norm": 193.2859344482422, "learning_rate": 1.9456321394511326e-06, "loss": 12.5117, "step": 13975 }, { "epoch": 0.1322971194895921, "grad_norm": 224.33802795410156, "learning_rate": 1.9456221677387945e-06, "loss": 15.2109, "step": 13976 }, { "epoch": 0.13230658551130717, "grad_norm": 587.7809448242188, "learning_rate": 1.945612195137633e-06, "loss": 22.5117, "step": 13977 }, { "epoch": 0.13231605153302223, "grad_norm": 627.0035400390625, "learning_rate": 1.945602221647658e-06, "loss": 52.4219, "step": 13978 }, { "epoch": 0.13232551755473726, "grad_norm": 241.34335327148438, "learning_rate": 1.945592247268878e-06, "loss": 18.3945, "step": 13979 }, { "epoch": 0.13233498357645232, "grad_norm": 757.5731201171875, "learning_rate": 1.945582272001303e-06, "loss": 25.2891, "step": 13980 }, { "epoch": 0.13234444959816738, "grad_norm": 240.85231018066406, "learning_rate": 1.9455722958449417e-06, "loss": 10.0234, "step": 13981 }, { "epoch": 0.13235391561988244, "grad_norm": 445.7492370605469, "learning_rate": 1.9455623187998045e-06, "loss": 34.5312, "step": 13982 }, { "epoch": 0.1323633816415975, "grad_norm": 375.5331726074219, "learning_rate": 1.9455523408659e-06, "loss": 27.5, "step": 13983 }, { "epoch": 0.13237284766331253, "grad_norm": 435.4332275390625, "learning_rate": 1.9455423620432376e-06, "loss": 31.4766, "step": 13984 }, { "epoch": 0.1323823136850276, "grad_norm": 312.35943603515625, "learning_rate": 1.945532382331827e-06, "loss": 17.3438, "step": 13985 }, { "epoch": 0.13239177970674265, "grad_norm": 633.0208740234375, "learning_rate": 1.945522401731678e-06, "loss": 40.2969, "step": 13986 }, { "epoch": 0.1324012457284577, "grad_norm": 626.302001953125, "learning_rate": 1.945512420242799e-06, "loss": 30.0156, "step": 13987 }, { "epoch": 0.13241071175017274, "grad_norm": 318.468505859375, "learning_rate": 1.9455024378652e-06, "loss": 22.1562, "step": 13988 }, { "epoch": 0.1324201777718878, "grad_norm": 1213.4654541015625, "learning_rate": 1.94549245459889e-06, "loss": 42.4688, "step": 13989 }, { "epoch": 0.13242964379360286, "grad_norm": 440.8114318847656, "learning_rate": 1.9454824704438787e-06, "loss": 20.3203, "step": 13990 }, { "epoch": 0.13243910981531792, "grad_norm": 317.60693359375, "learning_rate": 1.9454724854001755e-06, "loss": 22.8672, "step": 13991 }, { "epoch": 0.13244857583703298, "grad_norm": 371.7138366699219, "learning_rate": 1.9454624994677895e-06, "loss": 33.2812, "step": 13992 }, { "epoch": 0.13245804185874802, "grad_norm": 445.5029296875, "learning_rate": 1.9454525126467304e-06, "loss": 37.3633, "step": 13993 }, { "epoch": 0.13246750788046308, "grad_norm": 403.0223693847656, "learning_rate": 1.9454425249370075e-06, "loss": 31.8906, "step": 13994 }, { "epoch": 0.13247697390217814, "grad_norm": 3.151064872741699, "learning_rate": 1.9454325363386297e-06, "loss": 0.9731, "step": 13995 }, { "epoch": 0.1324864399238932, "grad_norm": 188.68118286132812, "learning_rate": 1.9454225468516072e-06, "loss": 21.1641, "step": 13996 }, { "epoch": 0.13249590594560823, "grad_norm": 315.9077453613281, "learning_rate": 1.945412556475949e-06, "loss": 26.1328, "step": 13997 }, { "epoch": 0.1325053719673233, "grad_norm": 1034.1585693359375, "learning_rate": 1.9454025652116646e-06, "loss": 45.4219, "step": 13998 }, { "epoch": 0.13251483798903835, "grad_norm": 296.4239501953125, "learning_rate": 1.945392573058763e-06, "loss": 27.1953, "step": 13999 }, { "epoch": 0.1325243040107534, "grad_norm": 342.42462158203125, "learning_rate": 1.945382580017254e-06, "loss": 17.9688, "step": 14000 }, { "epoch": 0.13253377003246847, "grad_norm": 501.4331359863281, "learning_rate": 1.945372586087147e-06, "loss": 49.0625, "step": 14001 }, { "epoch": 0.1325432360541835, "grad_norm": 827.2095947265625, "learning_rate": 1.945362591268451e-06, "loss": 49.0273, "step": 14002 }, { "epoch": 0.13255270207589856, "grad_norm": 624.2586059570312, "learning_rate": 1.945352595561176e-06, "loss": 22.9375, "step": 14003 }, { "epoch": 0.13256216809761362, "grad_norm": 370.6922302246094, "learning_rate": 1.945342598965331e-06, "loss": 24.4844, "step": 14004 }, { "epoch": 0.13257163411932868, "grad_norm": 274.0337219238281, "learning_rate": 1.9453326014809252e-06, "loss": 17.9727, "step": 14005 }, { "epoch": 0.1325811001410437, "grad_norm": 470.4805603027344, "learning_rate": 1.9453226031079685e-06, "loss": 46.3828, "step": 14006 }, { "epoch": 0.13259056616275877, "grad_norm": 814.704833984375, "learning_rate": 1.9453126038464696e-06, "loss": 61.9766, "step": 14007 }, { "epoch": 0.13260003218447383, "grad_norm": 196.5371551513672, "learning_rate": 1.945302603696439e-06, "loss": 19.0547, "step": 14008 }, { "epoch": 0.1326094982061889, "grad_norm": 2.998887300491333, "learning_rate": 1.9452926026578853e-06, "loss": 0.832, "step": 14009 }, { "epoch": 0.13261896422790395, "grad_norm": 328.5835266113281, "learning_rate": 1.9452826007308177e-06, "loss": 10.7578, "step": 14010 }, { "epoch": 0.13262843024961898, "grad_norm": 364.3477478027344, "learning_rate": 1.945272597915246e-06, "loss": 13.4414, "step": 14011 }, { "epoch": 0.13263789627133404, "grad_norm": 319.021728515625, "learning_rate": 1.9452625942111796e-06, "loss": 23.2148, "step": 14012 }, { "epoch": 0.1326473622930491, "grad_norm": 248.9507598876953, "learning_rate": 1.945252589618628e-06, "loss": 23.7031, "step": 14013 }, { "epoch": 0.13265682831476416, "grad_norm": 693.1679077148438, "learning_rate": 1.9452425841376004e-06, "loss": 15.2422, "step": 14014 }, { "epoch": 0.1326662943364792, "grad_norm": 448.82891845703125, "learning_rate": 1.9452325777681064e-06, "loss": 46.2578, "step": 14015 }, { "epoch": 0.13267576035819426, "grad_norm": 328.3697509765625, "learning_rate": 1.945222570510155e-06, "loss": 8.7949, "step": 14016 }, { "epoch": 0.13268522637990932, "grad_norm": 299.48699951171875, "learning_rate": 1.9452125623637562e-06, "loss": 24.6641, "step": 14017 }, { "epoch": 0.13269469240162438, "grad_norm": 3.302295207977295, "learning_rate": 1.945202553328919e-06, "loss": 0.9116, "step": 14018 }, { "epoch": 0.13270415842333944, "grad_norm": 440.435791015625, "learning_rate": 1.9451925434056523e-06, "loss": 23.9531, "step": 14019 }, { "epoch": 0.13271362444505447, "grad_norm": 165.55784606933594, "learning_rate": 1.945182532593967e-06, "loss": 16.8477, "step": 14020 }, { "epoch": 0.13272309046676953, "grad_norm": 430.63531494140625, "learning_rate": 1.9451725208938708e-06, "loss": 42.1719, "step": 14021 }, { "epoch": 0.1327325564884846, "grad_norm": 3.082817792892456, "learning_rate": 1.9451625083053744e-06, "loss": 0.9644, "step": 14022 }, { "epoch": 0.13274202251019965, "grad_norm": 3.186969041824341, "learning_rate": 1.9451524948284866e-06, "loss": 0.8296, "step": 14023 }, { "epoch": 0.13275148853191468, "grad_norm": 527.2069702148438, "learning_rate": 1.945142480463217e-06, "loss": 17.0859, "step": 14024 }, { "epoch": 0.13276095455362974, "grad_norm": 375.37225341796875, "learning_rate": 1.9451324652095747e-06, "loss": 27.6094, "step": 14025 }, { "epoch": 0.1327704205753448, "grad_norm": 441.5431213378906, "learning_rate": 1.9451224490675698e-06, "loss": 35.0156, "step": 14026 }, { "epoch": 0.13277988659705986, "grad_norm": 350.35675048828125, "learning_rate": 1.945112432037211e-06, "loss": 28.0, "step": 14027 }, { "epoch": 0.13278935261877492, "grad_norm": 1471.958740234375, "learning_rate": 1.9451024141185083e-06, "loss": 55.7969, "step": 14028 }, { "epoch": 0.13279881864048995, "grad_norm": 1316.1343994140625, "learning_rate": 1.9450923953114704e-06, "loss": 20.375, "step": 14029 }, { "epoch": 0.132808284662205, "grad_norm": 168.56581115722656, "learning_rate": 1.9450823756161076e-06, "loss": 17.1172, "step": 14030 }, { "epoch": 0.13281775068392007, "grad_norm": 782.4176635742188, "learning_rate": 1.9450723550324282e-06, "loss": 43.7969, "step": 14031 }, { "epoch": 0.13282721670563513, "grad_norm": 278.43548583984375, "learning_rate": 1.9450623335604426e-06, "loss": 19.4609, "step": 14032 }, { "epoch": 0.13283668272735016, "grad_norm": 372.5954284667969, "learning_rate": 1.9450523112001604e-06, "loss": 30.0625, "step": 14033 }, { "epoch": 0.13284614874906522, "grad_norm": 597.493896484375, "learning_rate": 1.9450422879515896e-06, "loss": 55.75, "step": 14034 }, { "epoch": 0.13285561477078028, "grad_norm": 417.73553466796875, "learning_rate": 1.9450322638147413e-06, "loss": 42.7969, "step": 14035 }, { "epoch": 0.13286508079249534, "grad_norm": 600.1283569335938, "learning_rate": 1.9450222387896236e-06, "loss": 50.4258, "step": 14036 }, { "epoch": 0.1328745468142104, "grad_norm": 617.43310546875, "learning_rate": 1.9450122128762465e-06, "loss": 59.3594, "step": 14037 }, { "epoch": 0.13288401283592544, "grad_norm": 590.3587646484375, "learning_rate": 1.94500218607462e-06, "loss": 21.2344, "step": 14038 }, { "epoch": 0.1328934788576405, "grad_norm": 3.4318699836730957, "learning_rate": 1.9449921583847526e-06, "loss": 0.9961, "step": 14039 }, { "epoch": 0.13290294487935556, "grad_norm": 197.85964965820312, "learning_rate": 1.944982129806654e-06, "loss": 14.5234, "step": 14040 }, { "epoch": 0.13291241090107062, "grad_norm": 425.5473937988281, "learning_rate": 1.944972100340333e-06, "loss": 27.7578, "step": 14041 }, { "epoch": 0.13292187692278565, "grad_norm": 638.3748168945312, "learning_rate": 1.9449620699858006e-06, "loss": 10.0625, "step": 14042 }, { "epoch": 0.1329313429445007, "grad_norm": 397.6438903808594, "learning_rate": 1.944952038743065e-06, "loss": 23.4727, "step": 14043 }, { "epoch": 0.13294080896621577, "grad_norm": 221.83880615234375, "learning_rate": 1.9449420066121362e-06, "loss": 15.8281, "step": 14044 }, { "epoch": 0.13295027498793083, "grad_norm": 590.9420166015625, "learning_rate": 1.944931973593023e-06, "loss": 9.918, "step": 14045 }, { "epoch": 0.1329597410096459, "grad_norm": 696.0506591796875, "learning_rate": 1.9449219396857353e-06, "loss": 36.7422, "step": 14046 }, { "epoch": 0.13296920703136092, "grad_norm": 313.4411315917969, "learning_rate": 1.944911904890283e-06, "loss": 18.6172, "step": 14047 }, { "epoch": 0.13297867305307598, "grad_norm": 213.51771545410156, "learning_rate": 1.9449018692066745e-06, "loss": 23.1562, "step": 14048 }, { "epoch": 0.13298813907479104, "grad_norm": 325.18402099609375, "learning_rate": 1.9448918326349196e-06, "loss": 18.5977, "step": 14049 }, { "epoch": 0.1329976050965061, "grad_norm": 198.39801025390625, "learning_rate": 1.944881795175028e-06, "loss": 15.4219, "step": 14050 }, { "epoch": 0.13300707111822113, "grad_norm": 173.6390838623047, "learning_rate": 1.944871756827009e-06, "loss": 21.3438, "step": 14051 }, { "epoch": 0.1330165371399362, "grad_norm": 268.3299255371094, "learning_rate": 1.944861717590872e-06, "loss": 23.4297, "step": 14052 }, { "epoch": 0.13302600316165125, "grad_norm": 537.9740600585938, "learning_rate": 1.9448516774666264e-06, "loss": 41.25, "step": 14053 }, { "epoch": 0.1330354691833663, "grad_norm": 478.17059326171875, "learning_rate": 1.944841636454282e-06, "loss": 24.1562, "step": 14054 }, { "epoch": 0.13304493520508137, "grad_norm": 690.7666015625, "learning_rate": 1.9448315945538474e-06, "loss": 49.3438, "step": 14055 }, { "epoch": 0.1330544012267964, "grad_norm": 610.751220703125, "learning_rate": 1.9448215517653327e-06, "loss": 11.4805, "step": 14056 }, { "epoch": 0.13306386724851146, "grad_norm": 424.6312255859375, "learning_rate": 1.9448115080887477e-06, "loss": 21.5, "step": 14057 }, { "epoch": 0.13307333327022652, "grad_norm": 231.652099609375, "learning_rate": 1.9448014635241007e-06, "loss": 19.1094, "step": 14058 }, { "epoch": 0.13308279929194158, "grad_norm": 423.5890808105469, "learning_rate": 1.944791418071402e-06, "loss": 13.5, "step": 14059 }, { "epoch": 0.13309226531365662, "grad_norm": 242.61744689941406, "learning_rate": 1.944781371730661e-06, "loss": 21.0547, "step": 14060 }, { "epoch": 0.13310173133537168, "grad_norm": 147.78231811523438, "learning_rate": 1.9447713245018867e-06, "loss": 16.1992, "step": 14061 }, { "epoch": 0.13311119735708674, "grad_norm": 184.26898193359375, "learning_rate": 1.9447612763850893e-06, "loss": 19.0391, "step": 14062 }, { "epoch": 0.1331206633788018, "grad_norm": 674.0974731445312, "learning_rate": 1.9447512273802773e-06, "loss": 38.3203, "step": 14063 }, { "epoch": 0.13313012940051686, "grad_norm": 343.9236145019531, "learning_rate": 1.9447411774874606e-06, "loss": 22.5859, "step": 14064 }, { "epoch": 0.1331395954222319, "grad_norm": 571.6119995117188, "learning_rate": 1.944731126706649e-06, "loss": 23.3438, "step": 14065 }, { "epoch": 0.13314906144394695, "grad_norm": 492.3842468261719, "learning_rate": 1.9447210750378515e-06, "loss": 29.3828, "step": 14066 }, { "epoch": 0.133158527465662, "grad_norm": 450.27471923828125, "learning_rate": 1.9447110224810776e-06, "loss": 36.8281, "step": 14067 }, { "epoch": 0.13316799348737707, "grad_norm": 420.8173522949219, "learning_rate": 1.9447009690363367e-06, "loss": 57.6016, "step": 14068 }, { "epoch": 0.13317745950909213, "grad_norm": 401.215087890625, "learning_rate": 1.944690914703638e-06, "loss": 37.1094, "step": 14069 }, { "epoch": 0.13318692553080716, "grad_norm": 692.4955444335938, "learning_rate": 1.944680859482992e-06, "loss": 17.293, "step": 14070 }, { "epoch": 0.13319639155252222, "grad_norm": 323.14605712890625, "learning_rate": 1.944670803374407e-06, "loss": 36.75, "step": 14071 }, { "epoch": 0.13320585757423728, "grad_norm": 243.7960662841797, "learning_rate": 1.944660746377893e-06, "loss": 17.668, "step": 14072 }, { "epoch": 0.13321532359595234, "grad_norm": 375.0509033203125, "learning_rate": 1.94465068849346e-06, "loss": 13.0625, "step": 14073 }, { "epoch": 0.13322478961766737, "grad_norm": 349.4651794433594, "learning_rate": 1.944640629721116e-06, "loss": 33.6406, "step": 14074 }, { "epoch": 0.13323425563938243, "grad_norm": 385.38372802734375, "learning_rate": 1.9446305700608713e-06, "loss": 37.8281, "step": 14075 }, { "epoch": 0.1332437216610975, "grad_norm": 411.33740234375, "learning_rate": 1.9446205095127355e-06, "loss": 40.8281, "step": 14076 }, { "epoch": 0.13325318768281255, "grad_norm": 374.66839599609375, "learning_rate": 1.9446104480767177e-06, "loss": 24.3672, "step": 14077 }, { "epoch": 0.1332626537045276, "grad_norm": 264.6129150390625, "learning_rate": 1.9446003857528276e-06, "loss": 10.3867, "step": 14078 }, { "epoch": 0.13327211972624264, "grad_norm": 302.4737854003906, "learning_rate": 1.9445903225410747e-06, "loss": 33.0156, "step": 14079 }, { "epoch": 0.1332815857479577, "grad_norm": 399.12786865234375, "learning_rate": 1.9445802584414682e-06, "loss": 37.9375, "step": 14080 }, { "epoch": 0.13329105176967276, "grad_norm": 383.6885986328125, "learning_rate": 1.9445701934540183e-06, "loss": 18.0273, "step": 14081 }, { "epoch": 0.13330051779138782, "grad_norm": 295.3364562988281, "learning_rate": 1.944560127578733e-06, "loss": 18.0234, "step": 14082 }, { "epoch": 0.13330998381310286, "grad_norm": 548.1799926757812, "learning_rate": 1.944550060815623e-06, "loss": 46.6484, "step": 14083 }, { "epoch": 0.13331944983481792, "grad_norm": 315.5260925292969, "learning_rate": 1.9445399931646974e-06, "loss": 34.1562, "step": 14084 }, { "epoch": 0.13332891585653298, "grad_norm": 344.89483642578125, "learning_rate": 1.9445299246259657e-06, "loss": 20.1719, "step": 14085 }, { "epoch": 0.13333838187824804, "grad_norm": 616.0325927734375, "learning_rate": 1.9445198551994376e-06, "loss": 43.8047, "step": 14086 }, { "epoch": 0.1333478478999631, "grad_norm": 448.6207580566406, "learning_rate": 1.9445097848851216e-06, "loss": 50.6094, "step": 14087 }, { "epoch": 0.13335731392167813, "grad_norm": 270.03350830078125, "learning_rate": 1.944499713683028e-06, "loss": 20.4688, "step": 14088 }, { "epoch": 0.1333667799433932, "grad_norm": 940.17919921875, "learning_rate": 1.9444896415931668e-06, "loss": 40.7891, "step": 14089 }, { "epoch": 0.13337624596510825, "grad_norm": 567.7007446289062, "learning_rate": 1.9444795686155462e-06, "loss": 48.1094, "step": 14090 }, { "epoch": 0.1333857119868233, "grad_norm": 275.9041442871094, "learning_rate": 1.9444694947501763e-06, "loss": 18.2812, "step": 14091 }, { "epoch": 0.13339517800853834, "grad_norm": 524.6348266601562, "learning_rate": 1.9444594199970667e-06, "loss": 37.7969, "step": 14092 }, { "epoch": 0.1334046440302534, "grad_norm": 664.5223388671875, "learning_rate": 1.9444493443562263e-06, "loss": 16.5391, "step": 14093 }, { "epoch": 0.13341411005196846, "grad_norm": 693.8245239257812, "learning_rate": 1.9444392678276653e-06, "loss": 31.125, "step": 14094 }, { "epoch": 0.13342357607368352, "grad_norm": 274.4820251464844, "learning_rate": 1.9444291904113927e-06, "loss": 27.1172, "step": 14095 }, { "epoch": 0.13343304209539858, "grad_norm": 255.0679473876953, "learning_rate": 1.9444191121074184e-06, "loss": 20.625, "step": 14096 }, { "epoch": 0.1334425081171136, "grad_norm": 389.8281555175781, "learning_rate": 1.944409032915751e-06, "loss": 17.9688, "step": 14097 }, { "epoch": 0.13345197413882867, "grad_norm": 562.8201904296875, "learning_rate": 1.944398952836401e-06, "loss": 48.0469, "step": 14098 }, { "epoch": 0.13346144016054373, "grad_norm": 161.4737548828125, "learning_rate": 1.944388871869377e-06, "loss": 21.5312, "step": 14099 }, { "epoch": 0.1334709061822588, "grad_norm": 417.68060302734375, "learning_rate": 1.9443787900146896e-06, "loss": 42.4375, "step": 14100 }, { "epoch": 0.13348037220397382, "grad_norm": 385.7509765625, "learning_rate": 1.944368707272347e-06, "loss": 27.7812, "step": 14101 }, { "epoch": 0.13348983822568888, "grad_norm": 137.77792358398438, "learning_rate": 1.944358623642359e-06, "loss": 14.6523, "step": 14102 }, { "epoch": 0.13349930424740394, "grad_norm": 372.3295593261719, "learning_rate": 1.944348539124736e-06, "loss": 20.9141, "step": 14103 }, { "epoch": 0.133508770269119, "grad_norm": 279.2896728515625, "learning_rate": 1.9443384537194865e-06, "loss": 19.1484, "step": 14104 }, { "epoch": 0.13351823629083406, "grad_norm": 276.90118408203125, "learning_rate": 1.9443283674266203e-06, "loss": 23.8594, "step": 14105 }, { "epoch": 0.1335277023125491, "grad_norm": 3.2869646549224854, "learning_rate": 1.944318280246147e-06, "loss": 0.9873, "step": 14106 }, { "epoch": 0.13353716833426416, "grad_norm": 313.6844787597656, "learning_rate": 1.944308192178076e-06, "loss": 47.3359, "step": 14107 }, { "epoch": 0.13354663435597922, "grad_norm": 304.5935363769531, "learning_rate": 1.944298103222416e-06, "loss": 16.7461, "step": 14108 }, { "epoch": 0.13355610037769428, "grad_norm": 249.31724548339844, "learning_rate": 1.9442880133791783e-06, "loss": 20.7734, "step": 14109 }, { "epoch": 0.1335655663994093, "grad_norm": 251.36898803710938, "learning_rate": 1.9442779226483706e-06, "loss": 30.4688, "step": 14110 }, { "epoch": 0.13357503242112437, "grad_norm": 206.7998504638672, "learning_rate": 1.9442678310300032e-06, "loss": 16.0312, "step": 14111 }, { "epoch": 0.13358449844283943, "grad_norm": 278.50421142578125, "learning_rate": 1.9442577385240855e-06, "loss": 20.6484, "step": 14112 }, { "epoch": 0.1335939644645545, "grad_norm": 528.382568359375, "learning_rate": 1.944247645130627e-06, "loss": 19.4453, "step": 14113 }, { "epoch": 0.13360343048626955, "grad_norm": 353.4820861816406, "learning_rate": 1.944237550849637e-06, "loss": 19.7266, "step": 14114 }, { "epoch": 0.13361289650798458, "grad_norm": 3.281451463699341, "learning_rate": 1.944227455681125e-06, "loss": 0.7817, "step": 14115 }, { "epoch": 0.13362236252969964, "grad_norm": 470.3594665527344, "learning_rate": 1.9442173596251013e-06, "loss": 42.0469, "step": 14116 }, { "epoch": 0.1336318285514147, "grad_norm": 388.6083984375, "learning_rate": 1.944207262681574e-06, "loss": 24.9219, "step": 14117 }, { "epoch": 0.13364129457312976, "grad_norm": 294.1619567871094, "learning_rate": 1.944197164850554e-06, "loss": 30.6094, "step": 14118 }, { "epoch": 0.1336507605948448, "grad_norm": 1353.3441162109375, "learning_rate": 1.9441870661320493e-06, "loss": 46.3438, "step": 14119 }, { "epoch": 0.13366022661655985, "grad_norm": 348.78643798828125, "learning_rate": 1.9441769665260706e-06, "loss": 47.6172, "step": 14120 }, { "epoch": 0.1336696926382749, "grad_norm": 666.6171264648438, "learning_rate": 1.944166866032627e-06, "loss": 14.8359, "step": 14121 }, { "epoch": 0.13367915865998997, "grad_norm": 256.5204772949219, "learning_rate": 1.944156764651728e-06, "loss": 21.4219, "step": 14122 }, { "epoch": 0.13368862468170503, "grad_norm": 459.7120666503906, "learning_rate": 1.944146662383383e-06, "loss": 26.1836, "step": 14123 }, { "epoch": 0.13369809070342006, "grad_norm": 158.26951599121094, "learning_rate": 1.9441365592276015e-06, "loss": 23.375, "step": 14124 }, { "epoch": 0.13370755672513512, "grad_norm": 456.4707336425781, "learning_rate": 1.944126455184393e-06, "loss": 25.7969, "step": 14125 }, { "epoch": 0.13371702274685018, "grad_norm": 501.8450622558594, "learning_rate": 1.9441163502537672e-06, "loss": 38.0723, "step": 14126 }, { "epoch": 0.13372648876856524, "grad_norm": 380.2008361816406, "learning_rate": 1.9441062444357336e-06, "loss": 40.6133, "step": 14127 }, { "epoch": 0.13373595479028028, "grad_norm": 542.9956665039062, "learning_rate": 1.944096137730301e-06, "loss": 20.3203, "step": 14128 }, { "epoch": 0.13374542081199534, "grad_norm": 439.3186340332031, "learning_rate": 1.94408603013748e-06, "loss": 51.7188, "step": 14129 }, { "epoch": 0.1337548868337104, "grad_norm": 211.76760864257812, "learning_rate": 1.944075921657279e-06, "loss": 24.9766, "step": 14130 }, { "epoch": 0.13376435285542546, "grad_norm": 400.4131774902344, "learning_rate": 1.9440658122897084e-06, "loss": 32.5, "step": 14131 }, { "epoch": 0.13377381887714052, "grad_norm": 235.63385009765625, "learning_rate": 1.9440557020347772e-06, "loss": 26.918, "step": 14132 }, { "epoch": 0.13378328489885555, "grad_norm": 216.56845092773438, "learning_rate": 1.9440455908924953e-06, "loss": 16.4531, "step": 14133 }, { "epoch": 0.1337927509205706, "grad_norm": 749.4583129882812, "learning_rate": 1.9440354788628717e-06, "loss": 50.4375, "step": 14134 }, { "epoch": 0.13380221694228567, "grad_norm": 1230.23291015625, "learning_rate": 1.9440253659459164e-06, "loss": 10.3203, "step": 14135 }, { "epoch": 0.13381168296400073, "grad_norm": 1373.510498046875, "learning_rate": 1.9440152521416385e-06, "loss": 52.5195, "step": 14136 }, { "epoch": 0.13382114898571576, "grad_norm": 3.376716136932373, "learning_rate": 1.944005137450048e-06, "loss": 0.9297, "step": 14137 }, { "epoch": 0.13383061500743082, "grad_norm": 227.19464111328125, "learning_rate": 1.9439950218711535e-06, "loss": 20.3125, "step": 14138 }, { "epoch": 0.13384008102914588, "grad_norm": 348.6930236816406, "learning_rate": 1.9439849054049653e-06, "loss": 25.6797, "step": 14139 }, { "epoch": 0.13384954705086094, "grad_norm": 2.7287697792053223, "learning_rate": 1.943974788051493e-06, "loss": 0.8452, "step": 14140 }, { "epoch": 0.133859013072576, "grad_norm": 454.28912353515625, "learning_rate": 1.9439646698107456e-06, "loss": 27.6797, "step": 14141 }, { "epoch": 0.13386847909429103, "grad_norm": 2.954986572265625, "learning_rate": 1.9439545506827327e-06, "loss": 0.8867, "step": 14142 }, { "epoch": 0.1338779451160061, "grad_norm": 454.2418518066406, "learning_rate": 1.9439444306674642e-06, "loss": 51.9844, "step": 14143 }, { "epoch": 0.13388741113772115, "grad_norm": 404.5354309082031, "learning_rate": 1.943934309764949e-06, "loss": 49.4766, "step": 14144 }, { "epoch": 0.1338968771594362, "grad_norm": 194.04605102539062, "learning_rate": 1.9439241879751974e-06, "loss": 20.9297, "step": 14145 }, { "epoch": 0.13390634318115124, "grad_norm": 305.3949279785156, "learning_rate": 1.943914065298218e-06, "loss": 32.4219, "step": 14146 }, { "epoch": 0.1339158092028663, "grad_norm": 230.68368530273438, "learning_rate": 1.9439039417340213e-06, "loss": 21.75, "step": 14147 }, { "epoch": 0.13392527522458136, "grad_norm": 1219.0914306640625, "learning_rate": 1.943893817282616e-06, "loss": 25.1953, "step": 14148 }, { "epoch": 0.13393474124629642, "grad_norm": 221.67417907714844, "learning_rate": 1.943883691944012e-06, "loss": 14.793, "step": 14149 }, { "epoch": 0.13394420726801148, "grad_norm": 306.36102294921875, "learning_rate": 1.9438735657182186e-06, "loss": 25.6094, "step": 14150 }, { "epoch": 0.13395367328972652, "grad_norm": 459.3589172363281, "learning_rate": 1.9438634386052456e-06, "loss": 44.8906, "step": 14151 }, { "epoch": 0.13396313931144158, "grad_norm": 443.8653869628906, "learning_rate": 1.943853310605102e-06, "loss": 54.9688, "step": 14152 }, { "epoch": 0.13397260533315664, "grad_norm": 355.4482116699219, "learning_rate": 1.9438431817177983e-06, "loss": 28.4609, "step": 14153 }, { "epoch": 0.1339820713548717, "grad_norm": 830.1468505859375, "learning_rate": 1.943833051943343e-06, "loss": 41.1875, "step": 14154 }, { "epoch": 0.13399153737658676, "grad_norm": 268.7648620605469, "learning_rate": 1.943822921281746e-06, "loss": 21.1641, "step": 14155 }, { "epoch": 0.1340010033983018, "grad_norm": 302.2831726074219, "learning_rate": 1.943812789733017e-06, "loss": 17.9688, "step": 14156 }, { "epoch": 0.13401046942001685, "grad_norm": 308.0558166503906, "learning_rate": 1.943802657297165e-06, "loss": 49.9688, "step": 14157 }, { "epoch": 0.1340199354417319, "grad_norm": 267.1319274902344, "learning_rate": 1.9437925239742006e-06, "loss": 15.4688, "step": 14158 }, { "epoch": 0.13402940146344697, "grad_norm": 3.4573042392730713, "learning_rate": 1.943782389764132e-06, "loss": 1.0981, "step": 14159 }, { "epoch": 0.134038867485162, "grad_norm": 615.1035766601562, "learning_rate": 1.9437722546669697e-06, "loss": 19.9492, "step": 14160 }, { "epoch": 0.13404833350687706, "grad_norm": 505.2385559082031, "learning_rate": 1.9437621186827233e-06, "loss": 37.8828, "step": 14161 }, { "epoch": 0.13405779952859212, "grad_norm": 427.3658752441406, "learning_rate": 1.9437519818114014e-06, "loss": 16.1445, "step": 14162 }, { "epoch": 0.13406726555030718, "grad_norm": 645.6940307617188, "learning_rate": 1.943741844053014e-06, "loss": 34.9688, "step": 14163 }, { "epoch": 0.13407673157202224, "grad_norm": 361.7265625, "learning_rate": 1.9437317054075705e-06, "loss": 32.8281, "step": 14164 }, { "epoch": 0.13408619759373727, "grad_norm": 242.26365661621094, "learning_rate": 1.9437215658750806e-06, "loss": 24.5547, "step": 14165 }, { "epoch": 0.13409566361545233, "grad_norm": 610.8528442382812, "learning_rate": 1.943711425455554e-06, "loss": 27.25, "step": 14166 }, { "epoch": 0.1341051296371674, "grad_norm": 633.407958984375, "learning_rate": 1.943701284149e-06, "loss": 53.6641, "step": 14167 }, { "epoch": 0.13411459565888245, "grad_norm": 872.4664916992188, "learning_rate": 1.9436911419554287e-06, "loss": 48.4531, "step": 14168 }, { "epoch": 0.13412406168059748, "grad_norm": 417.2672424316406, "learning_rate": 1.9436809988748483e-06, "loss": 21.4453, "step": 14169 }, { "epoch": 0.13413352770231254, "grad_norm": 393.7019958496094, "learning_rate": 1.9436708549072698e-06, "loss": 27.0469, "step": 14170 }, { "epoch": 0.1341429937240276, "grad_norm": 191.14700317382812, "learning_rate": 1.9436607100527017e-06, "loss": 13.9023, "step": 14171 }, { "epoch": 0.13415245974574266, "grad_norm": 696.6983642578125, "learning_rate": 1.943650564311154e-06, "loss": 31.4375, "step": 14172 }, { "epoch": 0.13416192576745772, "grad_norm": 722.6909790039062, "learning_rate": 1.943640417682636e-06, "loss": 44.0547, "step": 14173 }, { "epoch": 0.13417139178917276, "grad_norm": 470.7260437011719, "learning_rate": 1.9436302701671575e-06, "loss": 31.2656, "step": 14174 }, { "epoch": 0.13418085781088782, "grad_norm": 269.5786437988281, "learning_rate": 1.943620121764728e-06, "loss": 27.5156, "step": 14175 }, { "epoch": 0.13419032383260288, "grad_norm": 502.9660949707031, "learning_rate": 1.943609972475357e-06, "loss": 21.3047, "step": 14176 }, { "epoch": 0.13419978985431794, "grad_norm": 345.9015808105469, "learning_rate": 1.943599822299054e-06, "loss": 20.2891, "step": 14177 }, { "epoch": 0.13420925587603297, "grad_norm": 388.90447998046875, "learning_rate": 1.943589671235829e-06, "loss": 19.375, "step": 14178 }, { "epoch": 0.13421872189774803, "grad_norm": 301.009765625, "learning_rate": 1.9435795192856905e-06, "loss": 19.2578, "step": 14179 }, { "epoch": 0.1342281879194631, "grad_norm": 649.9561767578125, "learning_rate": 1.9435693664486487e-06, "loss": 21.8359, "step": 14180 }, { "epoch": 0.13423765394117815, "grad_norm": 156.1368865966797, "learning_rate": 1.943559212724713e-06, "loss": 16.6406, "step": 14181 }, { "epoch": 0.1342471199628932, "grad_norm": 373.58624267578125, "learning_rate": 1.9435490581138933e-06, "loss": 23.1484, "step": 14182 }, { "epoch": 0.13425658598460824, "grad_norm": 280.83135986328125, "learning_rate": 1.9435389026161987e-06, "loss": 18.5859, "step": 14183 }, { "epoch": 0.1342660520063233, "grad_norm": 198.314697265625, "learning_rate": 1.943528746231639e-06, "loss": 24.6719, "step": 14184 }, { "epoch": 0.13427551802803836, "grad_norm": 504.6814270019531, "learning_rate": 1.943518588960224e-06, "loss": 29.9766, "step": 14185 }, { "epoch": 0.13428498404975342, "grad_norm": 285.0907287597656, "learning_rate": 1.943508430801962e-06, "loss": 14.8125, "step": 14186 }, { "epoch": 0.13429445007146845, "grad_norm": 422.0186767578125, "learning_rate": 1.943498271756864e-06, "loss": 41.4844, "step": 14187 }, { "epoch": 0.1343039160931835, "grad_norm": 728.0380249023438, "learning_rate": 1.9434881118249392e-06, "loss": 58.0, "step": 14188 }, { "epoch": 0.13431338211489857, "grad_norm": 309.2087097167969, "learning_rate": 1.9434779510061963e-06, "loss": 35.6406, "step": 14189 }, { "epoch": 0.13432284813661363, "grad_norm": 413.2934265136719, "learning_rate": 1.9434677893006463e-06, "loss": 39.9531, "step": 14190 }, { "epoch": 0.1343323141583287, "grad_norm": 444.0975341796875, "learning_rate": 1.9434576267082978e-06, "loss": 37.9141, "step": 14191 }, { "epoch": 0.13434178018004372, "grad_norm": 204.76400756835938, "learning_rate": 1.94344746322916e-06, "loss": 23.9219, "step": 14192 }, { "epoch": 0.13435124620175878, "grad_norm": 383.9498596191406, "learning_rate": 1.9434372988632433e-06, "loss": 41.4531, "step": 14193 }, { "epoch": 0.13436071222347384, "grad_norm": 408.2906188964844, "learning_rate": 1.943427133610557e-06, "loss": 53.125, "step": 14194 }, { "epoch": 0.1343701782451889, "grad_norm": 496.3587341308594, "learning_rate": 1.94341696747111e-06, "loss": 68.3828, "step": 14195 }, { "epoch": 0.13437964426690394, "grad_norm": 917.7031860351562, "learning_rate": 1.943406800444913e-06, "loss": 66.1562, "step": 14196 }, { "epoch": 0.134389110288619, "grad_norm": 705.3610229492188, "learning_rate": 1.943396632531975e-06, "loss": 41.6172, "step": 14197 }, { "epoch": 0.13439857631033406, "grad_norm": 4.417141437530518, "learning_rate": 1.9433864637323053e-06, "loss": 1.0679, "step": 14198 }, { "epoch": 0.13440804233204912, "grad_norm": 223.31044006347656, "learning_rate": 1.943376294045914e-06, "loss": 20.7773, "step": 14199 }, { "epoch": 0.13441750835376418, "grad_norm": 996.3882446289062, "learning_rate": 1.94336612347281e-06, "loss": 35.3359, "step": 14200 }, { "epoch": 0.1344269743754792, "grad_norm": 321.0157775878906, "learning_rate": 1.9433559520130034e-06, "loss": 24.0234, "step": 14201 }, { "epoch": 0.13443644039719427, "grad_norm": 367.0862121582031, "learning_rate": 1.943345779666504e-06, "loss": 24.4062, "step": 14202 }, { "epoch": 0.13444590641890933, "grad_norm": 136.08985900878906, "learning_rate": 1.94333560643332e-06, "loss": 20.625, "step": 14203 }, { "epoch": 0.1344553724406244, "grad_norm": 389.1661071777344, "learning_rate": 1.9433254323134624e-06, "loss": 42.5625, "step": 14204 }, { "epoch": 0.13446483846233942, "grad_norm": 285.5508117675781, "learning_rate": 1.94331525730694e-06, "loss": 36.4922, "step": 14205 }, { "epoch": 0.13447430448405448, "grad_norm": 339.95556640625, "learning_rate": 1.9433050814137627e-06, "loss": 10.043, "step": 14206 }, { "epoch": 0.13448377050576954, "grad_norm": 309.1684875488281, "learning_rate": 1.9432949046339405e-06, "loss": 25.1719, "step": 14207 }, { "epoch": 0.1344932365274846, "grad_norm": 146.3238067626953, "learning_rate": 1.943284726967482e-06, "loss": 19.2109, "step": 14208 }, { "epoch": 0.13450270254919966, "grad_norm": 368.12451171875, "learning_rate": 1.943274548414397e-06, "loss": 28.6484, "step": 14209 }, { "epoch": 0.1345121685709147, "grad_norm": 339.240478515625, "learning_rate": 1.9432643689746956e-06, "loss": 12.8027, "step": 14210 }, { "epoch": 0.13452163459262975, "grad_norm": 606.95556640625, "learning_rate": 1.943254188648387e-06, "loss": 30.1094, "step": 14211 }, { "epoch": 0.1345311006143448, "grad_norm": 213.56454467773438, "learning_rate": 1.943244007435481e-06, "loss": 21.5469, "step": 14212 }, { "epoch": 0.13454056663605987, "grad_norm": 370.81524658203125, "learning_rate": 1.943233825335987e-06, "loss": 23.9844, "step": 14213 }, { "epoch": 0.1345500326577749, "grad_norm": 311.1214294433594, "learning_rate": 1.9432236423499143e-06, "loss": 19.0977, "step": 14214 }, { "epoch": 0.13455949867948996, "grad_norm": 316.57562255859375, "learning_rate": 1.943213458477273e-06, "loss": 39.375, "step": 14215 }, { "epoch": 0.13456896470120502, "grad_norm": 1624.587646484375, "learning_rate": 1.9432032737180724e-06, "loss": 49.9375, "step": 14216 }, { "epoch": 0.13457843072292008, "grad_norm": 710.3591918945312, "learning_rate": 1.9431930880723216e-06, "loss": 33.1484, "step": 14217 }, { "epoch": 0.13458789674463514, "grad_norm": 717.5377197265625, "learning_rate": 1.9431829015400314e-06, "loss": 36.9688, "step": 14218 }, { "epoch": 0.13459736276635018, "grad_norm": 1054.202392578125, "learning_rate": 1.94317271412121e-06, "loss": 55.7109, "step": 14219 }, { "epoch": 0.13460682878806524, "grad_norm": 749.232666015625, "learning_rate": 1.9431625258158683e-06, "loss": 35.4258, "step": 14220 }, { "epoch": 0.1346162948097803, "grad_norm": 370.68304443359375, "learning_rate": 1.9431523366240144e-06, "loss": 42.9688, "step": 14221 }, { "epoch": 0.13462576083149536, "grad_norm": 272.285400390625, "learning_rate": 1.943142146545659e-06, "loss": 13.168, "step": 14222 }, { "epoch": 0.1346352268532104, "grad_norm": 299.84320068359375, "learning_rate": 1.9431319555808114e-06, "loss": 28.8906, "step": 14223 }, { "epoch": 0.13464469287492545, "grad_norm": 3.0009701251983643, "learning_rate": 1.943121763729481e-06, "loss": 1.0073, "step": 14224 }, { "epoch": 0.1346541588966405, "grad_norm": 1224.3033447265625, "learning_rate": 1.9431115709916773e-06, "loss": 81.0781, "step": 14225 }, { "epoch": 0.13466362491835557, "grad_norm": 245.18484497070312, "learning_rate": 1.9431013773674106e-06, "loss": 20.4062, "step": 14226 }, { "epoch": 0.13467309094007063, "grad_norm": 739.587646484375, "learning_rate": 1.9430911828566896e-06, "loss": 51.8281, "step": 14227 }, { "epoch": 0.13468255696178566, "grad_norm": 183.4781951904297, "learning_rate": 1.9430809874595245e-06, "loss": 15.0938, "step": 14228 }, { "epoch": 0.13469202298350072, "grad_norm": 288.9739990234375, "learning_rate": 1.943070791175924e-06, "loss": 27.4609, "step": 14229 }, { "epoch": 0.13470148900521578, "grad_norm": 499.42315673828125, "learning_rate": 1.9430605940058992e-06, "loss": 43.0156, "step": 14230 }, { "epoch": 0.13471095502693084, "grad_norm": 450.610107421875, "learning_rate": 1.9430503959494585e-06, "loss": 49.4141, "step": 14231 }, { "epoch": 0.13472042104864587, "grad_norm": 230.6866455078125, "learning_rate": 1.9430401970066114e-06, "loss": 24.8203, "step": 14232 }, { "epoch": 0.13472988707036093, "grad_norm": 536.8527221679688, "learning_rate": 1.9430299971773684e-06, "loss": 47.4531, "step": 14233 }, { "epoch": 0.134739353092076, "grad_norm": 544.7283325195312, "learning_rate": 1.9430197964617377e-06, "loss": 21.6562, "step": 14234 }, { "epoch": 0.13474881911379105, "grad_norm": 444.9249267578125, "learning_rate": 1.9430095948597305e-06, "loss": 36.1094, "step": 14235 }, { "epoch": 0.1347582851355061, "grad_norm": 308.171630859375, "learning_rate": 1.9429993923713555e-06, "loss": 36.2188, "step": 14236 }, { "epoch": 0.13476775115722114, "grad_norm": 312.9499206542969, "learning_rate": 1.942989188996622e-06, "loss": 27.4492, "step": 14237 }, { "epoch": 0.1347772171789362, "grad_norm": 385.99346923828125, "learning_rate": 1.942978984735541e-06, "loss": 35.5938, "step": 14238 }, { "epoch": 0.13478668320065126, "grad_norm": 228.2179412841797, "learning_rate": 1.94296877958812e-06, "loss": 22.3906, "step": 14239 }, { "epoch": 0.13479614922236632, "grad_norm": 595.6715698242188, "learning_rate": 1.94295857355437e-06, "loss": 21.375, "step": 14240 }, { "epoch": 0.13480561524408138, "grad_norm": 210.91259765625, "learning_rate": 1.9429483666343006e-06, "loss": 13.1484, "step": 14241 }, { "epoch": 0.13481508126579642, "grad_norm": 382.349853515625, "learning_rate": 1.9429381588279207e-06, "loss": 41.3516, "step": 14242 }, { "epoch": 0.13482454728751148, "grad_norm": 221.2873992919922, "learning_rate": 1.9429279501352407e-06, "loss": 14.7891, "step": 14243 }, { "epoch": 0.13483401330922654, "grad_norm": 769.3270874023438, "learning_rate": 1.9429177405562695e-06, "loss": 29.2031, "step": 14244 }, { "epoch": 0.1348434793309416, "grad_norm": 580.925537109375, "learning_rate": 1.942907530091017e-06, "loss": 44.2656, "step": 14245 }, { "epoch": 0.13485294535265663, "grad_norm": 712.106689453125, "learning_rate": 1.9428973187394927e-06, "loss": 21.8203, "step": 14246 }, { "epoch": 0.1348624113743717, "grad_norm": 409.77777099609375, "learning_rate": 1.942887106501706e-06, "loss": 51.1562, "step": 14247 }, { "epoch": 0.13487187739608675, "grad_norm": 548.4237060546875, "learning_rate": 1.9428768933776674e-06, "loss": 27.0938, "step": 14248 }, { "epoch": 0.1348813434178018, "grad_norm": 578.2396240234375, "learning_rate": 1.9428666793673853e-06, "loss": 33.3203, "step": 14249 }, { "epoch": 0.13489080943951687, "grad_norm": 290.0273742675781, "learning_rate": 1.9428564644708704e-06, "loss": 17.7344, "step": 14250 }, { "epoch": 0.1349002754612319, "grad_norm": 549.3677978515625, "learning_rate": 1.942846248688131e-06, "loss": 48.8125, "step": 14251 }, { "epoch": 0.13490974148294696, "grad_norm": 3.2586915493011475, "learning_rate": 1.942836032019178e-06, "loss": 1.1069, "step": 14252 }, { "epoch": 0.13491920750466202, "grad_norm": 572.5894775390625, "learning_rate": 1.9428258144640204e-06, "loss": 49.7227, "step": 14253 }, { "epoch": 0.13492867352637708, "grad_norm": 293.2240295410156, "learning_rate": 1.942815596022668e-06, "loss": 19.4922, "step": 14254 }, { "epoch": 0.1349381395480921, "grad_norm": 630.0292358398438, "learning_rate": 1.94280537669513e-06, "loss": 55.2578, "step": 14255 }, { "epoch": 0.13494760556980717, "grad_norm": 2.585941791534424, "learning_rate": 1.9427951564814163e-06, "loss": 0.8528, "step": 14256 }, { "epoch": 0.13495707159152223, "grad_norm": 651.8449096679688, "learning_rate": 1.942784935381537e-06, "loss": 47.8984, "step": 14257 }, { "epoch": 0.1349665376132373, "grad_norm": 648.421142578125, "learning_rate": 1.9427747133955008e-06, "loss": 60.1172, "step": 14258 }, { "epoch": 0.13497600363495235, "grad_norm": 189.5576629638672, "learning_rate": 1.9427644905233173e-06, "loss": 19.8047, "step": 14259 }, { "epoch": 0.13498546965666738, "grad_norm": 887.979736328125, "learning_rate": 1.942754266764997e-06, "loss": 41.4609, "step": 14260 }, { "epoch": 0.13499493567838244, "grad_norm": 277.2003173828125, "learning_rate": 1.942744042120549e-06, "loss": 21.0547, "step": 14261 }, { "epoch": 0.1350044017000975, "grad_norm": 4.32442569732666, "learning_rate": 1.942733816589983e-06, "loss": 0.9893, "step": 14262 }, { "epoch": 0.13501386772181256, "grad_norm": 426.88409423828125, "learning_rate": 1.9427235901733083e-06, "loss": 32.3281, "step": 14263 }, { "epoch": 0.1350233337435276, "grad_norm": 452.5902099609375, "learning_rate": 1.9427133628705348e-06, "loss": 37.6094, "step": 14264 }, { "epoch": 0.13503279976524266, "grad_norm": 304.7467956542969, "learning_rate": 1.9427031346816724e-06, "loss": 15.0234, "step": 14265 }, { "epoch": 0.13504226578695772, "grad_norm": 233.39991760253906, "learning_rate": 1.9426929056067297e-06, "loss": 18.4297, "step": 14266 }, { "epoch": 0.13505173180867278, "grad_norm": 297.34393310546875, "learning_rate": 1.9426826756457173e-06, "loss": 28.8359, "step": 14267 }, { "epoch": 0.13506119783038784, "grad_norm": 3.0538952350616455, "learning_rate": 1.942672444798645e-06, "loss": 0.9043, "step": 14268 }, { "epoch": 0.13507066385210287, "grad_norm": 399.8939208984375, "learning_rate": 1.942662213065521e-06, "loss": 43.5625, "step": 14269 }, { "epoch": 0.13508012987381793, "grad_norm": 467.2568359375, "learning_rate": 1.9426519804463566e-06, "loss": 25.4297, "step": 14270 }, { "epoch": 0.135089595895533, "grad_norm": 132.637939453125, "learning_rate": 1.9426417469411606e-06, "loss": 18.5938, "step": 14271 }, { "epoch": 0.13509906191724805, "grad_norm": 390.696044921875, "learning_rate": 1.9426315125499423e-06, "loss": 26.8906, "step": 14272 }, { "epoch": 0.13510852793896308, "grad_norm": 315.2506408691406, "learning_rate": 1.942621277272712e-06, "loss": 27.0703, "step": 14273 }, { "epoch": 0.13511799396067814, "grad_norm": 496.1028747558594, "learning_rate": 1.9426110411094786e-06, "loss": 18.5781, "step": 14274 }, { "epoch": 0.1351274599823932, "grad_norm": 306.5714111328125, "learning_rate": 1.9426008040602526e-06, "loss": 16.7734, "step": 14275 }, { "epoch": 0.13513692600410826, "grad_norm": 2.76682186126709, "learning_rate": 1.9425905661250434e-06, "loss": 1.0996, "step": 14276 }, { "epoch": 0.13514639202582332, "grad_norm": 267.9477233886719, "learning_rate": 1.94258032730386e-06, "loss": 15.457, "step": 14277 }, { "epoch": 0.13515585804753835, "grad_norm": 236.2344512939453, "learning_rate": 1.942570087596712e-06, "loss": 18.6797, "step": 14278 }, { "epoch": 0.1351653240692534, "grad_norm": 318.80670166015625, "learning_rate": 1.94255984700361e-06, "loss": 20.8984, "step": 14279 }, { "epoch": 0.13517479009096847, "grad_norm": 400.8988037109375, "learning_rate": 1.9425496055245627e-06, "loss": 20.4531, "step": 14280 }, { "epoch": 0.13518425611268353, "grad_norm": 451.33221435546875, "learning_rate": 1.9425393631595804e-06, "loss": 15.0234, "step": 14281 }, { "epoch": 0.13519372213439856, "grad_norm": 556.8779296875, "learning_rate": 1.9425291199086727e-06, "loss": 28.5391, "step": 14282 }, { "epoch": 0.13520318815611362, "grad_norm": 324.721435546875, "learning_rate": 1.9425188757718486e-06, "loss": 26.4609, "step": 14283 }, { "epoch": 0.13521265417782868, "grad_norm": 340.038818359375, "learning_rate": 1.942508630749118e-06, "loss": 20.7266, "step": 14284 }, { "epoch": 0.13522212019954374, "grad_norm": 227.8000946044922, "learning_rate": 1.9424983848404906e-06, "loss": 26.4609, "step": 14285 }, { "epoch": 0.1352315862212588, "grad_norm": 333.4505920410156, "learning_rate": 1.942488138045976e-06, "loss": 20.8906, "step": 14286 }, { "epoch": 0.13524105224297384, "grad_norm": 346.12481689453125, "learning_rate": 1.9424778903655837e-06, "loss": 23.0078, "step": 14287 }, { "epoch": 0.1352505182646889, "grad_norm": 312.9981689453125, "learning_rate": 1.942467641799324e-06, "loss": 32.5234, "step": 14288 }, { "epoch": 0.13525998428640396, "grad_norm": 272.75189208984375, "learning_rate": 1.9424573923472056e-06, "loss": 22.2734, "step": 14289 }, { "epoch": 0.13526945030811902, "grad_norm": 422.30047607421875, "learning_rate": 1.942447142009239e-06, "loss": 36.125, "step": 14290 }, { "epoch": 0.13527891632983405, "grad_norm": 413.9452209472656, "learning_rate": 1.942436890785433e-06, "loss": 18.5859, "step": 14291 }, { "epoch": 0.1352883823515491, "grad_norm": 415.3386535644531, "learning_rate": 1.9424266386757977e-06, "loss": 35.25, "step": 14292 }, { "epoch": 0.13529784837326417, "grad_norm": 779.1536254882812, "learning_rate": 1.9424163856803426e-06, "loss": 36.2969, "step": 14293 }, { "epoch": 0.13530731439497923, "grad_norm": 804.4453125, "learning_rate": 1.9424061317990775e-06, "loss": 50.0, "step": 14294 }, { "epoch": 0.1353167804166943, "grad_norm": 450.2271728515625, "learning_rate": 1.9423958770320123e-06, "loss": 49.9688, "step": 14295 }, { "epoch": 0.13532624643840932, "grad_norm": 310.309814453125, "learning_rate": 1.9423856213791557e-06, "loss": 20.4531, "step": 14296 }, { "epoch": 0.13533571246012438, "grad_norm": 487.6849365234375, "learning_rate": 1.9423753648405182e-06, "loss": 38.6016, "step": 14297 }, { "epoch": 0.13534517848183944, "grad_norm": 370.09765625, "learning_rate": 1.942365107416109e-06, "loss": 46.1406, "step": 14298 }, { "epoch": 0.1353546445035545, "grad_norm": 537.191162109375, "learning_rate": 1.9423548491059382e-06, "loss": 35.9727, "step": 14299 }, { "epoch": 0.13536411052526953, "grad_norm": 652.0018920898438, "learning_rate": 1.942344589910015e-06, "loss": 40.2188, "step": 14300 }, { "epoch": 0.1353735765469846, "grad_norm": 475.2620849609375, "learning_rate": 1.942334329828349e-06, "loss": 45.7031, "step": 14301 }, { "epoch": 0.13538304256869965, "grad_norm": 499.7846374511719, "learning_rate": 1.94232406886095e-06, "loss": 41.0781, "step": 14302 }, { "epoch": 0.1353925085904147, "grad_norm": 759.3964233398438, "learning_rate": 1.942313807007828e-06, "loss": 37.1797, "step": 14303 }, { "epoch": 0.13540197461212977, "grad_norm": 360.4717712402344, "learning_rate": 1.9423035442689924e-06, "loss": 39.7734, "step": 14304 }, { "epoch": 0.1354114406338448, "grad_norm": 288.0164794921875, "learning_rate": 1.942293280644453e-06, "loss": 22.0781, "step": 14305 }, { "epoch": 0.13542090665555986, "grad_norm": 342.9420471191406, "learning_rate": 1.9422830161342185e-06, "loss": 45.1875, "step": 14306 }, { "epoch": 0.13543037267727492, "grad_norm": 449.69415283203125, "learning_rate": 1.9422727507382994e-06, "loss": 42.2578, "step": 14307 }, { "epoch": 0.13543983869898998, "grad_norm": 289.931884765625, "learning_rate": 1.9422624844567056e-06, "loss": 17.3281, "step": 14308 }, { "epoch": 0.13544930472070502, "grad_norm": 285.14739990234375, "learning_rate": 1.9422522172894458e-06, "loss": 12.1758, "step": 14309 }, { "epoch": 0.13545877074242008, "grad_norm": 390.376953125, "learning_rate": 1.942241949236531e-06, "loss": 23.0156, "step": 14310 }, { "epoch": 0.13546823676413514, "grad_norm": 182.66494750976562, "learning_rate": 1.9422316802979696e-06, "loss": 17.4062, "step": 14311 }, { "epoch": 0.1354777027858502, "grad_norm": 185.91004943847656, "learning_rate": 1.9422214104737715e-06, "loss": 22.6719, "step": 14312 }, { "epoch": 0.13548716880756526, "grad_norm": 249.09140014648438, "learning_rate": 1.9422111397639467e-06, "loss": 26.9531, "step": 14313 }, { "epoch": 0.1354966348292803, "grad_norm": 721.0611572265625, "learning_rate": 1.942200868168505e-06, "loss": 44.8203, "step": 14314 }, { "epoch": 0.13550610085099535, "grad_norm": 516.061279296875, "learning_rate": 1.9421905956874554e-06, "loss": 24.1016, "step": 14315 }, { "epoch": 0.1355155668727104, "grad_norm": 526.4065551757812, "learning_rate": 1.942180322320808e-06, "loss": 22.8438, "step": 14316 }, { "epoch": 0.13552503289442547, "grad_norm": 463.66571044921875, "learning_rate": 1.942170048068573e-06, "loss": 44.9219, "step": 14317 }, { "epoch": 0.1355344989161405, "grad_norm": 242.68045043945312, "learning_rate": 1.9421597729307588e-06, "loss": 18.3477, "step": 14318 }, { "epoch": 0.13554396493785556, "grad_norm": 180.79383850097656, "learning_rate": 1.9421494969073756e-06, "loss": 24.875, "step": 14319 }, { "epoch": 0.13555343095957062, "grad_norm": 549.184326171875, "learning_rate": 1.942139219998434e-06, "loss": 43.5781, "step": 14320 }, { "epoch": 0.13556289698128568, "grad_norm": 253.86744689941406, "learning_rate": 1.942128942203942e-06, "loss": 8.2617, "step": 14321 }, { "epoch": 0.13557236300300074, "grad_norm": 262.84063720703125, "learning_rate": 1.9421186635239103e-06, "loss": 16.25, "step": 14322 }, { "epoch": 0.13558182902471577, "grad_norm": 274.1982727050781, "learning_rate": 1.9421083839583484e-06, "loss": 16.625, "step": 14323 }, { "epoch": 0.13559129504643083, "grad_norm": 150.78271484375, "learning_rate": 1.942098103507266e-06, "loss": 8.6172, "step": 14324 }, { "epoch": 0.1356007610681459, "grad_norm": 282.309326171875, "learning_rate": 1.9420878221706726e-06, "loss": 34.0938, "step": 14325 }, { "epoch": 0.13561022708986095, "grad_norm": 546.4632568359375, "learning_rate": 1.9420775399485777e-06, "loss": 43.8594, "step": 14326 }, { "epoch": 0.135619693111576, "grad_norm": 3.3821778297424316, "learning_rate": 1.9420672568409913e-06, "loss": 0.9565, "step": 14327 }, { "epoch": 0.13562915913329104, "grad_norm": 218.5395050048828, "learning_rate": 1.9420569728479234e-06, "loss": 16.2031, "step": 14328 }, { "epoch": 0.1356386251550061, "grad_norm": 462.84063720703125, "learning_rate": 1.9420466879693825e-06, "loss": 21.1484, "step": 14329 }, { "epoch": 0.13564809117672116, "grad_norm": 174.74119567871094, "learning_rate": 1.9420364022053796e-06, "loss": 19.8047, "step": 14330 }, { "epoch": 0.13565755719843622, "grad_norm": 591.1915283203125, "learning_rate": 1.9420261155559236e-06, "loss": 40.0547, "step": 14331 }, { "epoch": 0.13566702322015126, "grad_norm": 2.5343663692474365, "learning_rate": 1.9420158280210242e-06, "loss": 0.9028, "step": 14332 }, { "epoch": 0.13567648924186632, "grad_norm": 453.20965576171875, "learning_rate": 1.9420055396006913e-06, "loss": 40.5938, "step": 14333 }, { "epoch": 0.13568595526358138, "grad_norm": 528.5856323242188, "learning_rate": 1.9419952502949345e-06, "loss": 40.0469, "step": 14334 }, { "epoch": 0.13569542128529644, "grad_norm": 422.9614562988281, "learning_rate": 1.9419849601037635e-06, "loss": 24.2578, "step": 14335 }, { "epoch": 0.1357048873070115, "grad_norm": 2.8967514038085938, "learning_rate": 1.9419746690271878e-06, "loss": 1.0, "step": 14336 }, { "epoch": 0.13571435332872653, "grad_norm": 321.081787109375, "learning_rate": 1.9419643770652174e-06, "loss": 26.4688, "step": 14337 }, { "epoch": 0.1357238193504416, "grad_norm": 281.95123291015625, "learning_rate": 1.9419540842178616e-06, "loss": 28.0938, "step": 14338 }, { "epoch": 0.13573328537215665, "grad_norm": 338.8901062011719, "learning_rate": 1.94194379048513e-06, "loss": 20.2656, "step": 14339 }, { "epoch": 0.1357427513938717, "grad_norm": 282.5542297363281, "learning_rate": 1.9419334958670327e-06, "loss": 20.5312, "step": 14340 }, { "epoch": 0.13575221741558674, "grad_norm": 2.748065233230591, "learning_rate": 1.9419232003635794e-06, "loss": 0.9019, "step": 14341 }, { "epoch": 0.1357616834373018, "grad_norm": 216.1139678955078, "learning_rate": 1.9419129039747794e-06, "loss": 13.6016, "step": 14342 }, { "epoch": 0.13577114945901686, "grad_norm": 216.45445251464844, "learning_rate": 1.9419026067006427e-06, "loss": 17.3516, "step": 14343 }, { "epoch": 0.13578061548073192, "grad_norm": 391.2931213378906, "learning_rate": 1.9418923085411783e-06, "loss": 14.1133, "step": 14344 }, { "epoch": 0.13579008150244698, "grad_norm": 162.72409057617188, "learning_rate": 1.941882009496397e-06, "loss": 15.5156, "step": 14345 }, { "epoch": 0.135799547524162, "grad_norm": 3.540029287338257, "learning_rate": 1.9418717095663078e-06, "loss": 1.0161, "step": 14346 }, { "epoch": 0.13580901354587707, "grad_norm": 350.5783996582031, "learning_rate": 1.94186140875092e-06, "loss": 19.9453, "step": 14347 }, { "epoch": 0.13581847956759213, "grad_norm": 176.37191772460938, "learning_rate": 1.9418511070502445e-06, "loss": 6.2266, "step": 14348 }, { "epoch": 0.1358279455893072, "grad_norm": 569.4752807617188, "learning_rate": 1.9418408044642897e-06, "loss": 15.5625, "step": 14349 }, { "epoch": 0.13583741161102222, "grad_norm": 349.7993469238281, "learning_rate": 1.941830500993066e-06, "loss": 12.3398, "step": 14350 }, { "epoch": 0.13584687763273728, "grad_norm": 300.9669189453125, "learning_rate": 1.9418201966365827e-06, "loss": 12.8438, "step": 14351 }, { "epoch": 0.13585634365445234, "grad_norm": 436.9273681640625, "learning_rate": 1.9418098913948497e-06, "loss": 33.0703, "step": 14352 }, { "epoch": 0.1358658096761674, "grad_norm": 246.7513885498047, "learning_rate": 1.941799585267877e-06, "loss": 24.1797, "step": 14353 }, { "epoch": 0.13587527569788246, "grad_norm": 319.73773193359375, "learning_rate": 1.9417892782556736e-06, "loss": 43.1406, "step": 14354 }, { "epoch": 0.1358847417195975, "grad_norm": 1370.270263671875, "learning_rate": 1.9417789703582496e-06, "loss": 58.3438, "step": 14355 }, { "epoch": 0.13589420774131256, "grad_norm": 733.3107299804688, "learning_rate": 1.9417686615756147e-06, "loss": 39.1797, "step": 14356 }, { "epoch": 0.13590367376302762, "grad_norm": 1589.033935546875, "learning_rate": 1.9417583519077783e-06, "loss": 34.4219, "step": 14357 }, { "epoch": 0.13591313978474268, "grad_norm": 419.569580078125, "learning_rate": 1.9417480413547506e-06, "loss": 39.0469, "step": 14358 }, { "epoch": 0.1359226058064577, "grad_norm": 3.1426379680633545, "learning_rate": 1.9417377299165413e-06, "loss": 1.1025, "step": 14359 }, { "epoch": 0.13593207182817277, "grad_norm": 184.78567504882812, "learning_rate": 1.941727417593159e-06, "loss": 14.6797, "step": 14360 }, { "epoch": 0.13594153784988783, "grad_norm": 368.5979919433594, "learning_rate": 1.941717104384615e-06, "loss": 45.1016, "step": 14361 }, { "epoch": 0.1359510038716029, "grad_norm": 317.9703674316406, "learning_rate": 1.941706790290918e-06, "loss": 33.5469, "step": 14362 }, { "epoch": 0.13596046989331795, "grad_norm": 247.11314392089844, "learning_rate": 1.9416964753120774e-06, "loss": 29.9375, "step": 14363 }, { "epoch": 0.13596993591503298, "grad_norm": 393.7843017578125, "learning_rate": 1.9416861594481036e-06, "loss": 8.1211, "step": 14364 }, { "epoch": 0.13597940193674804, "grad_norm": 195.6481170654297, "learning_rate": 1.941675842699006e-06, "loss": 12.6172, "step": 14365 }, { "epoch": 0.1359888679584631, "grad_norm": 765.937255859375, "learning_rate": 1.941665525064795e-06, "loss": 45.5859, "step": 14366 }, { "epoch": 0.13599833398017816, "grad_norm": 389.0563049316406, "learning_rate": 1.9416552065454794e-06, "loss": 39.7422, "step": 14367 }, { "epoch": 0.1360078000018932, "grad_norm": 306.54901123046875, "learning_rate": 1.9416448871410687e-06, "loss": 41.8594, "step": 14368 }, { "epoch": 0.13601726602360825, "grad_norm": 355.2373352050781, "learning_rate": 1.941634566851573e-06, "loss": 18.3516, "step": 14369 }, { "epoch": 0.1360267320453233, "grad_norm": 639.3895874023438, "learning_rate": 1.941624245677003e-06, "loss": 54.8672, "step": 14370 }, { "epoch": 0.13603619806703837, "grad_norm": 420.28448486328125, "learning_rate": 1.9416139236173663e-06, "loss": 21.6875, "step": 14371 }, { "epoch": 0.13604566408875343, "grad_norm": 424.9850769042969, "learning_rate": 1.9416036006726743e-06, "loss": 43.7812, "step": 14372 }, { "epoch": 0.13605513011046846, "grad_norm": 455.503662109375, "learning_rate": 1.941593276842936e-06, "loss": 34.1875, "step": 14373 }, { "epoch": 0.13606459613218352, "grad_norm": 589.6475219726562, "learning_rate": 1.9415829521281618e-06, "loss": 51.1094, "step": 14374 }, { "epoch": 0.13607406215389858, "grad_norm": 602.8186645507812, "learning_rate": 1.9415726265283607e-06, "loss": 48.9219, "step": 14375 }, { "epoch": 0.13608352817561364, "grad_norm": 347.6571960449219, "learning_rate": 1.9415623000435424e-06, "loss": 36.375, "step": 14376 }, { "epoch": 0.13609299419732868, "grad_norm": 377.8717346191406, "learning_rate": 1.9415519726737165e-06, "loss": 33.8594, "step": 14377 }, { "epoch": 0.13610246021904374, "grad_norm": 175.28419494628906, "learning_rate": 1.9415416444188936e-06, "loss": 8.2695, "step": 14378 }, { "epoch": 0.1361119262407588, "grad_norm": 328.6724853515625, "learning_rate": 1.9415313152790823e-06, "loss": 40.9766, "step": 14379 }, { "epoch": 0.13612139226247386, "grad_norm": 2943.770263671875, "learning_rate": 1.9415209852542934e-06, "loss": 25.9531, "step": 14380 }, { "epoch": 0.13613085828418892, "grad_norm": 382.8528747558594, "learning_rate": 1.9415106543445356e-06, "loss": 33.4531, "step": 14381 }, { "epoch": 0.13614032430590395, "grad_norm": 321.88623046875, "learning_rate": 1.941500322549819e-06, "loss": 30.457, "step": 14382 }, { "epoch": 0.136149790327619, "grad_norm": 201.5207061767578, "learning_rate": 1.9414899898701535e-06, "loss": 10.8047, "step": 14383 }, { "epoch": 0.13615925634933407, "grad_norm": 459.3773193359375, "learning_rate": 1.9414796563055488e-06, "loss": 50.0234, "step": 14384 }, { "epoch": 0.13616872237104913, "grad_norm": 222.8262939453125, "learning_rate": 1.941469321856014e-06, "loss": 25.4531, "step": 14385 }, { "epoch": 0.13617818839276416, "grad_norm": 264.5660705566406, "learning_rate": 1.9414589865215593e-06, "loss": 18.7031, "step": 14386 }, { "epoch": 0.13618765441447922, "grad_norm": 590.0133666992188, "learning_rate": 1.941448650302195e-06, "loss": 35.5469, "step": 14387 }, { "epoch": 0.13619712043619428, "grad_norm": 332.54998779296875, "learning_rate": 1.9414383131979297e-06, "loss": 11.0586, "step": 14388 }, { "epoch": 0.13620658645790934, "grad_norm": 740.3516845703125, "learning_rate": 1.941427975208774e-06, "loss": 43.8516, "step": 14389 }, { "epoch": 0.1362160524796244, "grad_norm": 504.6856994628906, "learning_rate": 1.9414176363347366e-06, "loss": 44.4922, "step": 14390 }, { "epoch": 0.13622551850133943, "grad_norm": 478.0530090332031, "learning_rate": 1.9414072965758286e-06, "loss": 53.2969, "step": 14391 }, { "epoch": 0.1362349845230545, "grad_norm": 200.093505859375, "learning_rate": 1.9413969559320588e-06, "loss": 17.2344, "step": 14392 }, { "epoch": 0.13624445054476955, "grad_norm": 555.2085571289062, "learning_rate": 1.941386614403437e-06, "loss": 54.2188, "step": 14393 }, { "epoch": 0.1362539165664846, "grad_norm": 1164.4610595703125, "learning_rate": 1.941376271989973e-06, "loss": 46.4805, "step": 14394 }, { "epoch": 0.13626338258819964, "grad_norm": 461.6676940917969, "learning_rate": 1.9413659286916768e-06, "loss": 27.2656, "step": 14395 }, { "epoch": 0.1362728486099147, "grad_norm": 473.1082763671875, "learning_rate": 1.9413555845085573e-06, "loss": 29.7812, "step": 14396 }, { "epoch": 0.13628231463162976, "grad_norm": 270.583740234375, "learning_rate": 1.941345239440625e-06, "loss": 18.9297, "step": 14397 }, { "epoch": 0.13629178065334482, "grad_norm": 387.0010986328125, "learning_rate": 1.94133489348789e-06, "loss": 10.3125, "step": 14398 }, { "epoch": 0.13630124667505988, "grad_norm": 603.7037963867188, "learning_rate": 1.9413245466503606e-06, "loss": 12.4727, "step": 14399 }, { "epoch": 0.13631071269677492, "grad_norm": 366.6186828613281, "learning_rate": 1.941314198928048e-06, "loss": 15.2344, "step": 14400 }, { "epoch": 0.13632017871848998, "grad_norm": 350.64312744140625, "learning_rate": 1.941303850320961e-06, "loss": 18.7109, "step": 14401 }, { "epoch": 0.13632964474020504, "grad_norm": 428.6097412109375, "learning_rate": 1.9412935008291094e-06, "loss": 41.7656, "step": 14402 }, { "epoch": 0.1363391107619201, "grad_norm": 578.7471923828125, "learning_rate": 1.9412831504525032e-06, "loss": 48.0, "step": 14403 }, { "epoch": 0.13634857678363513, "grad_norm": 613.8093872070312, "learning_rate": 1.9412727991911525e-06, "loss": 51.9219, "step": 14404 }, { "epoch": 0.1363580428053502, "grad_norm": 250.78173828125, "learning_rate": 1.941262447045066e-06, "loss": 29.0625, "step": 14405 }, { "epoch": 0.13636750882706525, "grad_norm": 603.5148315429688, "learning_rate": 1.9412520940142547e-06, "loss": 39.2266, "step": 14406 }, { "epoch": 0.1363769748487803, "grad_norm": 1130.936279296875, "learning_rate": 1.941241740098727e-06, "loss": 58.4219, "step": 14407 }, { "epoch": 0.13638644087049537, "grad_norm": 284.7001953125, "learning_rate": 1.941231385298494e-06, "loss": 22.4219, "step": 14408 }, { "epoch": 0.1363959068922104, "grad_norm": 703.0911865234375, "learning_rate": 1.9412210296135645e-06, "loss": 55.3438, "step": 14409 }, { "epoch": 0.13640537291392546, "grad_norm": 287.46197509765625, "learning_rate": 1.9412106730439485e-06, "loss": 15.6719, "step": 14410 }, { "epoch": 0.13641483893564052, "grad_norm": 372.4715270996094, "learning_rate": 1.9412003155896553e-06, "loss": 29.8984, "step": 14411 }, { "epoch": 0.13642430495735558, "grad_norm": 338.6893310546875, "learning_rate": 1.941189957250695e-06, "loss": 16.3906, "step": 14412 }, { "epoch": 0.1364337709790706, "grad_norm": 448.7368469238281, "learning_rate": 1.941179598027078e-06, "loss": 46.8438, "step": 14413 }, { "epoch": 0.13644323700078567, "grad_norm": 282.96319580078125, "learning_rate": 1.941169237918813e-06, "loss": 24.9297, "step": 14414 }, { "epoch": 0.13645270302250073, "grad_norm": 622.4090576171875, "learning_rate": 1.94115887692591e-06, "loss": 8.0, "step": 14415 }, { "epoch": 0.1364621690442158, "grad_norm": 547.6738891601562, "learning_rate": 1.941148515048379e-06, "loss": 53.1719, "step": 14416 }, { "epoch": 0.13647163506593085, "grad_norm": 340.5558166503906, "learning_rate": 1.9411381522862296e-06, "loss": 18.2227, "step": 14417 }, { "epoch": 0.13648110108764588, "grad_norm": 279.1901550292969, "learning_rate": 1.941127788639472e-06, "loss": 25.625, "step": 14418 }, { "epoch": 0.13649056710936094, "grad_norm": 435.7654113769531, "learning_rate": 1.9411174241081153e-06, "loss": 22.7344, "step": 14419 }, { "epoch": 0.136500033131076, "grad_norm": 355.8713684082031, "learning_rate": 1.941107058692169e-06, "loss": 34.9531, "step": 14420 }, { "epoch": 0.13650949915279106, "grad_norm": 768.5711059570312, "learning_rate": 1.9410966923916437e-06, "loss": 35.2617, "step": 14421 }, { "epoch": 0.13651896517450612, "grad_norm": 447.43719482421875, "learning_rate": 1.9410863252065488e-06, "loss": 32.8594, "step": 14422 }, { "epoch": 0.13652843119622116, "grad_norm": 346.3257141113281, "learning_rate": 1.941075957136894e-06, "loss": 22.3125, "step": 14423 }, { "epoch": 0.13653789721793622, "grad_norm": 249.5777130126953, "learning_rate": 1.9410655881826885e-06, "loss": 22.9453, "step": 14424 }, { "epoch": 0.13654736323965128, "grad_norm": 565.896728515625, "learning_rate": 1.9410552183439427e-06, "loss": 45.3359, "step": 14425 }, { "epoch": 0.13655682926136634, "grad_norm": 1192.61279296875, "learning_rate": 1.9410448476206667e-06, "loss": 43.7969, "step": 14426 }, { "epoch": 0.13656629528308137, "grad_norm": 252.94654846191406, "learning_rate": 1.9410344760128692e-06, "loss": 14.8516, "step": 14427 }, { "epoch": 0.13657576130479643, "grad_norm": 364.39715576171875, "learning_rate": 1.941024103520561e-06, "loss": 32.7188, "step": 14428 }, { "epoch": 0.1365852273265115, "grad_norm": 358.43499755859375, "learning_rate": 1.941013730143751e-06, "loss": 19.4922, "step": 14429 }, { "epoch": 0.13659469334822655, "grad_norm": 423.6853942871094, "learning_rate": 1.9410033558824496e-06, "loss": 22.0312, "step": 14430 }, { "epoch": 0.1366041593699416, "grad_norm": 673.2259521484375, "learning_rate": 1.940992980736666e-06, "loss": 49.5938, "step": 14431 }, { "epoch": 0.13661362539165664, "grad_norm": 500.832275390625, "learning_rate": 1.9409826047064104e-06, "loss": 21.6211, "step": 14432 }, { "epoch": 0.1366230914133717, "grad_norm": 428.1765441894531, "learning_rate": 1.9409722277916923e-06, "loss": 22.5, "step": 14433 }, { "epoch": 0.13663255743508676, "grad_norm": 330.9739074707031, "learning_rate": 1.940961849992521e-06, "loss": 17.6562, "step": 14434 }, { "epoch": 0.13664202345680182, "grad_norm": 314.803955078125, "learning_rate": 1.940951471308908e-06, "loss": 20.9609, "step": 14435 }, { "epoch": 0.13665148947851685, "grad_norm": 510.18927001953125, "learning_rate": 1.9409410917408608e-06, "loss": 33.4531, "step": 14436 }, { "epoch": 0.1366609555002319, "grad_norm": 607.7249755859375, "learning_rate": 1.9409307112883906e-06, "loss": 48.3672, "step": 14437 }, { "epoch": 0.13667042152194697, "grad_norm": 441.3402099609375, "learning_rate": 1.9409203299515067e-06, "loss": 25.5078, "step": 14438 }, { "epoch": 0.13667988754366203, "grad_norm": 451.9589538574219, "learning_rate": 1.9409099477302187e-06, "loss": 32.2031, "step": 14439 }, { "epoch": 0.1366893535653771, "grad_norm": 1083.9979248046875, "learning_rate": 1.940899564624537e-06, "loss": 45.2578, "step": 14440 }, { "epoch": 0.13669881958709212, "grad_norm": 470.5418395996094, "learning_rate": 1.94088918063447e-06, "loss": 45.0312, "step": 14441 }, { "epoch": 0.13670828560880718, "grad_norm": 191.46092224121094, "learning_rate": 1.9408787957600293e-06, "loss": 19.5, "step": 14442 }, { "epoch": 0.13671775163052224, "grad_norm": 217.44686889648438, "learning_rate": 1.9408684100012234e-06, "loss": 12.4805, "step": 14443 }, { "epoch": 0.1367272176522373, "grad_norm": 185.24842834472656, "learning_rate": 1.9408580233580626e-06, "loss": 14.6094, "step": 14444 }, { "epoch": 0.13673668367395234, "grad_norm": 351.1026306152344, "learning_rate": 1.9408476358305563e-06, "loss": 23.75, "step": 14445 }, { "epoch": 0.1367461496956674, "grad_norm": 588.9005737304688, "learning_rate": 1.9408372474187145e-06, "loss": 33.9531, "step": 14446 }, { "epoch": 0.13675561571738246, "grad_norm": 239.61715698242188, "learning_rate": 1.9408268581225465e-06, "loss": 20.9688, "step": 14447 }, { "epoch": 0.13676508173909752, "grad_norm": 510.9012145996094, "learning_rate": 1.940816467942063e-06, "loss": 27.5312, "step": 14448 }, { "epoch": 0.13677454776081258, "grad_norm": 357.74835205078125, "learning_rate": 1.940806076877273e-06, "loss": 37.875, "step": 14449 }, { "epoch": 0.1367840137825276, "grad_norm": 834.3912963867188, "learning_rate": 1.9407956849281865e-06, "loss": 49.3125, "step": 14450 }, { "epoch": 0.13679347980424267, "grad_norm": 820.1607055664062, "learning_rate": 1.940785292094813e-06, "loss": 43.0156, "step": 14451 }, { "epoch": 0.13680294582595773, "grad_norm": 204.52906799316406, "learning_rate": 1.940774898377163e-06, "loss": 18.4297, "step": 14452 }, { "epoch": 0.1368124118476728, "grad_norm": 465.4677734375, "learning_rate": 1.940764503775246e-06, "loss": 23.625, "step": 14453 }, { "epoch": 0.13682187786938782, "grad_norm": 270.3006286621094, "learning_rate": 1.940754108289071e-06, "loss": 19.5234, "step": 14454 }, { "epoch": 0.13683134389110288, "grad_norm": 496.26397705078125, "learning_rate": 1.9407437119186485e-06, "loss": 46.5781, "step": 14455 }, { "epoch": 0.13684080991281794, "grad_norm": 621.424560546875, "learning_rate": 1.940733314663988e-06, "loss": 36.2812, "step": 14456 }, { "epoch": 0.136850275934533, "grad_norm": 236.60400390625, "learning_rate": 1.9407229165250995e-06, "loss": 17.3789, "step": 14457 }, { "epoch": 0.13685974195624806, "grad_norm": 349.22100830078125, "learning_rate": 1.9407125175019927e-06, "loss": 23.5703, "step": 14458 }, { "epoch": 0.1368692079779631, "grad_norm": 449.05224609375, "learning_rate": 1.9407021175946775e-06, "loss": 39.2578, "step": 14459 }, { "epoch": 0.13687867399967815, "grad_norm": 463.70794677734375, "learning_rate": 1.9406917168031633e-06, "loss": 27.0273, "step": 14460 }, { "epoch": 0.1368881400213932, "grad_norm": 360.3856201171875, "learning_rate": 1.94068131512746e-06, "loss": 28.7109, "step": 14461 }, { "epoch": 0.13689760604310827, "grad_norm": 3.029500961303711, "learning_rate": 1.940670912567577e-06, "loss": 1.0229, "step": 14462 }, { "epoch": 0.1369070720648233, "grad_norm": 641.0867919921875, "learning_rate": 1.9406605091235255e-06, "loss": 20.9805, "step": 14463 }, { "epoch": 0.13691653808653836, "grad_norm": 453.7593688964844, "learning_rate": 1.9406501047953137e-06, "loss": 24.7188, "step": 14464 }, { "epoch": 0.13692600410825342, "grad_norm": 3.157745838165283, "learning_rate": 1.9406396995829524e-06, "loss": 0.8594, "step": 14465 }, { "epoch": 0.13693547012996848, "grad_norm": 443.2610168457031, "learning_rate": 1.9406292934864506e-06, "loss": 68.9688, "step": 14466 }, { "epoch": 0.13694493615168354, "grad_norm": 425.0729064941406, "learning_rate": 1.9406188865058186e-06, "loss": 18.5703, "step": 14467 }, { "epoch": 0.13695440217339858, "grad_norm": 378.45648193359375, "learning_rate": 1.940608478641066e-06, "loss": 17.957, "step": 14468 }, { "epoch": 0.13696386819511364, "grad_norm": 593.649658203125, "learning_rate": 1.9405980698922024e-06, "loss": 22.75, "step": 14469 }, { "epoch": 0.1369733342168287, "grad_norm": 2.674243211746216, "learning_rate": 1.9405876602592385e-06, "loss": 0.8962, "step": 14470 }, { "epoch": 0.13698280023854376, "grad_norm": 2.755335569381714, "learning_rate": 1.940577249742183e-06, "loss": 0.8269, "step": 14471 }, { "epoch": 0.1369922662602588, "grad_norm": 496.1726379394531, "learning_rate": 1.940566838341046e-06, "loss": 20.4062, "step": 14472 }, { "epoch": 0.13700173228197385, "grad_norm": 1147.906494140625, "learning_rate": 1.940556426055837e-06, "loss": 24.8906, "step": 14473 }, { "epoch": 0.1370111983036889, "grad_norm": 756.1587524414062, "learning_rate": 1.940546012886567e-06, "loss": 41.8203, "step": 14474 }, { "epoch": 0.13702066432540397, "grad_norm": 204.9695281982422, "learning_rate": 1.940535598833244e-06, "loss": 16.75, "step": 14475 }, { "epoch": 0.13703013034711903, "grad_norm": 495.5716247558594, "learning_rate": 1.9405251838958795e-06, "loss": 45.7969, "step": 14476 }, { "epoch": 0.13703959636883406, "grad_norm": 282.9782409667969, "learning_rate": 1.940514768074482e-06, "loss": 18.5781, "step": 14477 }, { "epoch": 0.13704906239054912, "grad_norm": 454.7116394042969, "learning_rate": 1.940504351369062e-06, "loss": 10.668, "step": 14478 }, { "epoch": 0.13705852841226418, "grad_norm": 433.0386657714844, "learning_rate": 1.9404939337796292e-06, "loss": 33.7188, "step": 14479 }, { "epoch": 0.13706799443397924, "grad_norm": 3.1748204231262207, "learning_rate": 1.940483515306193e-06, "loss": 0.8975, "step": 14480 }, { "epoch": 0.13707746045569427, "grad_norm": 314.2892761230469, "learning_rate": 1.940473095948764e-06, "loss": 20.2539, "step": 14481 }, { "epoch": 0.13708692647740933, "grad_norm": 457.5575256347656, "learning_rate": 1.940462675707351e-06, "loss": 45.5156, "step": 14482 }, { "epoch": 0.1370963924991244, "grad_norm": 3.34739089012146, "learning_rate": 1.9404522545819643e-06, "loss": 1.0029, "step": 14483 }, { "epoch": 0.13710585852083945, "grad_norm": 146.04843139648438, "learning_rate": 1.9404418325726135e-06, "loss": 20.3281, "step": 14484 }, { "epoch": 0.1371153245425545, "grad_norm": 354.4349060058594, "learning_rate": 1.9404314096793094e-06, "loss": 23.9453, "step": 14485 }, { "epoch": 0.13712479056426954, "grad_norm": 222.0786590576172, "learning_rate": 1.9404209859020603e-06, "loss": 10.3594, "step": 14486 }, { "epoch": 0.1371342565859846, "grad_norm": 382.436767578125, "learning_rate": 1.9404105612408764e-06, "loss": 19.0156, "step": 14487 }, { "epoch": 0.13714372260769966, "grad_norm": 540.5431518554688, "learning_rate": 1.9404001356957684e-06, "loss": 50.4219, "step": 14488 }, { "epoch": 0.13715318862941472, "grad_norm": 189.47349548339844, "learning_rate": 1.940389709266745e-06, "loss": 15.0781, "step": 14489 }, { "epoch": 0.13716265465112976, "grad_norm": 511.69537353515625, "learning_rate": 1.940379281953817e-06, "loss": 24.2578, "step": 14490 }, { "epoch": 0.13717212067284482, "grad_norm": 467.4491882324219, "learning_rate": 1.940368853756993e-06, "loss": 25.3672, "step": 14491 }, { "epoch": 0.13718158669455988, "grad_norm": 541.051025390625, "learning_rate": 1.9403584246762834e-06, "loss": 44.2422, "step": 14492 }, { "epoch": 0.13719105271627494, "grad_norm": 266.64813232421875, "learning_rate": 1.9403479947116984e-06, "loss": 20.125, "step": 14493 }, { "epoch": 0.13720051873799, "grad_norm": 686.5930786132812, "learning_rate": 1.9403375638632476e-06, "loss": 52.2656, "step": 14494 }, { "epoch": 0.13720998475970503, "grad_norm": 1744.3304443359375, "learning_rate": 1.9403271321309403e-06, "loss": 57.9531, "step": 14495 }, { "epoch": 0.1372194507814201, "grad_norm": 295.7391052246094, "learning_rate": 1.940316699514787e-06, "loss": 28.4141, "step": 14496 }, { "epoch": 0.13722891680313515, "grad_norm": 431.36322021484375, "learning_rate": 1.9403062660147968e-06, "loss": 11.9727, "step": 14497 }, { "epoch": 0.1372383828248502, "grad_norm": 660.0068359375, "learning_rate": 1.94029583163098e-06, "loss": 21.5312, "step": 14498 }, { "epoch": 0.13724784884656524, "grad_norm": 229.7149200439453, "learning_rate": 1.9402853963633465e-06, "loss": 19.7734, "step": 14499 }, { "epoch": 0.1372573148682803, "grad_norm": 3.157325506210327, "learning_rate": 1.9402749602119057e-06, "loss": 0.939, "step": 14500 }, { "epoch": 0.13726678088999536, "grad_norm": 420.3076171875, "learning_rate": 1.940264523176668e-06, "loss": 30.6641, "step": 14501 }, { "epoch": 0.13727624691171042, "grad_norm": 219.36309814453125, "learning_rate": 1.940254085257642e-06, "loss": 17.2891, "step": 14502 }, { "epoch": 0.13728571293342548, "grad_norm": 425.1888732910156, "learning_rate": 1.940243646454839e-06, "loss": 52.4531, "step": 14503 }, { "epoch": 0.1372951789551405, "grad_norm": 708.7177124023438, "learning_rate": 1.940233206768268e-06, "loss": 19.5117, "step": 14504 }, { "epoch": 0.13730464497685557, "grad_norm": 496.3577575683594, "learning_rate": 1.9402227661979388e-06, "loss": 49.3125, "step": 14505 }, { "epoch": 0.13731411099857063, "grad_norm": 426.8575744628906, "learning_rate": 1.9402123247438617e-06, "loss": 19.1719, "step": 14506 }, { "epoch": 0.1373235770202857, "grad_norm": 191.63951110839844, "learning_rate": 1.940201882406046e-06, "loss": 20.7578, "step": 14507 }, { "epoch": 0.13733304304200075, "grad_norm": 522.3524780273438, "learning_rate": 1.940191439184501e-06, "loss": 34.3672, "step": 14508 }, { "epoch": 0.13734250906371578, "grad_norm": 513.3342895507812, "learning_rate": 1.940180995079238e-06, "loss": 20.3672, "step": 14509 }, { "epoch": 0.13735197508543084, "grad_norm": 400.19683837890625, "learning_rate": 1.940170550090266e-06, "loss": 40.4375, "step": 14510 }, { "epoch": 0.1373614411071459, "grad_norm": 541.7872314453125, "learning_rate": 1.940160104217595e-06, "loss": 37.2812, "step": 14511 }, { "epoch": 0.13737090712886096, "grad_norm": 575.3005981445312, "learning_rate": 1.9401496574612343e-06, "loss": 45.6094, "step": 14512 }, { "epoch": 0.137380373150576, "grad_norm": 779.1284790039062, "learning_rate": 1.9401392098211943e-06, "loss": 25.0234, "step": 14513 }, { "epoch": 0.13738983917229106, "grad_norm": 3.790364980697632, "learning_rate": 1.9401287612974842e-06, "loss": 1.0581, "step": 14514 }, { "epoch": 0.13739930519400612, "grad_norm": 544.45703125, "learning_rate": 1.9401183118901146e-06, "loss": 61.1328, "step": 14515 }, { "epoch": 0.13740877121572118, "grad_norm": 391.787353515625, "learning_rate": 1.940107861599095e-06, "loss": 48.0, "step": 14516 }, { "epoch": 0.13741823723743624, "grad_norm": 554.7273559570312, "learning_rate": 1.940097410424435e-06, "loss": 41.0859, "step": 14517 }, { "epoch": 0.13742770325915127, "grad_norm": 230.86343383789062, "learning_rate": 1.9400869583661446e-06, "loss": 8.9766, "step": 14518 }, { "epoch": 0.13743716928086633, "grad_norm": 548.0765380859375, "learning_rate": 1.9400765054242336e-06, "loss": 34.9453, "step": 14519 }, { "epoch": 0.1374466353025814, "grad_norm": 423.25982666015625, "learning_rate": 1.9400660515987117e-06, "loss": 23.2344, "step": 14520 }, { "epoch": 0.13745610132429645, "grad_norm": 2.9143121242523193, "learning_rate": 1.9400555968895893e-06, "loss": 0.8862, "step": 14521 }, { "epoch": 0.13746556734601148, "grad_norm": 434.7381286621094, "learning_rate": 1.9400451412968755e-06, "loss": 45.0781, "step": 14522 }, { "epoch": 0.13747503336772654, "grad_norm": 391.5168762207031, "learning_rate": 1.9400346848205806e-06, "loss": 9.6094, "step": 14523 }, { "epoch": 0.1374844993894416, "grad_norm": 2.711182117462158, "learning_rate": 1.940024227460714e-06, "loss": 0.9448, "step": 14524 }, { "epoch": 0.13749396541115666, "grad_norm": 301.67889404296875, "learning_rate": 1.940013769217286e-06, "loss": 17.625, "step": 14525 }, { "epoch": 0.13750343143287172, "grad_norm": 315.4129943847656, "learning_rate": 1.9400033100903056e-06, "loss": 30.9297, "step": 14526 }, { "epoch": 0.13751289745458675, "grad_norm": 215.34165954589844, "learning_rate": 1.939992850079784e-06, "loss": 15.7578, "step": 14527 }, { "epoch": 0.1375223634763018, "grad_norm": 293.5810546875, "learning_rate": 1.9399823891857298e-06, "loss": 32.7031, "step": 14528 }, { "epoch": 0.13753182949801687, "grad_norm": 3.564134359359741, "learning_rate": 1.939971927408154e-06, "loss": 1.0027, "step": 14529 }, { "epoch": 0.13754129551973193, "grad_norm": 341.1619873046875, "learning_rate": 1.939961464747065e-06, "loss": 21.5156, "step": 14530 }, { "epoch": 0.13755076154144696, "grad_norm": 287.5250549316406, "learning_rate": 1.939951001202473e-06, "loss": 20.7109, "step": 14531 }, { "epoch": 0.13756022756316202, "grad_norm": 194.63169860839844, "learning_rate": 1.939940536774389e-06, "loss": 15.5078, "step": 14532 }, { "epoch": 0.13756969358487708, "grad_norm": 561.3964233398438, "learning_rate": 1.9399300714628224e-06, "loss": 39.8672, "step": 14533 }, { "epoch": 0.13757915960659214, "grad_norm": 640.3809204101562, "learning_rate": 1.9399196052677815e-06, "loss": 39.3125, "step": 14534 }, { "epoch": 0.1375886256283072, "grad_norm": 231.7581329345703, "learning_rate": 1.939909138189278e-06, "loss": 13.9688, "step": 14535 }, { "epoch": 0.13759809165002224, "grad_norm": 826.9564819335938, "learning_rate": 1.939898670227321e-06, "loss": 62.7734, "step": 14536 }, { "epoch": 0.1376075576717373, "grad_norm": 479.51385498046875, "learning_rate": 1.9398882013819205e-06, "loss": 23.6094, "step": 14537 }, { "epoch": 0.13761702369345236, "grad_norm": 430.9534606933594, "learning_rate": 1.939877731653086e-06, "loss": 44.2188, "step": 14538 }, { "epoch": 0.13762648971516742, "grad_norm": 289.9022216796875, "learning_rate": 1.939867261040828e-06, "loss": 17.3906, "step": 14539 }, { "epoch": 0.13763595573688245, "grad_norm": 268.0016174316406, "learning_rate": 1.9398567895451556e-06, "loss": 17.3281, "step": 14540 }, { "epoch": 0.1376454217585975, "grad_norm": 348.9818115234375, "learning_rate": 1.939846317166079e-06, "loss": 30.9688, "step": 14541 }, { "epoch": 0.13765488778031257, "grad_norm": 248.12713623046875, "learning_rate": 1.939835843903608e-06, "loss": 24.9766, "step": 14542 }, { "epoch": 0.13766435380202763, "grad_norm": 291.0238037109375, "learning_rate": 1.9398253697577523e-06, "loss": 22.8281, "step": 14543 }, { "epoch": 0.1376738198237427, "grad_norm": 433.50347900390625, "learning_rate": 1.939814894728522e-06, "loss": 46.6172, "step": 14544 }, { "epoch": 0.13768328584545772, "grad_norm": 306.7830810546875, "learning_rate": 1.939804418815927e-06, "loss": 9.793, "step": 14545 }, { "epoch": 0.13769275186717278, "grad_norm": 591.6935424804688, "learning_rate": 1.9397939420199767e-06, "loss": 24.7344, "step": 14546 }, { "epoch": 0.13770221788888784, "grad_norm": 427.2427978515625, "learning_rate": 1.9397834643406815e-06, "loss": 20.4141, "step": 14547 }, { "epoch": 0.1377116839106029, "grad_norm": 221.27027893066406, "learning_rate": 1.9397729857780508e-06, "loss": 14.2266, "step": 14548 }, { "epoch": 0.13772114993231793, "grad_norm": 831.1246948242188, "learning_rate": 1.9397625063320947e-06, "loss": 43.9688, "step": 14549 }, { "epoch": 0.137730615954033, "grad_norm": 256.3222961425781, "learning_rate": 1.939752026002823e-06, "loss": 17.2109, "step": 14550 }, { "epoch": 0.13774008197574805, "grad_norm": 520.4042358398438, "learning_rate": 1.9397415447902455e-06, "loss": 48.1953, "step": 14551 }, { "epoch": 0.1377495479974631, "grad_norm": 486.5082092285156, "learning_rate": 1.9397310626943723e-06, "loss": 20.625, "step": 14552 }, { "epoch": 0.13775901401917817, "grad_norm": 188.5352325439453, "learning_rate": 1.9397205797152126e-06, "loss": 17.5234, "step": 14553 }, { "epoch": 0.1377684800408932, "grad_norm": 185.6084747314453, "learning_rate": 1.939710095852777e-06, "loss": 19.1289, "step": 14554 }, { "epoch": 0.13777794606260826, "grad_norm": 2.90786075592041, "learning_rate": 1.9396996111070753e-06, "loss": 0.8301, "step": 14555 }, { "epoch": 0.13778741208432332, "grad_norm": 386.15484619140625, "learning_rate": 1.9396891254781163e-06, "loss": 27.0625, "step": 14556 }, { "epoch": 0.13779687810603838, "grad_norm": 473.3148498535156, "learning_rate": 1.939678638965911e-06, "loss": 17.8047, "step": 14557 }, { "epoch": 0.13780634412775342, "grad_norm": 3.382570743560791, "learning_rate": 1.939668151570469e-06, "loss": 0.9507, "step": 14558 }, { "epoch": 0.13781581014946848, "grad_norm": 429.8518371582031, "learning_rate": 1.9396576632918004e-06, "loss": 28.2812, "step": 14559 }, { "epoch": 0.13782527617118354, "grad_norm": 198.17893981933594, "learning_rate": 1.9396471741299144e-06, "loss": 16.4453, "step": 14560 }, { "epoch": 0.1378347421928986, "grad_norm": 285.2873229980469, "learning_rate": 1.939636684084821e-06, "loss": 17.5312, "step": 14561 }, { "epoch": 0.13784420821461366, "grad_norm": 370.8929138183594, "learning_rate": 1.9396261931565305e-06, "loss": 55.6719, "step": 14562 }, { "epoch": 0.1378536742363287, "grad_norm": 384.3174133300781, "learning_rate": 1.9396157013450526e-06, "loss": 20.5078, "step": 14563 }, { "epoch": 0.13786314025804375, "grad_norm": 233.0722198486328, "learning_rate": 1.9396052086503967e-06, "loss": 34.1289, "step": 14564 }, { "epoch": 0.1378726062797588, "grad_norm": 3.177105665206909, "learning_rate": 1.9395947150725734e-06, "loss": 0.8604, "step": 14565 }, { "epoch": 0.13788207230147387, "grad_norm": 539.3713989257812, "learning_rate": 1.939584220611592e-06, "loss": 19.4297, "step": 14566 }, { "epoch": 0.1378915383231889, "grad_norm": 432.9978942871094, "learning_rate": 1.939573725267463e-06, "loss": 12.8203, "step": 14567 }, { "epoch": 0.13790100434490396, "grad_norm": 483.4571838378906, "learning_rate": 1.939563229040195e-06, "loss": 26.5352, "step": 14568 }, { "epoch": 0.13791047036661902, "grad_norm": 416.947021484375, "learning_rate": 1.9395527319297993e-06, "loss": 11.9531, "step": 14569 }, { "epoch": 0.13791993638833408, "grad_norm": 287.9900817871094, "learning_rate": 1.939542233936285e-06, "loss": 12.2266, "step": 14570 }, { "epoch": 0.13792940241004914, "grad_norm": 694.2150268554688, "learning_rate": 1.939531735059662e-06, "loss": 44.0, "step": 14571 }, { "epoch": 0.13793886843176417, "grad_norm": 330.19573974609375, "learning_rate": 1.93952123529994e-06, "loss": 15.0625, "step": 14572 }, { "epoch": 0.13794833445347923, "grad_norm": 255.7476348876953, "learning_rate": 1.9395107346571294e-06, "loss": 22.8203, "step": 14573 }, { "epoch": 0.1379578004751943, "grad_norm": 375.4814147949219, "learning_rate": 1.9395002331312403e-06, "loss": 25.5, "step": 14574 }, { "epoch": 0.13796726649690935, "grad_norm": 568.2471923828125, "learning_rate": 1.9394897307222815e-06, "loss": 35.5078, "step": 14575 }, { "epoch": 0.13797673251862438, "grad_norm": 215.6570281982422, "learning_rate": 1.939479227430264e-06, "loss": 19.4844, "step": 14576 }, { "epoch": 0.13798619854033944, "grad_norm": 415.9028015136719, "learning_rate": 1.9394687232551966e-06, "loss": 14.5, "step": 14577 }, { "epoch": 0.1379956645620545, "grad_norm": 2.813129186630249, "learning_rate": 1.9394582181970902e-06, "loss": 0.8262, "step": 14578 }, { "epoch": 0.13800513058376956, "grad_norm": 3.7464773654937744, "learning_rate": 1.9394477122559537e-06, "loss": 0.9429, "step": 14579 }, { "epoch": 0.13801459660548462, "grad_norm": 527.9346313476562, "learning_rate": 1.939437205431798e-06, "loss": 33.3594, "step": 14580 }, { "epoch": 0.13802406262719966, "grad_norm": 874.8916625976562, "learning_rate": 1.9394266977246322e-06, "loss": 60.7031, "step": 14581 }, { "epoch": 0.13803352864891472, "grad_norm": 3.212329626083374, "learning_rate": 1.9394161891344662e-06, "loss": 0.7817, "step": 14582 }, { "epoch": 0.13804299467062978, "grad_norm": 242.2941131591797, "learning_rate": 1.9394056796613104e-06, "loss": 22.5469, "step": 14583 }, { "epoch": 0.13805246069234484, "grad_norm": 1113.37353515625, "learning_rate": 1.9393951693051743e-06, "loss": 56.1875, "step": 14584 }, { "epoch": 0.13806192671405987, "grad_norm": 2.8500778675079346, "learning_rate": 1.9393846580660678e-06, "loss": 0.7788, "step": 14585 }, { "epoch": 0.13807139273577493, "grad_norm": 731.5200805664062, "learning_rate": 1.9393741459440006e-06, "loss": 14.4961, "step": 14586 }, { "epoch": 0.13808085875749, "grad_norm": 734.0994262695312, "learning_rate": 1.9393636329389833e-06, "loss": 34.0781, "step": 14587 }, { "epoch": 0.13809032477920505, "grad_norm": 306.208984375, "learning_rate": 1.939353119051025e-06, "loss": 21.2812, "step": 14588 }, { "epoch": 0.1380997908009201, "grad_norm": 959.3610229492188, "learning_rate": 1.939342604280136e-06, "loss": 60.375, "step": 14589 }, { "epoch": 0.13810925682263514, "grad_norm": 360.4083557128906, "learning_rate": 1.939332088626326e-06, "loss": 12.8906, "step": 14590 }, { "epoch": 0.1381187228443502, "grad_norm": 223.05877685546875, "learning_rate": 1.9393215720896048e-06, "loss": 20.2031, "step": 14591 }, { "epoch": 0.13812818886606526, "grad_norm": 778.7549438476562, "learning_rate": 1.9393110546699826e-06, "loss": 44.7031, "step": 14592 }, { "epoch": 0.13813765488778032, "grad_norm": 250.13792419433594, "learning_rate": 1.939300536367469e-06, "loss": 12.3984, "step": 14593 }, { "epoch": 0.13814712090949538, "grad_norm": 595.5359497070312, "learning_rate": 1.9392900171820743e-06, "loss": 51.5938, "step": 14594 }, { "epoch": 0.1381565869312104, "grad_norm": 914.6290283203125, "learning_rate": 1.9392794971138076e-06, "loss": 61.75, "step": 14595 }, { "epoch": 0.13816605295292547, "grad_norm": 726.3569946289062, "learning_rate": 1.93926897616268e-06, "loss": 47.9531, "step": 14596 }, { "epoch": 0.13817551897464053, "grad_norm": 475.15155029296875, "learning_rate": 1.9392584543286997e-06, "loss": 37.2188, "step": 14597 }, { "epoch": 0.1381849849963556, "grad_norm": 570.043212890625, "learning_rate": 1.9392479316118783e-06, "loss": 50.1875, "step": 14598 }, { "epoch": 0.13819445101807062, "grad_norm": 379.0234069824219, "learning_rate": 1.9392374080122246e-06, "loss": 12.9219, "step": 14599 }, { "epoch": 0.13820391703978568, "grad_norm": 684.9772338867188, "learning_rate": 1.939226883529749e-06, "loss": 69.2305, "step": 14600 }, { "epoch": 0.13821338306150074, "grad_norm": 3.2398524284362793, "learning_rate": 1.939216358164461e-06, "loss": 0.9194, "step": 14601 }, { "epoch": 0.1382228490832158, "grad_norm": 206.59017944335938, "learning_rate": 1.939205831916371e-06, "loss": 23.1875, "step": 14602 }, { "epoch": 0.13823231510493086, "grad_norm": 388.6730041503906, "learning_rate": 1.9391953047854887e-06, "loss": 23.625, "step": 14603 }, { "epoch": 0.1382417811266459, "grad_norm": 511.4549255371094, "learning_rate": 1.939184776771824e-06, "loss": 33.5, "step": 14604 }, { "epoch": 0.13825124714836096, "grad_norm": 228.3059844970703, "learning_rate": 1.939174247875386e-06, "loss": 17.4688, "step": 14605 }, { "epoch": 0.13826071317007602, "grad_norm": 1140.053466796875, "learning_rate": 1.9391637180961856e-06, "loss": 44.3047, "step": 14606 }, { "epoch": 0.13827017919179108, "grad_norm": 293.0855407714844, "learning_rate": 1.9391531874342324e-06, "loss": 37.2656, "step": 14607 }, { "epoch": 0.1382796452135061, "grad_norm": 283.7287292480469, "learning_rate": 1.9391426558895367e-06, "loss": 24.2344, "step": 14608 }, { "epoch": 0.13828911123522117, "grad_norm": 567.024169921875, "learning_rate": 1.9391321234621077e-06, "loss": 22.8594, "step": 14609 }, { "epoch": 0.13829857725693623, "grad_norm": 393.46478271484375, "learning_rate": 1.9391215901519556e-06, "loss": 23.875, "step": 14610 }, { "epoch": 0.1383080432786513, "grad_norm": 319.70440673828125, "learning_rate": 1.9391110559590905e-06, "loss": 25.7031, "step": 14611 }, { "epoch": 0.13831750930036635, "grad_norm": 561.1226196289062, "learning_rate": 1.9391005208835223e-06, "loss": 51.0938, "step": 14612 }, { "epoch": 0.13832697532208138, "grad_norm": 219.4717559814453, "learning_rate": 1.93908998492526e-06, "loss": 12.7422, "step": 14613 }, { "epoch": 0.13833644134379644, "grad_norm": 364.7059326171875, "learning_rate": 1.9390794480843147e-06, "loss": 17.0039, "step": 14614 }, { "epoch": 0.1383459073655115, "grad_norm": 318.0524597167969, "learning_rate": 1.9390689103606953e-06, "loss": 30.0, "step": 14615 }, { "epoch": 0.13835537338722656, "grad_norm": 183.27444458007812, "learning_rate": 1.939058371754413e-06, "loss": 17.9922, "step": 14616 }, { "epoch": 0.1383648394089416, "grad_norm": 580.42822265625, "learning_rate": 1.9390478322654765e-06, "loss": 38.4844, "step": 14617 }, { "epoch": 0.13837430543065665, "grad_norm": 640.0345458984375, "learning_rate": 1.939037291893896e-06, "loss": 44.5625, "step": 14618 }, { "epoch": 0.1383837714523717, "grad_norm": 287.50872802734375, "learning_rate": 1.9390267506396816e-06, "loss": 15.8516, "step": 14619 }, { "epoch": 0.13839323747408677, "grad_norm": 329.6753845214844, "learning_rate": 1.939016208502843e-06, "loss": 48.0625, "step": 14620 }, { "epoch": 0.13840270349580183, "grad_norm": 2.5778005123138428, "learning_rate": 1.939005665483391e-06, "loss": 0.8147, "step": 14621 }, { "epoch": 0.13841216951751686, "grad_norm": 410.013427734375, "learning_rate": 1.938995121581334e-06, "loss": 44.9062, "step": 14622 }, { "epoch": 0.13842163553923192, "grad_norm": 817.0358276367188, "learning_rate": 1.9389845767966827e-06, "loss": 55.2344, "step": 14623 }, { "epoch": 0.13843110156094698, "grad_norm": 291.4938049316406, "learning_rate": 1.9389740311294473e-06, "loss": 22.5, "step": 14624 }, { "epoch": 0.13844056758266204, "grad_norm": 471.0341491699219, "learning_rate": 1.938963484579637e-06, "loss": 19.0781, "step": 14625 }, { "epoch": 0.13845003360437708, "grad_norm": 478.44732666015625, "learning_rate": 1.9389529371472624e-06, "loss": 23.8906, "step": 14626 }, { "epoch": 0.13845949962609214, "grad_norm": 433.5776062011719, "learning_rate": 1.9389423888323333e-06, "loss": 14.375, "step": 14627 }, { "epoch": 0.1384689656478072, "grad_norm": 799.7786254882812, "learning_rate": 1.938931839634859e-06, "loss": 46.3672, "step": 14628 }, { "epoch": 0.13847843166952226, "grad_norm": 373.0169982910156, "learning_rate": 1.9389212895548504e-06, "loss": 24.7148, "step": 14629 }, { "epoch": 0.13848789769123732, "grad_norm": 388.2788391113281, "learning_rate": 1.9389107385923166e-06, "loss": 20.6484, "step": 14630 }, { "epoch": 0.13849736371295235, "grad_norm": 300.3373107910156, "learning_rate": 1.9389001867472675e-06, "loss": 48.0391, "step": 14631 }, { "epoch": 0.1385068297346674, "grad_norm": 1285.8031005859375, "learning_rate": 1.9388896340197135e-06, "loss": 48.6016, "step": 14632 }, { "epoch": 0.13851629575638247, "grad_norm": 588.3613891601562, "learning_rate": 1.9388790804096646e-06, "loss": 20.7148, "step": 14633 }, { "epoch": 0.13852576177809753, "grad_norm": 364.2294616699219, "learning_rate": 1.93886852591713e-06, "loss": 33.5469, "step": 14634 }, { "epoch": 0.13853522779981256, "grad_norm": 497.79437255859375, "learning_rate": 1.9388579705421205e-06, "loss": 48.3125, "step": 14635 }, { "epoch": 0.13854469382152762, "grad_norm": 805.8424682617188, "learning_rate": 1.9388474142846452e-06, "loss": 67.5469, "step": 14636 }, { "epoch": 0.13855415984324268, "grad_norm": 924.9945068359375, "learning_rate": 1.9388368571447145e-06, "loss": 66.0078, "step": 14637 }, { "epoch": 0.13856362586495774, "grad_norm": 357.7459411621094, "learning_rate": 1.9388262991223384e-06, "loss": 26.5195, "step": 14638 }, { "epoch": 0.1385730918866728, "grad_norm": 561.8486328125, "learning_rate": 1.938815740217527e-06, "loss": 7.4766, "step": 14639 }, { "epoch": 0.13858255790838783, "grad_norm": 554.224853515625, "learning_rate": 1.938805180430289e-06, "loss": 47.9375, "step": 14640 }, { "epoch": 0.1385920239301029, "grad_norm": 211.40899658203125, "learning_rate": 1.9387946197606356e-06, "loss": 10.9746, "step": 14641 }, { "epoch": 0.13860148995181795, "grad_norm": 181.70863342285156, "learning_rate": 1.9387840582085763e-06, "loss": 22.7188, "step": 14642 }, { "epoch": 0.138610955973533, "grad_norm": 211.50318908691406, "learning_rate": 1.9387734957741213e-06, "loss": 14.8359, "step": 14643 }, { "epoch": 0.13862042199524804, "grad_norm": 317.8036193847656, "learning_rate": 1.93876293245728e-06, "loss": 31.7656, "step": 14644 }, { "epoch": 0.1386298880169631, "grad_norm": 247.92874145507812, "learning_rate": 1.9387523682580625e-06, "loss": 7.9336, "step": 14645 }, { "epoch": 0.13863935403867816, "grad_norm": 260.0355529785156, "learning_rate": 1.938741803176479e-06, "loss": 19.5859, "step": 14646 }, { "epoch": 0.13864882006039322, "grad_norm": 365.44549560546875, "learning_rate": 1.9387312372125395e-06, "loss": 37.0625, "step": 14647 }, { "epoch": 0.13865828608210828, "grad_norm": 231.75460815429688, "learning_rate": 1.9387206703662536e-06, "loss": 18.3984, "step": 14648 }, { "epoch": 0.13866775210382332, "grad_norm": 1112.9189453125, "learning_rate": 1.938710102637631e-06, "loss": 97.6562, "step": 14649 }, { "epoch": 0.13867721812553838, "grad_norm": 567.42919921875, "learning_rate": 1.9386995340266824e-06, "loss": 64.0, "step": 14650 }, { "epoch": 0.13868668414725344, "grad_norm": 3.5189602375030518, "learning_rate": 1.9386889645334173e-06, "loss": 1.0576, "step": 14651 }, { "epoch": 0.1386961501689685, "grad_norm": 433.8910827636719, "learning_rate": 1.9386783941578454e-06, "loss": 22.0859, "step": 14652 }, { "epoch": 0.13870561619068353, "grad_norm": 307.06951904296875, "learning_rate": 1.9386678228999773e-06, "loss": 17.7773, "step": 14653 }, { "epoch": 0.1387150822123986, "grad_norm": 143.4996795654297, "learning_rate": 1.938657250759822e-06, "loss": 15.3516, "step": 14654 }, { "epoch": 0.13872454823411365, "grad_norm": 2.637031316757202, "learning_rate": 1.93864667773739e-06, "loss": 0.8843, "step": 14655 }, { "epoch": 0.1387340142558287, "grad_norm": 415.98773193359375, "learning_rate": 1.938636103832691e-06, "loss": 29.6875, "step": 14656 }, { "epoch": 0.13874348027754377, "grad_norm": 785.0823974609375, "learning_rate": 1.9386255290457357e-06, "loss": 41.0703, "step": 14657 }, { "epoch": 0.1387529462992588, "grad_norm": 465.4402770996094, "learning_rate": 1.938614953376533e-06, "loss": 32.1719, "step": 14658 }, { "epoch": 0.13876241232097386, "grad_norm": 357.15045166015625, "learning_rate": 1.9386043768250937e-06, "loss": 20.2969, "step": 14659 }, { "epoch": 0.13877187834268892, "grad_norm": 267.90667724609375, "learning_rate": 1.938593799391427e-06, "loss": 37.375, "step": 14660 }, { "epoch": 0.13878134436440398, "grad_norm": 166.49069213867188, "learning_rate": 1.9385832210755435e-06, "loss": 14.3516, "step": 14661 }, { "epoch": 0.138790810386119, "grad_norm": 319.9599609375, "learning_rate": 1.9385726418774526e-06, "loss": 23.8438, "step": 14662 }, { "epoch": 0.13880027640783407, "grad_norm": 480.7591552734375, "learning_rate": 1.9385620617971646e-06, "loss": 37.0859, "step": 14663 }, { "epoch": 0.13880974242954913, "grad_norm": 772.5506591796875, "learning_rate": 1.9385514808346893e-06, "loss": 49.6172, "step": 14664 }, { "epoch": 0.1388192084512642, "grad_norm": 554.5753173828125, "learning_rate": 1.9385408989900367e-06, "loss": 12.0469, "step": 14665 }, { "epoch": 0.13882867447297925, "grad_norm": 894.3859252929688, "learning_rate": 1.9385303162632164e-06, "loss": 48.3125, "step": 14666 }, { "epoch": 0.13883814049469428, "grad_norm": 414.9085998535156, "learning_rate": 1.938519732654239e-06, "loss": 20.8047, "step": 14667 }, { "epoch": 0.13884760651640934, "grad_norm": 262.47235107421875, "learning_rate": 1.938509148163114e-06, "loss": 24.5781, "step": 14668 }, { "epoch": 0.1388570725381244, "grad_norm": 269.55267333984375, "learning_rate": 1.9384985627898517e-06, "loss": 16.6797, "step": 14669 }, { "epoch": 0.13886653855983946, "grad_norm": 262.482177734375, "learning_rate": 1.9384879765344614e-06, "loss": 27.625, "step": 14670 }, { "epoch": 0.1388760045815545, "grad_norm": 525.8825073242188, "learning_rate": 1.9384773893969533e-06, "loss": 23.2812, "step": 14671 }, { "epoch": 0.13888547060326956, "grad_norm": 435.8257141113281, "learning_rate": 1.9384668013773383e-06, "loss": 26.9141, "step": 14672 }, { "epoch": 0.13889493662498462, "grad_norm": 615.0692749023438, "learning_rate": 1.938456212475625e-06, "loss": 36.75, "step": 14673 }, { "epoch": 0.13890440264669968, "grad_norm": 321.2135314941406, "learning_rate": 1.938445622691824e-06, "loss": 41.6797, "step": 14674 }, { "epoch": 0.13891386866841474, "grad_norm": 507.8591003417969, "learning_rate": 1.938435032025945e-06, "loss": 45.9922, "step": 14675 }, { "epoch": 0.13892333469012977, "grad_norm": 430.25799560546875, "learning_rate": 1.9384244404779986e-06, "loss": 45.6719, "step": 14676 }, { "epoch": 0.13893280071184483, "grad_norm": 221.51779174804688, "learning_rate": 1.9384138480479935e-06, "loss": 9.125, "step": 14677 }, { "epoch": 0.1389422667335599, "grad_norm": 240.30027770996094, "learning_rate": 1.938403254735941e-06, "loss": 29.2266, "step": 14678 }, { "epoch": 0.13895173275527495, "grad_norm": 743.7542114257812, "learning_rate": 1.9383926605418504e-06, "loss": 13.3359, "step": 14679 }, { "epoch": 0.13896119877699, "grad_norm": 788.7483520507812, "learning_rate": 1.9383820654657317e-06, "loss": 50.6562, "step": 14680 }, { "epoch": 0.13897066479870504, "grad_norm": 615.8445434570312, "learning_rate": 1.938371469507595e-06, "loss": 47.2812, "step": 14681 }, { "epoch": 0.1389801308204201, "grad_norm": 295.3322448730469, "learning_rate": 1.93836087266745e-06, "loss": 7.7129, "step": 14682 }, { "epoch": 0.13898959684213516, "grad_norm": 3.525876760482788, "learning_rate": 1.9383502749453067e-06, "loss": 0.9104, "step": 14683 }, { "epoch": 0.13899906286385022, "grad_norm": 367.0206604003906, "learning_rate": 1.9383396763411756e-06, "loss": 29.8906, "step": 14684 }, { "epoch": 0.13900852888556525, "grad_norm": 617.1051635742188, "learning_rate": 1.938329076855066e-06, "loss": 62.9766, "step": 14685 }, { "epoch": 0.1390179949072803, "grad_norm": 3.014101505279541, "learning_rate": 1.9383184764869883e-06, "loss": 0.9424, "step": 14686 }, { "epoch": 0.13902746092899537, "grad_norm": 3.2322189807891846, "learning_rate": 1.938307875236952e-06, "loss": 0.9316, "step": 14687 }, { "epoch": 0.13903692695071043, "grad_norm": 866.3662109375, "learning_rate": 1.938297273104967e-06, "loss": 35.8828, "step": 14688 }, { "epoch": 0.1390463929724255, "grad_norm": 405.4069519042969, "learning_rate": 1.9382866700910445e-06, "loss": 9.8438, "step": 14689 }, { "epoch": 0.13905585899414052, "grad_norm": 164.38766479492188, "learning_rate": 1.938276066195193e-06, "loss": 16.1953, "step": 14690 }, { "epoch": 0.13906532501585558, "grad_norm": 517.9735107421875, "learning_rate": 1.938265461417423e-06, "loss": 27.8125, "step": 14691 }, { "epoch": 0.13907479103757064, "grad_norm": 2.874856472015381, "learning_rate": 1.938254855757745e-06, "loss": 0.9194, "step": 14692 }, { "epoch": 0.1390842570592857, "grad_norm": 819.78125, "learning_rate": 1.938244249216168e-06, "loss": 49.8672, "step": 14693 }, { "epoch": 0.13909372308100074, "grad_norm": 231.9407501220703, "learning_rate": 1.938233641792703e-06, "loss": 21.5312, "step": 14694 }, { "epoch": 0.1391031891027158, "grad_norm": 700.6179809570312, "learning_rate": 1.938223033487359e-06, "loss": 28.8359, "step": 14695 }, { "epoch": 0.13911265512443086, "grad_norm": 493.6686096191406, "learning_rate": 1.9382124243001462e-06, "loss": 33.9141, "step": 14696 }, { "epoch": 0.13912212114614592, "grad_norm": 365.427734375, "learning_rate": 1.9382018142310748e-06, "loss": 26.4531, "step": 14697 }, { "epoch": 0.13913158716786098, "grad_norm": 323.0664978027344, "learning_rate": 1.938191203280155e-06, "loss": 22.6016, "step": 14698 }, { "epoch": 0.139141053189576, "grad_norm": 213.05152893066406, "learning_rate": 1.9381805914473964e-06, "loss": 24.3906, "step": 14699 }, { "epoch": 0.13915051921129107, "grad_norm": 428.69476318359375, "learning_rate": 1.938169978732809e-06, "loss": 17.6016, "step": 14700 }, { "epoch": 0.13915998523300613, "grad_norm": 189.87913513183594, "learning_rate": 1.938159365136403e-06, "loss": 19.0781, "step": 14701 }, { "epoch": 0.1391694512547212, "grad_norm": 287.02996826171875, "learning_rate": 1.938148750658188e-06, "loss": 20.9688, "step": 14702 }, { "epoch": 0.13917891727643622, "grad_norm": 303.7251281738281, "learning_rate": 1.9381381352981743e-06, "loss": 39.5156, "step": 14703 }, { "epoch": 0.13918838329815128, "grad_norm": 746.0884399414062, "learning_rate": 1.938127519056372e-06, "loss": 47.5, "step": 14704 }, { "epoch": 0.13919784931986634, "grad_norm": 364.38238525390625, "learning_rate": 1.9381169019327907e-06, "loss": 35.2656, "step": 14705 }, { "epoch": 0.1392073153415814, "grad_norm": 496.6123046875, "learning_rate": 1.9381062839274407e-06, "loss": 20.2031, "step": 14706 }, { "epoch": 0.13921678136329646, "grad_norm": 317.5548400878906, "learning_rate": 1.9380956650403314e-06, "loss": 8.5703, "step": 14707 }, { "epoch": 0.1392262473850115, "grad_norm": 287.4568786621094, "learning_rate": 1.938085045271473e-06, "loss": 16.2773, "step": 14708 }, { "epoch": 0.13923571340672655, "grad_norm": 1182.2938232421875, "learning_rate": 1.9380744246208765e-06, "loss": 26.8281, "step": 14709 }, { "epoch": 0.1392451794284416, "grad_norm": 2.8970727920532227, "learning_rate": 1.9380638030885507e-06, "loss": 0.8994, "step": 14710 }, { "epoch": 0.13925464545015667, "grad_norm": 463.2901916503906, "learning_rate": 1.938053180674506e-06, "loss": 51.8281, "step": 14711 }, { "epoch": 0.1392641114718717, "grad_norm": 295.49725341796875, "learning_rate": 1.938042557378752e-06, "loss": 22.0938, "step": 14712 }, { "epoch": 0.13927357749358676, "grad_norm": 235.4300537109375, "learning_rate": 1.9380319332012995e-06, "loss": 12.9375, "step": 14713 }, { "epoch": 0.13928304351530182, "grad_norm": 234.47129821777344, "learning_rate": 1.938021308142158e-06, "loss": 20.9062, "step": 14714 }, { "epoch": 0.13929250953701688, "grad_norm": 313.0304260253906, "learning_rate": 1.938010682201337e-06, "loss": 32.6094, "step": 14715 }, { "epoch": 0.13930197555873194, "grad_norm": 360.6496276855469, "learning_rate": 1.9380000553788476e-06, "loss": 22.2188, "step": 14716 }, { "epoch": 0.13931144158044698, "grad_norm": 227.46578979492188, "learning_rate": 1.937989427674699e-06, "loss": 20.9883, "step": 14717 }, { "epoch": 0.13932090760216204, "grad_norm": 242.79908752441406, "learning_rate": 1.937978799088901e-06, "loss": 20.5703, "step": 14718 }, { "epoch": 0.1393303736238771, "grad_norm": 349.3076171875, "learning_rate": 1.9379681696214642e-06, "loss": 35.8281, "step": 14719 }, { "epoch": 0.13933983964559216, "grad_norm": 342.6841125488281, "learning_rate": 1.9379575392723983e-06, "loss": 23.0312, "step": 14720 }, { "epoch": 0.1393493056673072, "grad_norm": 885.8406372070312, "learning_rate": 1.937946908041713e-06, "loss": 31.7891, "step": 14721 }, { "epoch": 0.13935877168902225, "grad_norm": 466.5111083984375, "learning_rate": 1.937936275929419e-06, "loss": 12.2734, "step": 14722 }, { "epoch": 0.1393682377107373, "grad_norm": 272.4049377441406, "learning_rate": 1.937925642935526e-06, "loss": 17.6562, "step": 14723 }, { "epoch": 0.13937770373245237, "grad_norm": 513.0679931640625, "learning_rate": 1.9379150090600434e-06, "loss": 16.6719, "step": 14724 }, { "epoch": 0.13938716975416743, "grad_norm": 881.9497680664062, "learning_rate": 1.9379043743029825e-06, "loss": 57.4336, "step": 14725 }, { "epoch": 0.13939663577588246, "grad_norm": 479.6487731933594, "learning_rate": 1.9378937386643517e-06, "loss": 46.2344, "step": 14726 }, { "epoch": 0.13940610179759752, "grad_norm": 384.3031921386719, "learning_rate": 1.9378831021441625e-06, "loss": 56.3906, "step": 14727 }, { "epoch": 0.13941556781931258, "grad_norm": 342.2161560058594, "learning_rate": 1.9378724647424237e-06, "loss": 39.7812, "step": 14728 }, { "epoch": 0.13942503384102764, "grad_norm": 322.2275390625, "learning_rate": 1.9378618264591464e-06, "loss": 14.6758, "step": 14729 }, { "epoch": 0.13943449986274267, "grad_norm": 421.5139465332031, "learning_rate": 1.9378511872943395e-06, "loss": 29.6016, "step": 14730 }, { "epoch": 0.13944396588445773, "grad_norm": 207.29180908203125, "learning_rate": 1.937840547248013e-06, "loss": 17.9531, "step": 14731 }, { "epoch": 0.1394534319061728, "grad_norm": 684.3483276367188, "learning_rate": 1.937829906320178e-06, "loss": 68.3828, "step": 14732 }, { "epoch": 0.13946289792788785, "grad_norm": 311.2198791503906, "learning_rate": 1.937819264510844e-06, "loss": 9.918, "step": 14733 }, { "epoch": 0.1394723639496029, "grad_norm": 427.2634582519531, "learning_rate": 1.937808621820021e-06, "loss": 46.4062, "step": 14734 }, { "epoch": 0.13948182997131794, "grad_norm": 437.158203125, "learning_rate": 1.9377979782477186e-06, "loss": 34.4219, "step": 14735 }, { "epoch": 0.139491295993033, "grad_norm": 305.1331481933594, "learning_rate": 1.9377873337939473e-06, "loss": 21.6406, "step": 14736 }, { "epoch": 0.13950076201474806, "grad_norm": 590.8109130859375, "learning_rate": 1.9377766884587167e-06, "loss": 32.4297, "step": 14737 }, { "epoch": 0.13951022803646312, "grad_norm": 780.8865356445312, "learning_rate": 1.937766042242037e-06, "loss": 38.2656, "step": 14738 }, { "epoch": 0.13951969405817816, "grad_norm": 362.9554138183594, "learning_rate": 1.9377553951439185e-06, "loss": 24.1953, "step": 14739 }, { "epoch": 0.13952916007989322, "grad_norm": 197.2570037841797, "learning_rate": 1.9377447471643704e-06, "loss": 17.2969, "step": 14740 }, { "epoch": 0.13953862610160828, "grad_norm": 603.64599609375, "learning_rate": 1.9377340983034037e-06, "loss": 34.0938, "step": 14741 }, { "epoch": 0.13954809212332334, "grad_norm": 1074.408203125, "learning_rate": 1.937723448561028e-06, "loss": 47.9688, "step": 14742 }, { "epoch": 0.1395575581450384, "grad_norm": 468.36724853515625, "learning_rate": 1.9377127979372532e-06, "loss": 47.5156, "step": 14743 }, { "epoch": 0.13956702416675343, "grad_norm": 265.2091064453125, "learning_rate": 1.9377021464320893e-06, "loss": 17.7109, "step": 14744 }, { "epoch": 0.1395764901884685, "grad_norm": 331.79412841796875, "learning_rate": 1.937691494045546e-06, "loss": 35.2422, "step": 14745 }, { "epoch": 0.13958595621018355, "grad_norm": 374.520751953125, "learning_rate": 1.9376808407776343e-06, "loss": 26.1641, "step": 14746 }, { "epoch": 0.1395954222318986, "grad_norm": 1232.1910400390625, "learning_rate": 1.9376701866283635e-06, "loss": 18.4766, "step": 14747 }, { "epoch": 0.13960488825361364, "grad_norm": 318.7411804199219, "learning_rate": 1.9376595315977435e-06, "loss": 20.3984, "step": 14748 }, { "epoch": 0.1396143542753287, "grad_norm": 541.0393676757812, "learning_rate": 1.937648875685785e-06, "loss": 41.5312, "step": 14749 }, { "epoch": 0.13962382029704376, "grad_norm": 440.8999938964844, "learning_rate": 1.937638218892497e-06, "loss": 25.2812, "step": 14750 }, { "epoch": 0.13963328631875882, "grad_norm": 532.0723876953125, "learning_rate": 1.93762756121789e-06, "loss": 10.7539, "step": 14751 }, { "epoch": 0.13964275234047388, "grad_norm": 242.78932189941406, "learning_rate": 1.9376169026619745e-06, "loss": 22.2812, "step": 14752 }, { "epoch": 0.1396522183621889, "grad_norm": 302.1191101074219, "learning_rate": 1.93760624322476e-06, "loss": 27.3984, "step": 14753 }, { "epoch": 0.13966168438390397, "grad_norm": 753.4466552734375, "learning_rate": 1.937595582906257e-06, "loss": 27.4688, "step": 14754 }, { "epoch": 0.13967115040561903, "grad_norm": 186.31051635742188, "learning_rate": 1.9375849217064743e-06, "loss": 21.3906, "step": 14755 }, { "epoch": 0.1396806164273341, "grad_norm": 326.7740173339844, "learning_rate": 1.9375742596254235e-06, "loss": 29.7422, "step": 14756 }, { "epoch": 0.13969008244904912, "grad_norm": 639.6425170898438, "learning_rate": 1.9375635966631133e-06, "loss": 24.4922, "step": 14757 }, { "epoch": 0.13969954847076418, "grad_norm": 690.0364990234375, "learning_rate": 1.9375529328195546e-06, "loss": 53.9375, "step": 14758 }, { "epoch": 0.13970901449247924, "grad_norm": 471.1467590332031, "learning_rate": 1.9375422680947573e-06, "loss": 25.1172, "step": 14759 }, { "epoch": 0.1397184805141943, "grad_norm": 659.37255859375, "learning_rate": 1.937531602488731e-06, "loss": 49.4219, "step": 14760 }, { "epoch": 0.13972794653590936, "grad_norm": 192.2098846435547, "learning_rate": 1.9375209360014863e-06, "loss": 15.4453, "step": 14761 }, { "epoch": 0.1397374125576244, "grad_norm": 296.0223388671875, "learning_rate": 1.9375102686330327e-06, "loss": 20.2617, "step": 14762 }, { "epoch": 0.13974687857933946, "grad_norm": 191.3737030029297, "learning_rate": 1.93749960038338e-06, "loss": 17.625, "step": 14763 }, { "epoch": 0.13975634460105452, "grad_norm": 396.9471740722656, "learning_rate": 1.9374889312525393e-06, "loss": 31.8828, "step": 14764 }, { "epoch": 0.13976581062276958, "grad_norm": 460.6831970214844, "learning_rate": 1.93747826124052e-06, "loss": 25.1797, "step": 14765 }, { "epoch": 0.13977527664448464, "grad_norm": 328.3064880371094, "learning_rate": 1.937467590347332e-06, "loss": 20.0234, "step": 14766 }, { "epoch": 0.13978474266619967, "grad_norm": 495.137939453125, "learning_rate": 1.937456918572985e-06, "loss": 28.4141, "step": 14767 }, { "epoch": 0.13979420868791473, "grad_norm": 231.49789428710938, "learning_rate": 1.9374462459174902e-06, "loss": 19.3516, "step": 14768 }, { "epoch": 0.1398036747096298, "grad_norm": 534.6295166015625, "learning_rate": 1.9374355723808562e-06, "loss": 27.1016, "step": 14769 }, { "epoch": 0.13981314073134485, "grad_norm": 313.2982482910156, "learning_rate": 1.937424897963094e-06, "loss": 16.9219, "step": 14770 }, { "epoch": 0.13982260675305988, "grad_norm": 290.97271728515625, "learning_rate": 1.937414222664214e-06, "loss": 20.9844, "step": 14771 }, { "epoch": 0.13983207277477494, "grad_norm": 2.9243922233581543, "learning_rate": 1.937403546484225e-06, "loss": 0.7751, "step": 14772 }, { "epoch": 0.13984153879649, "grad_norm": 3.2363650798797607, "learning_rate": 1.9373928694231376e-06, "loss": 0.9321, "step": 14773 }, { "epoch": 0.13985100481820506, "grad_norm": 595.9929809570312, "learning_rate": 1.937382191480962e-06, "loss": 53.75, "step": 14774 }, { "epoch": 0.13986047083992012, "grad_norm": 606.3397827148438, "learning_rate": 1.9373715126577082e-06, "loss": 24.2031, "step": 14775 }, { "epoch": 0.13986993686163515, "grad_norm": 258.60443115234375, "learning_rate": 1.937360832953386e-06, "loss": 21.4688, "step": 14776 }, { "epoch": 0.1398794028833502, "grad_norm": 238.96742248535156, "learning_rate": 1.9373501523680056e-06, "loss": 19.3672, "step": 14777 }, { "epoch": 0.13988886890506527, "grad_norm": 191.65463256835938, "learning_rate": 1.9373394709015775e-06, "loss": 27.8125, "step": 14778 }, { "epoch": 0.13989833492678033, "grad_norm": 306.83099365234375, "learning_rate": 1.9373287885541106e-06, "loss": 38.5312, "step": 14779 }, { "epoch": 0.13990780094849536, "grad_norm": 591.3590698242188, "learning_rate": 1.937318105325616e-06, "loss": 52.2031, "step": 14780 }, { "epoch": 0.13991726697021042, "grad_norm": 266.3226013183594, "learning_rate": 1.9373074212161033e-06, "loss": 29.7266, "step": 14781 }, { "epoch": 0.13992673299192548, "grad_norm": 3.179981231689453, "learning_rate": 1.9372967362255827e-06, "loss": 1.064, "step": 14782 }, { "epoch": 0.13993619901364054, "grad_norm": 511.89898681640625, "learning_rate": 1.937286050354064e-06, "loss": 27.3281, "step": 14783 }, { "epoch": 0.1399456650353556, "grad_norm": 257.7176818847656, "learning_rate": 1.9372753636015575e-06, "loss": 30.4336, "step": 14784 }, { "epoch": 0.13995513105707064, "grad_norm": 388.4053649902344, "learning_rate": 1.937264675968073e-06, "loss": 42.5312, "step": 14785 }, { "epoch": 0.1399645970787857, "grad_norm": 314.0517578125, "learning_rate": 1.9372539874536206e-06, "loss": 35.4766, "step": 14786 }, { "epoch": 0.13997406310050076, "grad_norm": 713.219482421875, "learning_rate": 1.9372432980582106e-06, "loss": 53.9453, "step": 14787 }, { "epoch": 0.13998352912221582, "grad_norm": 368.81512451171875, "learning_rate": 1.937232607781853e-06, "loss": 21.5859, "step": 14788 }, { "epoch": 0.13999299514393085, "grad_norm": 3.103353261947632, "learning_rate": 1.9372219166245578e-06, "loss": 0.9585, "step": 14789 }, { "epoch": 0.1400024611656459, "grad_norm": 383.419677734375, "learning_rate": 1.9372112245863343e-06, "loss": 26.6641, "step": 14790 }, { "epoch": 0.14001192718736097, "grad_norm": 282.3758544921875, "learning_rate": 1.9372005316671936e-06, "loss": 12.2656, "step": 14791 }, { "epoch": 0.14002139320907603, "grad_norm": 234.12095642089844, "learning_rate": 1.9371898378671455e-06, "loss": 25.3906, "step": 14792 }, { "epoch": 0.1400308592307911, "grad_norm": 373.3013000488281, "learning_rate": 1.9371791431862e-06, "loss": 21.0859, "step": 14793 }, { "epoch": 0.14004032525250612, "grad_norm": 257.5171203613281, "learning_rate": 1.937168447624367e-06, "loss": 15.8633, "step": 14794 }, { "epoch": 0.14004979127422118, "grad_norm": 487.8263854980469, "learning_rate": 1.9371577511816564e-06, "loss": 55.4688, "step": 14795 }, { "epoch": 0.14005925729593624, "grad_norm": 219.743408203125, "learning_rate": 1.9371470538580786e-06, "loss": 8.1406, "step": 14796 }, { "epoch": 0.1400687233176513, "grad_norm": 716.4696655273438, "learning_rate": 1.9371363556536434e-06, "loss": 48.1484, "step": 14797 }, { "epoch": 0.14007818933936633, "grad_norm": 305.5185241699219, "learning_rate": 1.937125656568361e-06, "loss": 25.957, "step": 14798 }, { "epoch": 0.1400876553610814, "grad_norm": 421.0831604003906, "learning_rate": 1.937114956602242e-06, "loss": 34.4219, "step": 14799 }, { "epoch": 0.14009712138279645, "grad_norm": 297.7588806152344, "learning_rate": 1.9371042557552954e-06, "loss": 17.875, "step": 14800 }, { "epoch": 0.1401065874045115, "grad_norm": 202.34754943847656, "learning_rate": 1.9370935540275317e-06, "loss": 17.625, "step": 14801 }, { "epoch": 0.14011605342622657, "grad_norm": 366.3656311035156, "learning_rate": 1.9370828514189613e-06, "loss": 22.8203, "step": 14802 }, { "epoch": 0.1401255194479416, "grad_norm": 3.9386703968048096, "learning_rate": 1.9370721479295936e-06, "loss": 1.0179, "step": 14803 }, { "epoch": 0.14013498546965666, "grad_norm": 380.4600524902344, "learning_rate": 1.9370614435594395e-06, "loss": 26.1953, "step": 14804 }, { "epoch": 0.14014445149137172, "grad_norm": 250.21963500976562, "learning_rate": 1.9370507383085083e-06, "loss": 21.7109, "step": 14805 }, { "epoch": 0.14015391751308678, "grad_norm": 602.987548828125, "learning_rate": 1.9370400321768107e-06, "loss": 33.2812, "step": 14806 }, { "epoch": 0.14016338353480182, "grad_norm": 408.9290771484375, "learning_rate": 1.937029325164356e-06, "loss": 35.8594, "step": 14807 }, { "epoch": 0.14017284955651688, "grad_norm": 259.8009948730469, "learning_rate": 1.937018617271155e-06, "loss": 15.7812, "step": 14808 }, { "epoch": 0.14018231557823194, "grad_norm": 285.5356140136719, "learning_rate": 1.9370079084972174e-06, "loss": 17.5977, "step": 14809 }, { "epoch": 0.140191781599947, "grad_norm": 306.8110046386719, "learning_rate": 1.936997198842553e-06, "loss": 19.7891, "step": 14810 }, { "epoch": 0.14020124762166206, "grad_norm": 288.61083984375, "learning_rate": 1.9369864883071726e-06, "loss": 33.1211, "step": 14811 }, { "epoch": 0.1402107136433771, "grad_norm": 168.44198608398438, "learning_rate": 1.9369757768910855e-06, "loss": 25.2891, "step": 14812 }, { "epoch": 0.14022017966509215, "grad_norm": 517.4474487304688, "learning_rate": 1.9369650645943026e-06, "loss": 49.7344, "step": 14813 }, { "epoch": 0.1402296456868072, "grad_norm": 499.2262878417969, "learning_rate": 1.936954351416833e-06, "loss": 53.8906, "step": 14814 }, { "epoch": 0.14023911170852227, "grad_norm": 811.6121215820312, "learning_rate": 1.9369436373586874e-06, "loss": 61.1875, "step": 14815 }, { "epoch": 0.1402485777302373, "grad_norm": 241.40069580078125, "learning_rate": 1.936932922419876e-06, "loss": 21.4531, "step": 14816 }, { "epoch": 0.14025804375195236, "grad_norm": 550.1591186523438, "learning_rate": 1.936922206600408e-06, "loss": 30.7969, "step": 14817 }, { "epoch": 0.14026750977366742, "grad_norm": 319.7801513671875, "learning_rate": 1.936911489900295e-06, "loss": 27.1172, "step": 14818 }, { "epoch": 0.14027697579538248, "grad_norm": 435.9626159667969, "learning_rate": 1.9369007723195456e-06, "loss": 41.7188, "step": 14819 }, { "epoch": 0.14028644181709754, "grad_norm": 419.075439453125, "learning_rate": 1.9368900538581704e-06, "loss": 36.1875, "step": 14820 }, { "epoch": 0.14029590783881257, "grad_norm": 3.2273290157318115, "learning_rate": 1.9368793345161796e-06, "loss": 0.958, "step": 14821 }, { "epoch": 0.14030537386052763, "grad_norm": 855.9342651367188, "learning_rate": 1.9368686142935832e-06, "loss": 50.1641, "step": 14822 }, { "epoch": 0.1403148398822427, "grad_norm": 399.515380859375, "learning_rate": 1.936857893190391e-06, "loss": 34.0625, "step": 14823 }, { "epoch": 0.14032430590395775, "grad_norm": 3.0411877632141113, "learning_rate": 1.9368471712066133e-06, "loss": 0.9434, "step": 14824 }, { "epoch": 0.14033377192567278, "grad_norm": 467.5329895019531, "learning_rate": 1.9368364483422605e-06, "loss": 26.1953, "step": 14825 }, { "epoch": 0.14034323794738784, "grad_norm": 1178.7945556640625, "learning_rate": 1.9368257245973424e-06, "loss": 16.2031, "step": 14826 }, { "epoch": 0.1403527039691029, "grad_norm": 358.08551025390625, "learning_rate": 1.936814999971869e-06, "loss": 23.2812, "step": 14827 }, { "epoch": 0.14036216999081796, "grad_norm": 452.68731689453125, "learning_rate": 1.93680427446585e-06, "loss": 37.0703, "step": 14828 }, { "epoch": 0.14037163601253302, "grad_norm": 728.7552490234375, "learning_rate": 1.936793548079297e-06, "loss": 37.9375, "step": 14829 }, { "epoch": 0.14038110203424806, "grad_norm": 195.11766052246094, "learning_rate": 1.936782820812218e-06, "loss": 19.1094, "step": 14830 }, { "epoch": 0.14039056805596312, "grad_norm": 839.2469482421875, "learning_rate": 1.9367720926646246e-06, "loss": 51.2344, "step": 14831 }, { "epoch": 0.14040003407767818, "grad_norm": 275.2636413574219, "learning_rate": 1.936761363636526e-06, "loss": 15.9766, "step": 14832 }, { "epoch": 0.14040950009939324, "grad_norm": 426.24530029296875, "learning_rate": 1.936750633727933e-06, "loss": 23.5078, "step": 14833 }, { "epoch": 0.14041896612110827, "grad_norm": 514.3822631835938, "learning_rate": 1.9367399029388553e-06, "loss": 17.793, "step": 14834 }, { "epoch": 0.14042843214282333, "grad_norm": 383.7377624511719, "learning_rate": 1.9367291712693028e-06, "loss": 23.7266, "step": 14835 }, { "epoch": 0.1404378981645384, "grad_norm": 354.6600036621094, "learning_rate": 1.936718438719286e-06, "loss": 64.3594, "step": 14836 }, { "epoch": 0.14044736418625345, "grad_norm": 343.0989074707031, "learning_rate": 1.936707705288815e-06, "loss": 35.0625, "step": 14837 }, { "epoch": 0.1404568302079685, "grad_norm": 567.6829833984375, "learning_rate": 1.9366969709778994e-06, "loss": 41.3516, "step": 14838 }, { "epoch": 0.14046629622968354, "grad_norm": 286.7606201171875, "learning_rate": 1.9366862357865495e-06, "loss": 19.6875, "step": 14839 }, { "epoch": 0.1404757622513986, "grad_norm": 160.978759765625, "learning_rate": 1.9366754997147757e-06, "loss": 16.3672, "step": 14840 }, { "epoch": 0.14048522827311366, "grad_norm": 3.7228291034698486, "learning_rate": 1.9366647627625875e-06, "loss": 1.2271, "step": 14841 }, { "epoch": 0.14049469429482872, "grad_norm": 572.9854125976562, "learning_rate": 1.9366540249299957e-06, "loss": 50.8906, "step": 14842 }, { "epoch": 0.14050416031654375, "grad_norm": 884.3321533203125, "learning_rate": 1.93664328621701e-06, "loss": 37.5625, "step": 14843 }, { "epoch": 0.1405136263382588, "grad_norm": 693.8450317382812, "learning_rate": 1.9366325466236406e-06, "loss": 50.4219, "step": 14844 }, { "epoch": 0.14052309235997387, "grad_norm": 162.1572723388672, "learning_rate": 1.9366218061498976e-06, "loss": 17.8516, "step": 14845 }, { "epoch": 0.14053255838168893, "grad_norm": 246.1961669921875, "learning_rate": 1.936611064795791e-06, "loss": 16.7656, "step": 14846 }, { "epoch": 0.140542024403404, "grad_norm": 334.9472961425781, "learning_rate": 1.9366003225613308e-06, "loss": 19.5312, "step": 14847 }, { "epoch": 0.14055149042511902, "grad_norm": 192.149169921875, "learning_rate": 1.936589579446527e-06, "loss": 7.1348, "step": 14848 }, { "epoch": 0.14056095644683408, "grad_norm": 306.6083679199219, "learning_rate": 1.9365788354513904e-06, "loss": 13.0039, "step": 14849 }, { "epoch": 0.14057042246854914, "grad_norm": 502.3069152832031, "learning_rate": 1.9365680905759306e-06, "loss": 54.25, "step": 14850 }, { "epoch": 0.1405798884902642, "grad_norm": 529.5167236328125, "learning_rate": 1.9365573448201575e-06, "loss": 46.1406, "step": 14851 }, { "epoch": 0.14058935451197926, "grad_norm": 225.75418090820312, "learning_rate": 1.9365465981840813e-06, "loss": 21.1328, "step": 14852 }, { "epoch": 0.1405988205336943, "grad_norm": 264.4288024902344, "learning_rate": 1.9365358506677125e-06, "loss": 19.4062, "step": 14853 }, { "epoch": 0.14060828655540936, "grad_norm": 1039.6920166015625, "learning_rate": 1.9365251022710605e-06, "loss": 64.1016, "step": 14854 }, { "epoch": 0.14061775257712442, "grad_norm": 310.14556884765625, "learning_rate": 1.9365143529941363e-06, "loss": 24.7734, "step": 14855 }, { "epoch": 0.14062721859883948, "grad_norm": 362.9111328125, "learning_rate": 1.936503602836949e-06, "loss": 37.0469, "step": 14856 }, { "epoch": 0.1406366846205545, "grad_norm": 763.2771606445312, "learning_rate": 1.9364928517995098e-06, "loss": 10.6367, "step": 14857 }, { "epoch": 0.14064615064226957, "grad_norm": 245.13009643554688, "learning_rate": 1.936482099881828e-06, "loss": 21.7344, "step": 14858 }, { "epoch": 0.14065561666398463, "grad_norm": 3.131619930267334, "learning_rate": 1.9364713470839142e-06, "loss": 0.9819, "step": 14859 }, { "epoch": 0.1406650826856997, "grad_norm": 360.0959777832031, "learning_rate": 1.936460593405778e-06, "loss": 28.0703, "step": 14860 }, { "epoch": 0.14067454870741475, "grad_norm": 434.6404724121094, "learning_rate": 1.9364498388474297e-06, "loss": 26.9414, "step": 14861 }, { "epoch": 0.14068401472912978, "grad_norm": 2.6577236652374268, "learning_rate": 1.9364390834088795e-06, "loss": 0.79, "step": 14862 }, { "epoch": 0.14069348075084484, "grad_norm": 513.5977172851562, "learning_rate": 1.936428327090138e-06, "loss": 22.7969, "step": 14863 }, { "epoch": 0.1407029467725599, "grad_norm": 355.4776611328125, "learning_rate": 1.936417569891214e-06, "loss": 15.7344, "step": 14864 }, { "epoch": 0.14071241279427496, "grad_norm": 2.9007630348205566, "learning_rate": 1.9364068118121187e-06, "loss": 0.9897, "step": 14865 }, { "epoch": 0.14072187881599, "grad_norm": 415.6474914550781, "learning_rate": 1.936396052852862e-06, "loss": 32.0, "step": 14866 }, { "epoch": 0.14073134483770505, "grad_norm": 1756.56005859375, "learning_rate": 1.936385293013454e-06, "loss": 45.6719, "step": 14867 }, { "epoch": 0.1407408108594201, "grad_norm": 190.78494262695312, "learning_rate": 1.9363745322939045e-06, "loss": 21.9609, "step": 14868 }, { "epoch": 0.14075027688113517, "grad_norm": 276.0221862792969, "learning_rate": 1.9363637706942243e-06, "loss": 16.9297, "step": 14869 }, { "epoch": 0.14075974290285023, "grad_norm": 184.41880798339844, "learning_rate": 1.9363530082144226e-06, "loss": 25.5312, "step": 14870 }, { "epoch": 0.14076920892456526, "grad_norm": 275.74365234375, "learning_rate": 1.93634224485451e-06, "loss": 15.2344, "step": 14871 }, { "epoch": 0.14077867494628032, "grad_norm": 415.7905578613281, "learning_rate": 1.936331480614497e-06, "loss": 34.6562, "step": 14872 }, { "epoch": 0.14078814096799538, "grad_norm": 526.1575927734375, "learning_rate": 1.936320715494393e-06, "loss": 32.4375, "step": 14873 }, { "epoch": 0.14079760698971044, "grad_norm": 603.8992309570312, "learning_rate": 1.9363099494942086e-06, "loss": 41.7188, "step": 14874 }, { "epoch": 0.14080707301142548, "grad_norm": 849.7255859375, "learning_rate": 1.936299182613954e-06, "loss": 33.3672, "step": 14875 }, { "epoch": 0.14081653903314054, "grad_norm": 638.0672607421875, "learning_rate": 1.9362884148536382e-06, "loss": 28.6328, "step": 14876 }, { "epoch": 0.1408260050548556, "grad_norm": 348.4264831542969, "learning_rate": 1.936277646213273e-06, "loss": 26.0781, "step": 14877 }, { "epoch": 0.14083547107657066, "grad_norm": 259.0904846191406, "learning_rate": 1.9362668766928676e-06, "loss": 18.7734, "step": 14878 }, { "epoch": 0.14084493709828572, "grad_norm": 4.028494834899902, "learning_rate": 1.936256106292432e-06, "loss": 0.9824, "step": 14879 }, { "epoch": 0.14085440312000075, "grad_norm": 274.38751220703125, "learning_rate": 1.9362453350119766e-06, "loss": 27.0859, "step": 14880 }, { "epoch": 0.1408638691417158, "grad_norm": 567.98583984375, "learning_rate": 1.9362345628515116e-06, "loss": 21.7344, "step": 14881 }, { "epoch": 0.14087333516343087, "grad_norm": 294.11212158203125, "learning_rate": 1.9362237898110467e-06, "loss": 18.6055, "step": 14882 }, { "epoch": 0.14088280118514593, "grad_norm": 232.0441131591797, "learning_rate": 1.9362130158905925e-06, "loss": 19.0078, "step": 14883 }, { "epoch": 0.14089226720686096, "grad_norm": 3.058352470397949, "learning_rate": 1.9362022410901596e-06, "loss": 0.9204, "step": 14884 }, { "epoch": 0.14090173322857602, "grad_norm": 333.24786376953125, "learning_rate": 1.936191465409757e-06, "loss": 27.7969, "step": 14885 }, { "epoch": 0.14091119925029108, "grad_norm": 371.3109436035156, "learning_rate": 1.9361806888493953e-06, "loss": 48.1562, "step": 14886 }, { "epoch": 0.14092066527200614, "grad_norm": 359.437744140625, "learning_rate": 1.9361699114090847e-06, "loss": 17.3984, "step": 14887 }, { "epoch": 0.1409301312937212, "grad_norm": 1254.5804443359375, "learning_rate": 1.936159133088835e-06, "loss": 40.75, "step": 14888 }, { "epoch": 0.14093959731543623, "grad_norm": 190.00767517089844, "learning_rate": 1.9361483538886574e-06, "loss": 12.8398, "step": 14889 }, { "epoch": 0.1409490633371513, "grad_norm": 266.86273193359375, "learning_rate": 1.9361375738085605e-06, "loss": 40.7344, "step": 14890 }, { "epoch": 0.14095852935886635, "grad_norm": 424.8352355957031, "learning_rate": 1.9361267928485552e-06, "loss": 36.2812, "step": 14891 }, { "epoch": 0.1409679953805814, "grad_norm": 383.71978759765625, "learning_rate": 1.9361160110086517e-06, "loss": 24.0391, "step": 14892 }, { "epoch": 0.14097746140229644, "grad_norm": 2.852748394012451, "learning_rate": 1.9361052282888606e-06, "loss": 0.8389, "step": 14893 }, { "epoch": 0.1409869274240115, "grad_norm": 421.240478515625, "learning_rate": 1.936094444689191e-06, "loss": 31.4141, "step": 14894 }, { "epoch": 0.14099639344572656, "grad_norm": 3.516915798187256, "learning_rate": 1.9360836602096534e-06, "loss": 0.9912, "step": 14895 }, { "epoch": 0.14100585946744162, "grad_norm": 489.42236328125, "learning_rate": 1.936072874850258e-06, "loss": 49.7344, "step": 14896 }, { "epoch": 0.14101532548915668, "grad_norm": 226.53851318359375, "learning_rate": 1.9360620886110154e-06, "loss": 16.8594, "step": 14897 }, { "epoch": 0.14102479151087172, "grad_norm": 538.2565307617188, "learning_rate": 1.9360513014919354e-06, "loss": 38.1875, "step": 14898 }, { "epoch": 0.14103425753258678, "grad_norm": 543.2459106445312, "learning_rate": 1.936040513493028e-06, "loss": 30.6641, "step": 14899 }, { "epoch": 0.14104372355430184, "grad_norm": 196.1897735595703, "learning_rate": 1.936029724614303e-06, "loss": 14.957, "step": 14900 }, { "epoch": 0.1410531895760169, "grad_norm": 194.02976989746094, "learning_rate": 1.936018934855771e-06, "loss": 16.25, "step": 14901 }, { "epoch": 0.14106265559773193, "grad_norm": 268.9617919921875, "learning_rate": 1.9360081442174423e-06, "loss": 17.9219, "step": 14902 }, { "epoch": 0.141072121619447, "grad_norm": 174.6212158203125, "learning_rate": 1.935997352699327e-06, "loss": 24.2891, "step": 14903 }, { "epoch": 0.14108158764116205, "grad_norm": 279.06591796875, "learning_rate": 1.9359865603014346e-06, "loss": 31.625, "step": 14904 }, { "epoch": 0.1410910536628771, "grad_norm": 257.3699951171875, "learning_rate": 1.9359757670237763e-06, "loss": 20.5625, "step": 14905 }, { "epoch": 0.14110051968459217, "grad_norm": 261.7530212402344, "learning_rate": 1.9359649728663616e-06, "loss": 20.6914, "step": 14906 }, { "epoch": 0.1411099857063072, "grad_norm": 228.652587890625, "learning_rate": 1.9359541778292002e-06, "loss": 25.625, "step": 14907 }, { "epoch": 0.14111945172802226, "grad_norm": 398.1429443359375, "learning_rate": 1.935943381912303e-06, "loss": 14.3203, "step": 14908 }, { "epoch": 0.14112891774973732, "grad_norm": 590.08447265625, "learning_rate": 1.93593258511568e-06, "loss": 24.2109, "step": 14909 }, { "epoch": 0.14113838377145238, "grad_norm": 555.4364013671875, "learning_rate": 1.935921787439341e-06, "loss": 41.1719, "step": 14910 }, { "epoch": 0.1411478497931674, "grad_norm": 168.37217712402344, "learning_rate": 1.9359109888832966e-06, "loss": 20.25, "step": 14911 }, { "epoch": 0.14115731581488247, "grad_norm": 450.8891906738281, "learning_rate": 1.935900189447557e-06, "loss": 29.8203, "step": 14912 }, { "epoch": 0.14116678183659753, "grad_norm": 223.3078155517578, "learning_rate": 1.935889389132132e-06, "loss": 13.1719, "step": 14913 }, { "epoch": 0.1411762478583126, "grad_norm": 3.176321268081665, "learning_rate": 1.935878587937032e-06, "loss": 0.9463, "step": 14914 }, { "epoch": 0.14118571388002765, "grad_norm": 286.59185791015625, "learning_rate": 1.9358677858622663e-06, "loss": 27.2344, "step": 14915 }, { "epoch": 0.14119517990174268, "grad_norm": 849.7738037109375, "learning_rate": 1.9358569829078465e-06, "loss": 55.7031, "step": 14916 }, { "epoch": 0.14120464592345774, "grad_norm": 295.3871765136719, "learning_rate": 1.9358461790737817e-06, "loss": 16.9141, "step": 14917 }, { "epoch": 0.1412141119451728, "grad_norm": 487.6592102050781, "learning_rate": 1.9358353743600825e-06, "loss": 39.5469, "step": 14918 }, { "epoch": 0.14122357796688786, "grad_norm": 718.5209350585938, "learning_rate": 1.9358245687667587e-06, "loss": 38.0312, "step": 14919 }, { "epoch": 0.1412330439886029, "grad_norm": 477.38824462890625, "learning_rate": 1.935813762293821e-06, "loss": 27.2969, "step": 14920 }, { "epoch": 0.14124251001031796, "grad_norm": 488.3709716796875, "learning_rate": 1.935802954941279e-06, "loss": 37.5938, "step": 14921 }, { "epoch": 0.14125197603203302, "grad_norm": 243.21595764160156, "learning_rate": 1.935792146709143e-06, "loss": 20.625, "step": 14922 }, { "epoch": 0.14126144205374808, "grad_norm": 432.6713562011719, "learning_rate": 1.9357813375974235e-06, "loss": 38.9844, "step": 14923 }, { "epoch": 0.14127090807546314, "grad_norm": 563.6336669921875, "learning_rate": 1.9357705276061305e-06, "loss": 26.8242, "step": 14924 }, { "epoch": 0.14128037409717817, "grad_norm": 356.2821044921875, "learning_rate": 1.935759716735274e-06, "loss": 19.8281, "step": 14925 }, { "epoch": 0.14128984011889323, "grad_norm": 3.5303356647491455, "learning_rate": 1.935748904984864e-06, "loss": 1.0215, "step": 14926 }, { "epoch": 0.1412993061406083, "grad_norm": 275.6093444824219, "learning_rate": 1.935738092354911e-06, "loss": 23.2266, "step": 14927 }, { "epoch": 0.14130877216232335, "grad_norm": 242.27023315429688, "learning_rate": 1.9357272788454253e-06, "loss": 30.3281, "step": 14928 }, { "epoch": 0.14131823818403838, "grad_norm": 218.72962951660156, "learning_rate": 1.9357164644564165e-06, "loss": 23.4219, "step": 14929 }, { "epoch": 0.14132770420575344, "grad_norm": 434.3189392089844, "learning_rate": 1.935705649187895e-06, "loss": 37.5391, "step": 14930 }, { "epoch": 0.1413371702274685, "grad_norm": 749.3450317382812, "learning_rate": 1.9356948330398717e-06, "loss": 53.5469, "step": 14931 }, { "epoch": 0.14134663624918356, "grad_norm": 375.61151123046875, "learning_rate": 1.9356840160123557e-06, "loss": 37.2422, "step": 14932 }, { "epoch": 0.14135610227089862, "grad_norm": 333.8162841796875, "learning_rate": 1.9356731981053577e-06, "loss": 39.2969, "step": 14933 }, { "epoch": 0.14136556829261365, "grad_norm": 605.7168579101562, "learning_rate": 1.935662379318888e-06, "loss": 45.5469, "step": 14934 }, { "epoch": 0.1413750343143287, "grad_norm": 519.3638305664062, "learning_rate": 1.9356515596529558e-06, "loss": 46.4688, "step": 14935 }, { "epoch": 0.14138450033604377, "grad_norm": 818.6119384765625, "learning_rate": 1.9356407391075726e-06, "loss": 20.2422, "step": 14936 }, { "epoch": 0.14139396635775883, "grad_norm": 187.82460021972656, "learning_rate": 1.935629917682748e-06, "loss": 24.3359, "step": 14937 }, { "epoch": 0.1414034323794739, "grad_norm": 876.9950561523438, "learning_rate": 1.9356190953784917e-06, "loss": 30.1055, "step": 14938 }, { "epoch": 0.14141289840118892, "grad_norm": 337.6133117675781, "learning_rate": 1.9356082721948147e-06, "loss": 31.7578, "step": 14939 }, { "epoch": 0.14142236442290398, "grad_norm": 209.1629638671875, "learning_rate": 1.935597448131727e-06, "loss": 27.625, "step": 14940 }, { "epoch": 0.14143183044461904, "grad_norm": 254.03309631347656, "learning_rate": 1.9355866231892378e-06, "loss": 20.2031, "step": 14941 }, { "epoch": 0.1414412964663341, "grad_norm": 214.27084350585938, "learning_rate": 1.9355757973673585e-06, "loss": 17.4453, "step": 14942 }, { "epoch": 0.14145076248804914, "grad_norm": 3.5663514137268066, "learning_rate": 1.935564970666099e-06, "loss": 1.084, "step": 14943 }, { "epoch": 0.1414602285097642, "grad_norm": 360.5165710449219, "learning_rate": 1.935554143085469e-06, "loss": 35.3281, "step": 14944 }, { "epoch": 0.14146969453147926, "grad_norm": 179.7698211669922, "learning_rate": 1.935543314625479e-06, "loss": 17.2344, "step": 14945 }, { "epoch": 0.14147916055319432, "grad_norm": 908.4193115234375, "learning_rate": 1.935532485286139e-06, "loss": 38.875, "step": 14946 }, { "epoch": 0.14148862657490938, "grad_norm": 572.784912109375, "learning_rate": 1.9355216550674596e-06, "loss": 18.6016, "step": 14947 }, { "epoch": 0.1414980925966244, "grad_norm": 749.9091796875, "learning_rate": 1.9355108239694507e-06, "loss": 17.2266, "step": 14948 }, { "epoch": 0.14150755861833947, "grad_norm": 345.8460693359375, "learning_rate": 1.9354999919921225e-06, "loss": 19.3789, "step": 14949 }, { "epoch": 0.14151702464005453, "grad_norm": 396.0506286621094, "learning_rate": 1.935489159135485e-06, "loss": 49.6719, "step": 14950 }, { "epoch": 0.1415264906617696, "grad_norm": 186.4997100830078, "learning_rate": 1.935478325399549e-06, "loss": 22.0547, "step": 14951 }, { "epoch": 0.14153595668348462, "grad_norm": 312.6936950683594, "learning_rate": 1.9354674907843232e-06, "loss": 16.3438, "step": 14952 }, { "epoch": 0.14154542270519968, "grad_norm": 386.38531494140625, "learning_rate": 1.93545665528982e-06, "loss": 17.8438, "step": 14953 }, { "epoch": 0.14155488872691474, "grad_norm": 360.10150146484375, "learning_rate": 1.9354458189160476e-06, "loss": 24.3828, "step": 14954 }, { "epoch": 0.1415643547486298, "grad_norm": 296.5243835449219, "learning_rate": 1.9354349816630175e-06, "loss": 25.5312, "step": 14955 }, { "epoch": 0.14157382077034486, "grad_norm": 523.4317016601562, "learning_rate": 1.935424143530739e-06, "loss": 45.9219, "step": 14956 }, { "epoch": 0.1415832867920599, "grad_norm": 345.9701843261719, "learning_rate": 1.935413304519223e-06, "loss": 36.4531, "step": 14957 }, { "epoch": 0.14159275281377495, "grad_norm": 501.1931457519531, "learning_rate": 1.9354024646284787e-06, "loss": 55.1406, "step": 14958 }, { "epoch": 0.14160221883549, "grad_norm": 217.55792236328125, "learning_rate": 1.9353916238585174e-06, "loss": 19.2656, "step": 14959 }, { "epoch": 0.14161168485720507, "grad_norm": 426.17791748046875, "learning_rate": 1.935380782209349e-06, "loss": 44.8125, "step": 14960 }, { "epoch": 0.1416211508789201, "grad_norm": 372.80328369140625, "learning_rate": 1.935369939680983e-06, "loss": 45.5312, "step": 14961 }, { "epoch": 0.14163061690063516, "grad_norm": 181.2518310546875, "learning_rate": 1.9353590962734307e-06, "loss": 20.7031, "step": 14962 }, { "epoch": 0.14164008292235022, "grad_norm": 3.0146660804748535, "learning_rate": 1.935348251986701e-06, "loss": 0.8823, "step": 14963 }, { "epoch": 0.14164954894406528, "grad_norm": 1221.7445068359375, "learning_rate": 1.9353374068208053e-06, "loss": 11.3672, "step": 14964 }, { "epoch": 0.14165901496578034, "grad_norm": 165.58859252929688, "learning_rate": 1.9353265607757536e-06, "loss": 15.0938, "step": 14965 }, { "epoch": 0.14166848098749538, "grad_norm": 161.23675537109375, "learning_rate": 1.9353157138515553e-06, "loss": 21.9844, "step": 14966 }, { "epoch": 0.14167794700921044, "grad_norm": 467.98858642578125, "learning_rate": 1.935304866048221e-06, "loss": 44.9922, "step": 14967 }, { "epoch": 0.1416874130309255, "grad_norm": 209.8342742919922, "learning_rate": 1.9352940173657612e-06, "loss": 22.7109, "step": 14968 }, { "epoch": 0.14169687905264056, "grad_norm": 371.5445861816406, "learning_rate": 1.9352831678041857e-06, "loss": 27.1602, "step": 14969 }, { "epoch": 0.1417063450743556, "grad_norm": 213.59861755371094, "learning_rate": 1.9352723173635052e-06, "loss": 21.2734, "step": 14970 }, { "epoch": 0.14171581109607065, "grad_norm": 318.62664794921875, "learning_rate": 1.935261466043729e-06, "loss": 26.1562, "step": 14971 }, { "epoch": 0.1417252771177857, "grad_norm": 200.87779235839844, "learning_rate": 1.935250613844868e-06, "loss": 7.6875, "step": 14972 }, { "epoch": 0.14173474313950077, "grad_norm": 387.32568359375, "learning_rate": 1.9352397607669328e-06, "loss": 35.8125, "step": 14973 }, { "epoch": 0.14174420916121583, "grad_norm": 274.792724609375, "learning_rate": 1.9352289068099326e-06, "loss": 29.6719, "step": 14974 }, { "epoch": 0.14175367518293086, "grad_norm": 637.9340209960938, "learning_rate": 1.9352180519738783e-06, "loss": 29.293, "step": 14975 }, { "epoch": 0.14176314120464592, "grad_norm": 2.849439859390259, "learning_rate": 1.9352071962587795e-06, "loss": 0.874, "step": 14976 }, { "epoch": 0.14177260722636098, "grad_norm": 516.3411865234375, "learning_rate": 1.935196339664647e-06, "loss": 15.5508, "step": 14977 }, { "epoch": 0.14178207324807604, "grad_norm": 606.6853637695312, "learning_rate": 1.9351854821914906e-06, "loss": 31.3281, "step": 14978 }, { "epoch": 0.14179153926979107, "grad_norm": 836.5219116210938, "learning_rate": 1.9351746238393212e-06, "loss": 37.375, "step": 14979 }, { "epoch": 0.14180100529150613, "grad_norm": 781.0455322265625, "learning_rate": 1.935163764608148e-06, "loss": 33.2422, "step": 14980 }, { "epoch": 0.1418104713132212, "grad_norm": 369.2563171386719, "learning_rate": 1.935152904497982e-06, "loss": 30.125, "step": 14981 }, { "epoch": 0.14181993733493625, "grad_norm": 333.2777099609375, "learning_rate": 1.935142043508833e-06, "loss": 10.2852, "step": 14982 }, { "epoch": 0.1418294033566513, "grad_norm": 650.202392578125, "learning_rate": 1.9351311816407112e-06, "loss": 20.8672, "step": 14983 }, { "epoch": 0.14183886937836634, "grad_norm": 407.6846923828125, "learning_rate": 1.935120318893627e-06, "loss": 21.7812, "step": 14984 }, { "epoch": 0.1418483354000814, "grad_norm": 3.2504734992980957, "learning_rate": 1.9351094552675904e-06, "loss": 0.9248, "step": 14985 }, { "epoch": 0.14185780142179646, "grad_norm": 360.27508544921875, "learning_rate": 1.9350985907626122e-06, "loss": 23.3555, "step": 14986 }, { "epoch": 0.14186726744351152, "grad_norm": 219.43235778808594, "learning_rate": 1.935087725378702e-06, "loss": 27.5312, "step": 14987 }, { "epoch": 0.14187673346522656, "grad_norm": 310.915283203125, "learning_rate": 1.93507685911587e-06, "loss": 34.3438, "step": 14988 }, { "epoch": 0.14188619948694162, "grad_norm": 427.67108154296875, "learning_rate": 1.9350659919741267e-06, "loss": 29.6719, "step": 14989 }, { "epoch": 0.14189566550865668, "grad_norm": 709.0814208984375, "learning_rate": 1.935055123953482e-06, "loss": 46.5781, "step": 14990 }, { "epoch": 0.14190513153037174, "grad_norm": 717.1183471679688, "learning_rate": 1.9350442550539465e-06, "loss": 37.9414, "step": 14991 }, { "epoch": 0.1419145975520868, "grad_norm": 210.3656005859375, "learning_rate": 1.9350333852755296e-06, "loss": 22.4609, "step": 14992 }, { "epoch": 0.14192406357380183, "grad_norm": 240.66934204101562, "learning_rate": 1.9350225146182432e-06, "loss": 22.0898, "step": 14993 }, { "epoch": 0.1419335295955169, "grad_norm": 197.9385223388672, "learning_rate": 1.935011643082096e-06, "loss": 19.9688, "step": 14994 }, { "epoch": 0.14194299561723195, "grad_norm": 633.2142333984375, "learning_rate": 1.9350007706670983e-06, "loss": 43.6875, "step": 14995 }, { "epoch": 0.141952461638947, "grad_norm": 638.7588500976562, "learning_rate": 1.9349898973732614e-06, "loss": 35.6484, "step": 14996 }, { "epoch": 0.14196192766066204, "grad_norm": 205.34483337402344, "learning_rate": 1.934979023200594e-06, "loss": 18.3828, "step": 14997 }, { "epoch": 0.1419713936823771, "grad_norm": 542.4378051757812, "learning_rate": 1.9349681481491076e-06, "loss": 52.7578, "step": 14998 }, { "epoch": 0.14198085970409216, "grad_norm": 401.89617919921875, "learning_rate": 1.934957272218812e-06, "loss": 35.1641, "step": 14999 }, { "epoch": 0.14199032572580722, "grad_norm": 431.6573181152344, "learning_rate": 1.934946395409717e-06, "loss": 21.1641, "step": 15000 }, { "epoch": 0.14199979174752228, "grad_norm": 3.381633996963501, "learning_rate": 1.934935517721834e-06, "loss": 0.8657, "step": 15001 }, { "epoch": 0.1420092577692373, "grad_norm": 583.3131713867188, "learning_rate": 1.9349246391551718e-06, "loss": 49.418, "step": 15002 }, { "epoch": 0.14201872379095237, "grad_norm": 626.3351440429688, "learning_rate": 1.9349137597097415e-06, "loss": 33.4531, "step": 15003 }, { "epoch": 0.14202818981266743, "grad_norm": 210.5750274658203, "learning_rate": 1.934902879385553e-06, "loss": 14.3125, "step": 15004 }, { "epoch": 0.1420376558343825, "grad_norm": 234.41673278808594, "learning_rate": 1.9348919981826168e-06, "loss": 19.4062, "step": 15005 }, { "epoch": 0.14204712185609752, "grad_norm": 513.8355712890625, "learning_rate": 1.934881116100943e-06, "loss": 58.8438, "step": 15006 }, { "epoch": 0.14205658787781258, "grad_norm": 978.072265625, "learning_rate": 1.9348702331405416e-06, "loss": 33.2109, "step": 15007 }, { "epoch": 0.14206605389952764, "grad_norm": 506.51751708984375, "learning_rate": 1.9348593493014227e-06, "loss": 29.8906, "step": 15008 }, { "epoch": 0.1420755199212427, "grad_norm": 217.81178283691406, "learning_rate": 1.9348484645835974e-06, "loss": 21.3828, "step": 15009 }, { "epoch": 0.14208498594295776, "grad_norm": 297.2958068847656, "learning_rate": 1.934837578987075e-06, "loss": 28.0312, "step": 15010 }, { "epoch": 0.1420944519646728, "grad_norm": 408.3726806640625, "learning_rate": 1.934826692511866e-06, "loss": 28.2109, "step": 15011 }, { "epoch": 0.14210391798638786, "grad_norm": 497.74847412109375, "learning_rate": 1.934815805157981e-06, "loss": 45.7305, "step": 15012 }, { "epoch": 0.14211338400810292, "grad_norm": 3.1334245204925537, "learning_rate": 1.93480491692543e-06, "loss": 0.9268, "step": 15013 }, { "epoch": 0.14212285002981798, "grad_norm": 420.79779052734375, "learning_rate": 1.934794027814223e-06, "loss": 45.2188, "step": 15014 }, { "epoch": 0.142132316051533, "grad_norm": 1309.2716064453125, "learning_rate": 1.9347831378243704e-06, "loss": 28.7188, "step": 15015 }, { "epoch": 0.14214178207324807, "grad_norm": 252.41717529296875, "learning_rate": 1.934772246955882e-06, "loss": 13.9648, "step": 15016 }, { "epoch": 0.14215124809496313, "grad_norm": 171.01271057128906, "learning_rate": 1.9347613552087693e-06, "loss": 23.9766, "step": 15017 }, { "epoch": 0.1421607141166782, "grad_norm": 177.41879272460938, "learning_rate": 1.9347504625830413e-06, "loss": 19.1328, "step": 15018 }, { "epoch": 0.14217018013839325, "grad_norm": 209.6791534423828, "learning_rate": 1.934739569078709e-06, "loss": 6.4121, "step": 15019 }, { "epoch": 0.14217964616010828, "grad_norm": 150.61643981933594, "learning_rate": 1.934728674695782e-06, "loss": 18.6484, "step": 15020 }, { "epoch": 0.14218911218182334, "grad_norm": 252.52371215820312, "learning_rate": 1.934717779434271e-06, "loss": 20.1406, "step": 15021 }, { "epoch": 0.1421985782035384, "grad_norm": 407.21893310546875, "learning_rate": 1.934706883294186e-06, "loss": 13.4375, "step": 15022 }, { "epoch": 0.14220804422525346, "grad_norm": 833.4176025390625, "learning_rate": 1.9346959862755376e-06, "loss": 59.1719, "step": 15023 }, { "epoch": 0.14221751024696852, "grad_norm": 250.90589904785156, "learning_rate": 1.9346850883783357e-06, "loss": 22.9023, "step": 15024 }, { "epoch": 0.14222697626868355, "grad_norm": 409.11981201171875, "learning_rate": 1.9346741896025904e-06, "loss": 37.1094, "step": 15025 }, { "epoch": 0.1422364422903986, "grad_norm": 257.3052062988281, "learning_rate": 1.9346632899483123e-06, "loss": 23.1562, "step": 15026 }, { "epoch": 0.14224590831211367, "grad_norm": 347.3602294921875, "learning_rate": 1.9346523894155114e-06, "loss": 12.8008, "step": 15027 }, { "epoch": 0.14225537433382873, "grad_norm": 959.337890625, "learning_rate": 1.934641488004198e-06, "loss": 58.7891, "step": 15028 }, { "epoch": 0.14226484035554376, "grad_norm": 286.35760498046875, "learning_rate": 1.934630585714383e-06, "loss": 26.8047, "step": 15029 }, { "epoch": 0.14227430637725882, "grad_norm": 248.2887420654297, "learning_rate": 1.934619682546075e-06, "loss": 8.1992, "step": 15030 }, { "epoch": 0.14228377239897388, "grad_norm": 555.1014404296875, "learning_rate": 1.934608778499286e-06, "loss": 21.4219, "step": 15031 }, { "epoch": 0.14229323842068894, "grad_norm": 339.97015380859375, "learning_rate": 1.9345978735740256e-06, "loss": 32.9375, "step": 15032 }, { "epoch": 0.142302704442404, "grad_norm": 389.6776428222656, "learning_rate": 1.934586967770304e-06, "loss": 12.4121, "step": 15033 }, { "epoch": 0.14231217046411904, "grad_norm": 821.6337890625, "learning_rate": 1.934576061088131e-06, "loss": 69.7031, "step": 15034 }, { "epoch": 0.1423216364858341, "grad_norm": 585.4558715820312, "learning_rate": 1.934565153527518e-06, "loss": 18.4453, "step": 15035 }, { "epoch": 0.14233110250754916, "grad_norm": 180.5910186767578, "learning_rate": 1.934554245088474e-06, "loss": 24.3516, "step": 15036 }, { "epoch": 0.14234056852926422, "grad_norm": 508.0458984375, "learning_rate": 1.93454333577101e-06, "loss": 19.3281, "step": 15037 }, { "epoch": 0.14235003455097925, "grad_norm": 283.2483215332031, "learning_rate": 1.934532425575136e-06, "loss": 16.7031, "step": 15038 }, { "epoch": 0.1423595005726943, "grad_norm": 625.2389526367188, "learning_rate": 1.9345215145008626e-06, "loss": 25.7148, "step": 15039 }, { "epoch": 0.14236896659440937, "grad_norm": 807.3861083984375, "learning_rate": 1.9345106025481993e-06, "loss": 44.2188, "step": 15040 }, { "epoch": 0.14237843261612443, "grad_norm": 284.7561340332031, "learning_rate": 1.934499689717157e-06, "loss": 16.6797, "step": 15041 }, { "epoch": 0.1423878986378395, "grad_norm": 345.09906005859375, "learning_rate": 1.934488776007746e-06, "loss": 30.8516, "step": 15042 }, { "epoch": 0.14239736465955452, "grad_norm": 2.9000608921051025, "learning_rate": 1.9344778614199758e-06, "loss": 0.8916, "step": 15043 }, { "epoch": 0.14240683068126958, "grad_norm": 4.282545566558838, "learning_rate": 1.934466945953858e-06, "loss": 1.0117, "step": 15044 }, { "epoch": 0.14241629670298464, "grad_norm": 680.4677734375, "learning_rate": 1.9344560296094016e-06, "loss": 49.2344, "step": 15045 }, { "epoch": 0.1424257627246997, "grad_norm": 325.0600280761719, "learning_rate": 1.934445112386617e-06, "loss": 26.5938, "step": 15046 }, { "epoch": 0.14243522874641473, "grad_norm": 376.8807067871094, "learning_rate": 1.934434194285515e-06, "loss": 24.0547, "step": 15047 }, { "epoch": 0.1424446947681298, "grad_norm": 3.2208545207977295, "learning_rate": 1.934423275306106e-06, "loss": 1.002, "step": 15048 }, { "epoch": 0.14245416078984485, "grad_norm": 342.3756408691406, "learning_rate": 1.9344123554483994e-06, "loss": 20.0078, "step": 15049 }, { "epoch": 0.1424636268115599, "grad_norm": 277.0859375, "learning_rate": 1.9344014347124068e-06, "loss": 13.4141, "step": 15050 }, { "epoch": 0.14247309283327497, "grad_norm": 370.1090087890625, "learning_rate": 1.9343905130981367e-06, "loss": 41.2812, "step": 15051 }, { "epoch": 0.14248255885499, "grad_norm": 649.1536865234375, "learning_rate": 1.934379590605601e-06, "loss": 27.2188, "step": 15052 }, { "epoch": 0.14249202487670506, "grad_norm": 254.20254516601562, "learning_rate": 1.9343686672348086e-06, "loss": 20.3086, "step": 15053 }, { "epoch": 0.14250149089842012, "grad_norm": 176.53903198242188, "learning_rate": 1.934357742985771e-06, "loss": 24.875, "step": 15054 }, { "epoch": 0.14251095692013518, "grad_norm": 194.96331787109375, "learning_rate": 1.9343468178584977e-06, "loss": 22.0391, "step": 15055 }, { "epoch": 0.14252042294185022, "grad_norm": 176.11834716796875, "learning_rate": 1.934335891852999e-06, "loss": 15.7656, "step": 15056 }, { "epoch": 0.14252988896356528, "grad_norm": 417.9226989746094, "learning_rate": 1.934324964969286e-06, "loss": 43.0625, "step": 15057 }, { "epoch": 0.14253935498528034, "grad_norm": 224.34561157226562, "learning_rate": 1.9343140372073675e-06, "loss": 23.6094, "step": 15058 }, { "epoch": 0.1425488210069954, "grad_norm": 265.7191162109375, "learning_rate": 1.934303108567255e-06, "loss": 22.6328, "step": 15059 }, { "epoch": 0.14255828702871046, "grad_norm": 401.9120788574219, "learning_rate": 1.9342921790489585e-06, "loss": 37.3984, "step": 15060 }, { "epoch": 0.1425677530504255, "grad_norm": 480.4211730957031, "learning_rate": 1.9342812486524877e-06, "loss": 19.1875, "step": 15061 }, { "epoch": 0.14257721907214055, "grad_norm": 155.780029296875, "learning_rate": 1.9342703173778534e-06, "loss": 18.6523, "step": 15062 }, { "epoch": 0.1425866850938556, "grad_norm": 179.51998901367188, "learning_rate": 1.934259385225066e-06, "loss": 7.0273, "step": 15063 }, { "epoch": 0.14259615111557067, "grad_norm": 412.17840576171875, "learning_rate": 1.9342484521941358e-06, "loss": 27.1094, "step": 15064 }, { "epoch": 0.1426056171372857, "grad_norm": 3.1990880966186523, "learning_rate": 1.9342375182850723e-06, "loss": 0.9102, "step": 15065 }, { "epoch": 0.14261508315900076, "grad_norm": 220.90377807617188, "learning_rate": 1.9342265834978864e-06, "loss": 21.7539, "step": 15066 }, { "epoch": 0.14262454918071582, "grad_norm": 166.53692626953125, "learning_rate": 1.9342156478325882e-06, "loss": 20.4141, "step": 15067 }, { "epoch": 0.14263401520243088, "grad_norm": 1002.5890502929688, "learning_rate": 1.9342047112891888e-06, "loss": 46.9375, "step": 15068 }, { "epoch": 0.14264348122414594, "grad_norm": 233.7747802734375, "learning_rate": 1.934193773867697e-06, "loss": 18.4844, "step": 15069 }, { "epoch": 0.14265294724586097, "grad_norm": 298.31866455078125, "learning_rate": 1.934182835568124e-06, "loss": 22.9062, "step": 15070 }, { "epoch": 0.14266241326757603, "grad_norm": 1667.442626953125, "learning_rate": 1.9341718963904802e-06, "loss": 12.1992, "step": 15071 }, { "epoch": 0.1426718792892911, "grad_norm": 499.3363952636719, "learning_rate": 1.9341609563347752e-06, "loss": 39.8906, "step": 15072 }, { "epoch": 0.14268134531100615, "grad_norm": 248.62149047851562, "learning_rate": 1.93415001540102e-06, "loss": 15.1055, "step": 15073 }, { "epoch": 0.14269081133272118, "grad_norm": 306.1246643066406, "learning_rate": 1.934139073589224e-06, "loss": 21.3438, "step": 15074 }, { "epoch": 0.14270027735443624, "grad_norm": 566.353271484375, "learning_rate": 1.9341281308993982e-06, "loss": 40.9219, "step": 15075 }, { "epoch": 0.1427097433761513, "grad_norm": 234.3957061767578, "learning_rate": 1.9341171873315533e-06, "loss": 15.5234, "step": 15076 }, { "epoch": 0.14271920939786636, "grad_norm": 460.35845947265625, "learning_rate": 1.9341062428856985e-06, "loss": 53.1875, "step": 15077 }, { "epoch": 0.14272867541958142, "grad_norm": 315.42718505859375, "learning_rate": 1.9340952975618446e-06, "loss": 22.9531, "step": 15078 }, { "epoch": 0.14273814144129646, "grad_norm": 372.8793640136719, "learning_rate": 1.934084351360002e-06, "loss": 18.8438, "step": 15079 }, { "epoch": 0.14274760746301152, "grad_norm": 405.15338134765625, "learning_rate": 1.934073404280181e-06, "loss": 7.875, "step": 15080 }, { "epoch": 0.14275707348472658, "grad_norm": 3.618328094482422, "learning_rate": 1.9340624563223918e-06, "loss": 1.0308, "step": 15081 }, { "epoch": 0.14276653950644164, "grad_norm": 612.0001831054688, "learning_rate": 1.9340515074866444e-06, "loss": 48.6562, "step": 15082 }, { "epoch": 0.14277600552815667, "grad_norm": 368.4115295410156, "learning_rate": 1.9340405577729494e-06, "loss": 35.2344, "step": 15083 }, { "epoch": 0.14278547154987173, "grad_norm": 385.4376220703125, "learning_rate": 1.9340296071813175e-06, "loss": 17.7266, "step": 15084 }, { "epoch": 0.1427949375715868, "grad_norm": 115.67277526855469, "learning_rate": 1.9340186557117576e-06, "loss": 12.7969, "step": 15085 }, { "epoch": 0.14280440359330185, "grad_norm": 318.2754821777344, "learning_rate": 1.9340077033642815e-06, "loss": 17.6641, "step": 15086 }, { "epoch": 0.1428138696150169, "grad_norm": 478.5979919433594, "learning_rate": 1.933996750138899e-06, "loss": 60.5078, "step": 15087 }, { "epoch": 0.14282333563673194, "grad_norm": 536.027587890625, "learning_rate": 1.9339857960356205e-06, "loss": 34.5234, "step": 15088 }, { "epoch": 0.142832801658447, "grad_norm": 1008.610107421875, "learning_rate": 1.9339748410544555e-06, "loss": 59.125, "step": 15089 }, { "epoch": 0.14284226768016206, "grad_norm": 345.2802734375, "learning_rate": 1.9339638851954154e-06, "loss": 12.5469, "step": 15090 }, { "epoch": 0.14285173370187712, "grad_norm": 272.3673400878906, "learning_rate": 1.9339529284585095e-06, "loss": 17.7031, "step": 15091 }, { "epoch": 0.14286119972359215, "grad_norm": 847.7354736328125, "learning_rate": 1.9339419708437492e-06, "loss": 54.1641, "step": 15092 }, { "epoch": 0.1428706657453072, "grad_norm": 668.4669189453125, "learning_rate": 1.933931012351144e-06, "loss": 44.8438, "step": 15093 }, { "epoch": 0.14288013176702227, "grad_norm": 198.38461303710938, "learning_rate": 1.933920052980704e-06, "loss": 15.8672, "step": 15094 }, { "epoch": 0.14288959778873733, "grad_norm": 1019.2544555664062, "learning_rate": 1.9339090927324406e-06, "loss": 29.0117, "step": 15095 }, { "epoch": 0.1428990638104524, "grad_norm": 273.0321960449219, "learning_rate": 1.933898131606363e-06, "loss": 18.3828, "step": 15096 }, { "epoch": 0.14290852983216742, "grad_norm": 219.0131072998047, "learning_rate": 1.933887169602482e-06, "loss": 19.5703, "step": 15097 }, { "epoch": 0.14291799585388248, "grad_norm": 381.2464904785156, "learning_rate": 1.9338762067208078e-06, "loss": 31.5469, "step": 15098 }, { "epoch": 0.14292746187559754, "grad_norm": 445.5354309082031, "learning_rate": 1.9338652429613504e-06, "loss": 29.1406, "step": 15099 }, { "epoch": 0.1429369278973126, "grad_norm": 304.3021545410156, "learning_rate": 1.933854278324121e-06, "loss": 10.2344, "step": 15100 }, { "epoch": 0.14294639391902764, "grad_norm": 588.8605346679688, "learning_rate": 1.933843312809129e-06, "loss": 66.8906, "step": 15101 }, { "epoch": 0.1429558599407427, "grad_norm": 320.7030334472656, "learning_rate": 1.933832346416385e-06, "loss": 26.0781, "step": 15102 }, { "epoch": 0.14296532596245776, "grad_norm": 210.2862548828125, "learning_rate": 1.9338213791458995e-06, "loss": 18.7539, "step": 15103 }, { "epoch": 0.14297479198417282, "grad_norm": 900.4187622070312, "learning_rate": 1.9338104109976827e-06, "loss": 22.1992, "step": 15104 }, { "epoch": 0.14298425800588788, "grad_norm": 187.70144653320312, "learning_rate": 1.9337994419717446e-06, "loss": 17.7969, "step": 15105 }, { "epoch": 0.1429937240276029, "grad_norm": 316.35308837890625, "learning_rate": 1.933788472068096e-06, "loss": 18.4141, "step": 15106 }, { "epoch": 0.14300319004931797, "grad_norm": 243.87332153320312, "learning_rate": 1.933777501286747e-06, "loss": 16.1094, "step": 15107 }, { "epoch": 0.14301265607103303, "grad_norm": 3.401803731918335, "learning_rate": 1.9337665296277078e-06, "loss": 0.8462, "step": 15108 }, { "epoch": 0.1430221220927481, "grad_norm": 169.64222717285156, "learning_rate": 1.9337555570909883e-06, "loss": 18.7031, "step": 15109 }, { "epoch": 0.14303158811446312, "grad_norm": 214.14158630371094, "learning_rate": 1.9337445836766e-06, "loss": 21.5234, "step": 15110 }, { "epoch": 0.14304105413617818, "grad_norm": 447.16326904296875, "learning_rate": 1.9337336093845526e-06, "loss": 45.3594, "step": 15111 }, { "epoch": 0.14305052015789324, "grad_norm": 607.8016357421875, "learning_rate": 1.9337226342148562e-06, "loss": 28.2969, "step": 15112 }, { "epoch": 0.1430599861796083, "grad_norm": 302.921875, "learning_rate": 1.933711658167521e-06, "loss": 26.4141, "step": 15113 }, { "epoch": 0.14306945220132336, "grad_norm": 364.46844482421875, "learning_rate": 1.9337006812425577e-06, "loss": 27.2891, "step": 15114 }, { "epoch": 0.1430789182230384, "grad_norm": 286.7474365234375, "learning_rate": 1.9336897034399767e-06, "loss": 20.0859, "step": 15115 }, { "epoch": 0.14308838424475345, "grad_norm": 265.363037109375, "learning_rate": 1.933678724759788e-06, "loss": 26.3984, "step": 15116 }, { "epoch": 0.1430978502664685, "grad_norm": 274.6993713378906, "learning_rate": 1.933667745202002e-06, "loss": 8.3027, "step": 15117 }, { "epoch": 0.14310731628818357, "grad_norm": 409.05084228515625, "learning_rate": 1.9336567647666293e-06, "loss": 42.7422, "step": 15118 }, { "epoch": 0.14311678230989863, "grad_norm": 237.0898895263672, "learning_rate": 1.9336457834536795e-06, "loss": 18.375, "step": 15119 }, { "epoch": 0.14312624833161366, "grad_norm": 462.790283203125, "learning_rate": 1.9336348012631637e-06, "loss": 14.1309, "step": 15120 }, { "epoch": 0.14313571435332872, "grad_norm": 314.2847595214844, "learning_rate": 1.933623818195092e-06, "loss": 42.2266, "step": 15121 }, { "epoch": 0.14314518037504378, "grad_norm": 506.0605163574219, "learning_rate": 1.9336128342494743e-06, "loss": 23.7656, "step": 15122 }, { "epoch": 0.14315464639675884, "grad_norm": 768.1614379882812, "learning_rate": 1.9336018494263217e-06, "loss": 16.1484, "step": 15123 }, { "epoch": 0.14316411241847388, "grad_norm": 304.1311950683594, "learning_rate": 1.933590863725644e-06, "loss": 35.8906, "step": 15124 }, { "epoch": 0.14317357844018894, "grad_norm": 440.6781921386719, "learning_rate": 1.9335798771474512e-06, "loss": 27.0703, "step": 15125 }, { "epoch": 0.143183044461904, "grad_norm": 329.81646728515625, "learning_rate": 1.9335688896917546e-06, "loss": 31.5547, "step": 15126 }, { "epoch": 0.14319251048361906, "grad_norm": 238.33580017089844, "learning_rate": 1.9335579013585635e-06, "loss": 32.8594, "step": 15127 }, { "epoch": 0.14320197650533412, "grad_norm": 266.1275329589844, "learning_rate": 1.9335469121478887e-06, "loss": 19.9766, "step": 15128 }, { "epoch": 0.14321144252704915, "grad_norm": 406.73370361328125, "learning_rate": 1.933535922059741e-06, "loss": 18.6875, "step": 15129 }, { "epoch": 0.1432209085487642, "grad_norm": 277.3866271972656, "learning_rate": 1.93352493109413e-06, "loss": 23.7422, "step": 15130 }, { "epoch": 0.14323037457047927, "grad_norm": 234.54129028320312, "learning_rate": 1.9335139392510665e-06, "loss": 17.8867, "step": 15131 }, { "epoch": 0.14323984059219433, "grad_norm": 417.92999267578125, "learning_rate": 1.9335029465305604e-06, "loss": 30.7969, "step": 15132 }, { "epoch": 0.14324930661390936, "grad_norm": 391.29705810546875, "learning_rate": 1.9334919529326225e-06, "loss": 43.9062, "step": 15133 }, { "epoch": 0.14325877263562442, "grad_norm": 276.12969970703125, "learning_rate": 1.9334809584572626e-06, "loss": 23.0547, "step": 15134 }, { "epoch": 0.14326823865733948, "grad_norm": 456.70355224609375, "learning_rate": 1.9334699631044915e-06, "loss": 44.4297, "step": 15135 }, { "epoch": 0.14327770467905454, "grad_norm": 1445.6588134765625, "learning_rate": 1.933458966874319e-06, "loss": 26.3438, "step": 15136 }, { "epoch": 0.1432871707007696, "grad_norm": 518.8522338867188, "learning_rate": 1.933447969766756e-06, "loss": 21.8359, "step": 15137 }, { "epoch": 0.14329663672248463, "grad_norm": 379.80047607421875, "learning_rate": 1.933436971781813e-06, "loss": 28.2188, "step": 15138 }, { "epoch": 0.1433061027441997, "grad_norm": 373.9418640136719, "learning_rate": 1.9334259729194998e-06, "loss": 20.4609, "step": 15139 }, { "epoch": 0.14331556876591475, "grad_norm": 597.2593383789062, "learning_rate": 1.9334149731798268e-06, "loss": 40.1406, "step": 15140 }, { "epoch": 0.1433250347876298, "grad_norm": 616.8663940429688, "learning_rate": 1.9334039725628043e-06, "loss": 47.2188, "step": 15141 }, { "epoch": 0.14333450080934484, "grad_norm": 260.93511962890625, "learning_rate": 1.9333929710684432e-06, "loss": 17.1641, "step": 15142 }, { "epoch": 0.1433439668310599, "grad_norm": 271.7450866699219, "learning_rate": 1.933381968696753e-06, "loss": 16.9844, "step": 15143 }, { "epoch": 0.14335343285277496, "grad_norm": 276.700439453125, "learning_rate": 1.933370965447745e-06, "loss": 20.4141, "step": 15144 }, { "epoch": 0.14336289887449002, "grad_norm": 784.4716186523438, "learning_rate": 1.9333599613214286e-06, "loss": 50.2734, "step": 15145 }, { "epoch": 0.14337236489620508, "grad_norm": 506.24591064453125, "learning_rate": 1.9333489563178143e-06, "loss": 29.2031, "step": 15146 }, { "epoch": 0.14338183091792012, "grad_norm": 443.435546875, "learning_rate": 1.933337950436913e-06, "loss": 48.1094, "step": 15147 }, { "epoch": 0.14339129693963518, "grad_norm": 210.49887084960938, "learning_rate": 1.933326943678735e-06, "loss": 9.0352, "step": 15148 }, { "epoch": 0.14340076296135024, "grad_norm": 599.705810546875, "learning_rate": 1.93331593604329e-06, "loss": 20.6719, "step": 15149 }, { "epoch": 0.1434102289830653, "grad_norm": 3.3060007095336914, "learning_rate": 1.933304927530589e-06, "loss": 0.9785, "step": 15150 }, { "epoch": 0.14341969500478033, "grad_norm": 285.97113037109375, "learning_rate": 1.933293918140642e-06, "loss": 40.8906, "step": 15151 }, { "epoch": 0.1434291610264954, "grad_norm": 847.18798828125, "learning_rate": 1.9332829078734592e-06, "loss": 35.2969, "step": 15152 }, { "epoch": 0.14343862704821045, "grad_norm": 3.4034831523895264, "learning_rate": 1.9332718967290513e-06, "loss": 0.8813, "step": 15153 }, { "epoch": 0.1434480930699255, "grad_norm": 305.17889404296875, "learning_rate": 1.933260884707429e-06, "loss": 43.4219, "step": 15154 }, { "epoch": 0.14345755909164057, "grad_norm": 2.9368278980255127, "learning_rate": 1.9332498718086015e-06, "loss": 0.9097, "step": 15155 }, { "epoch": 0.1434670251133556, "grad_norm": 3.9697649478912354, "learning_rate": 1.9332388580325804e-06, "loss": 1.0278, "step": 15156 }, { "epoch": 0.14347649113507066, "grad_norm": 398.1662902832031, "learning_rate": 1.933227843379375e-06, "loss": 42.5781, "step": 15157 }, { "epoch": 0.14348595715678572, "grad_norm": 173.739990234375, "learning_rate": 1.933216827848996e-06, "loss": 15.4297, "step": 15158 }, { "epoch": 0.14349542317850078, "grad_norm": 3.239828586578369, "learning_rate": 1.9332058114414545e-06, "loss": 0.9136, "step": 15159 }, { "epoch": 0.1435048892002158, "grad_norm": 181.7291717529297, "learning_rate": 1.9331947941567603e-06, "loss": 15.25, "step": 15160 }, { "epoch": 0.14351435522193087, "grad_norm": 842.88037109375, "learning_rate": 1.933183775994923e-06, "loss": 41.1328, "step": 15161 }, { "epoch": 0.14352382124364593, "grad_norm": 319.208251953125, "learning_rate": 1.933172756955954e-06, "loss": 24.4062, "step": 15162 }, { "epoch": 0.143533287265361, "grad_norm": 353.47711181640625, "learning_rate": 1.9331617370398635e-06, "loss": 28.4609, "step": 15163 }, { "epoch": 0.14354275328707605, "grad_norm": 1818.3917236328125, "learning_rate": 1.9331507162466614e-06, "loss": 28.707, "step": 15164 }, { "epoch": 0.14355221930879108, "grad_norm": 1108.818359375, "learning_rate": 1.933139694576359e-06, "loss": 55.2734, "step": 15165 }, { "epoch": 0.14356168533050614, "grad_norm": 349.72412109375, "learning_rate": 1.933128672028965e-06, "loss": 25.0, "step": 15166 }, { "epoch": 0.1435711513522212, "grad_norm": 399.8388977050781, "learning_rate": 1.9331176486044912e-06, "loss": 21.4219, "step": 15167 }, { "epoch": 0.14358061737393626, "grad_norm": 364.91693115234375, "learning_rate": 1.9331066243029478e-06, "loss": 17.6992, "step": 15168 }, { "epoch": 0.1435900833956513, "grad_norm": 401.1291809082031, "learning_rate": 1.9330955991243444e-06, "loss": 49.5, "step": 15169 }, { "epoch": 0.14359954941736636, "grad_norm": 1327.2767333984375, "learning_rate": 1.933084573068692e-06, "loss": 46.7031, "step": 15170 }, { "epoch": 0.14360901543908142, "grad_norm": 497.6336364746094, "learning_rate": 1.933073546136001e-06, "loss": 32.4141, "step": 15171 }, { "epoch": 0.14361848146079648, "grad_norm": 607.8160400390625, "learning_rate": 1.9330625183262813e-06, "loss": 45.7031, "step": 15172 }, { "epoch": 0.14362794748251154, "grad_norm": 2369.32470703125, "learning_rate": 1.9330514896395436e-06, "loss": 21.0371, "step": 15173 }, { "epoch": 0.14363741350422657, "grad_norm": 459.57073974609375, "learning_rate": 1.933040460075798e-06, "loss": 49.4961, "step": 15174 }, { "epoch": 0.14364687952594163, "grad_norm": 300.58367919921875, "learning_rate": 1.9330294296350556e-06, "loss": 31.168, "step": 15175 }, { "epoch": 0.1436563455476567, "grad_norm": 195.72265625, "learning_rate": 1.9330183983173256e-06, "loss": 24.4062, "step": 15176 }, { "epoch": 0.14366581156937175, "grad_norm": 258.92645263671875, "learning_rate": 1.9330073661226197e-06, "loss": 21.6016, "step": 15177 }, { "epoch": 0.14367527759108678, "grad_norm": 270.21636962890625, "learning_rate": 1.9329963330509474e-06, "loss": 16.1953, "step": 15178 }, { "epoch": 0.14368474361280184, "grad_norm": 450.8699951171875, "learning_rate": 1.9329852991023187e-06, "loss": 23.1406, "step": 15179 }, { "epoch": 0.1436942096345169, "grad_norm": 852.3424682617188, "learning_rate": 1.932974264276745e-06, "loss": 40.7422, "step": 15180 }, { "epoch": 0.14370367565623196, "grad_norm": 454.4208679199219, "learning_rate": 1.9329632285742362e-06, "loss": 24.2578, "step": 15181 }, { "epoch": 0.14371314167794702, "grad_norm": 1114.3001708984375, "learning_rate": 1.9329521919948024e-06, "loss": 70.6562, "step": 15182 }, { "epoch": 0.14372260769966205, "grad_norm": 755.0794677734375, "learning_rate": 1.9329411545384543e-06, "loss": 48.5312, "step": 15183 }, { "epoch": 0.1437320737213771, "grad_norm": 569.6973876953125, "learning_rate": 1.9329301162052023e-06, "loss": 22.25, "step": 15184 }, { "epoch": 0.14374153974309217, "grad_norm": 458.7666015625, "learning_rate": 1.9329190769950565e-06, "loss": 32.7266, "step": 15185 }, { "epoch": 0.14375100576480723, "grad_norm": 279.8185119628906, "learning_rate": 1.932908036908028e-06, "loss": 19.1719, "step": 15186 }, { "epoch": 0.14376047178652226, "grad_norm": 209.25303649902344, "learning_rate": 1.932896995944126e-06, "loss": 24.6562, "step": 15187 }, { "epoch": 0.14376993780823732, "grad_norm": 313.22979736328125, "learning_rate": 1.9328859541033616e-06, "loss": 20.9141, "step": 15188 }, { "epoch": 0.14377940382995238, "grad_norm": 300.68292236328125, "learning_rate": 1.9328749113857455e-06, "loss": 21.7266, "step": 15189 }, { "epoch": 0.14378886985166744, "grad_norm": 243.9975128173828, "learning_rate": 1.9328638677912872e-06, "loss": 14.625, "step": 15190 }, { "epoch": 0.1437983358733825, "grad_norm": 341.22747802734375, "learning_rate": 1.932852823319998e-06, "loss": 34.9688, "step": 15191 }, { "epoch": 0.14380780189509754, "grad_norm": 446.56787109375, "learning_rate": 1.9328417779718875e-06, "loss": 33.6953, "step": 15192 }, { "epoch": 0.1438172679168126, "grad_norm": 344.5507507324219, "learning_rate": 1.9328307317469667e-06, "loss": 35.9844, "step": 15193 }, { "epoch": 0.14382673393852766, "grad_norm": 429.9443359375, "learning_rate": 1.9328196846452453e-06, "loss": 33.8555, "step": 15194 }, { "epoch": 0.14383619996024272, "grad_norm": 588.8190307617188, "learning_rate": 1.9328086366667343e-06, "loss": 27.6953, "step": 15195 }, { "epoch": 0.14384566598195775, "grad_norm": 519.2130126953125, "learning_rate": 1.9327975878114438e-06, "loss": 13.0117, "step": 15196 }, { "epoch": 0.1438551320036728, "grad_norm": 399.8504333496094, "learning_rate": 1.9327865380793844e-06, "loss": 39.625, "step": 15197 }, { "epoch": 0.14386459802538787, "grad_norm": 764.2711181640625, "learning_rate": 1.932775487470566e-06, "loss": 34.6016, "step": 15198 }, { "epoch": 0.14387406404710293, "grad_norm": 774.0384521484375, "learning_rate": 1.9327644359849996e-06, "loss": 38.8672, "step": 15199 }, { "epoch": 0.143883530068818, "grad_norm": 184.6457061767578, "learning_rate": 1.9327533836226954e-06, "loss": 14.5, "step": 15200 }, { "epoch": 0.14389299609053302, "grad_norm": 787.7421264648438, "learning_rate": 1.9327423303836633e-06, "loss": 54.4844, "step": 15201 }, { "epoch": 0.14390246211224808, "grad_norm": 246.78732299804688, "learning_rate": 1.9327312762679145e-06, "loss": 27.8828, "step": 15202 }, { "epoch": 0.14391192813396314, "grad_norm": 493.9295654296875, "learning_rate": 1.9327202212754586e-06, "loss": 46.0781, "step": 15203 }, { "epoch": 0.1439213941556782, "grad_norm": 585.8896484375, "learning_rate": 1.9327091654063067e-06, "loss": 33.3594, "step": 15204 }, { "epoch": 0.14393086017739326, "grad_norm": 192.9053497314453, "learning_rate": 1.932698108660469e-06, "loss": 19.4688, "step": 15205 }, { "epoch": 0.1439403261991083, "grad_norm": 322.32177734375, "learning_rate": 1.9326870510379554e-06, "loss": 10.8984, "step": 15206 }, { "epoch": 0.14394979222082335, "grad_norm": 235.56895446777344, "learning_rate": 1.9326759925387768e-06, "loss": 11.9844, "step": 15207 }, { "epoch": 0.1439592582425384, "grad_norm": 281.1301574707031, "learning_rate": 1.932664933162943e-06, "loss": 17.5781, "step": 15208 }, { "epoch": 0.14396872426425347, "grad_norm": 334.0773010253906, "learning_rate": 1.9326538729104657e-06, "loss": 20.9688, "step": 15209 }, { "epoch": 0.1439781902859685, "grad_norm": 246.84591674804688, "learning_rate": 1.932642811781354e-06, "loss": 22.125, "step": 15210 }, { "epoch": 0.14398765630768356, "grad_norm": 764.5955810546875, "learning_rate": 1.9326317497756185e-06, "loss": 45.8359, "step": 15211 }, { "epoch": 0.14399712232939862, "grad_norm": 291.10601806640625, "learning_rate": 1.93262068689327e-06, "loss": 30.5938, "step": 15212 }, { "epoch": 0.14400658835111368, "grad_norm": 225.79205322265625, "learning_rate": 1.932609623134319e-06, "loss": 19.1328, "step": 15213 }, { "epoch": 0.14401605437282874, "grad_norm": 3.3662750720977783, "learning_rate": 1.9325985584987753e-06, "loss": 0.8857, "step": 15214 }, { "epoch": 0.14402552039454378, "grad_norm": 360.12835693359375, "learning_rate": 1.9325874929866493e-06, "loss": 20.0, "step": 15215 }, { "epoch": 0.14403498641625884, "grad_norm": 475.2279052734375, "learning_rate": 1.9325764265979525e-06, "loss": 38.918, "step": 15216 }, { "epoch": 0.1440444524379739, "grad_norm": 429.67559814453125, "learning_rate": 1.932565359332694e-06, "loss": 37.3438, "step": 15217 }, { "epoch": 0.14405391845968896, "grad_norm": 344.075927734375, "learning_rate": 1.932554291190885e-06, "loss": 15.3711, "step": 15218 }, { "epoch": 0.144063384481404, "grad_norm": 443.9195556640625, "learning_rate": 1.9325432221725355e-06, "loss": 21.8516, "step": 15219 }, { "epoch": 0.14407285050311905, "grad_norm": 288.36474609375, "learning_rate": 1.932532152277656e-06, "loss": 21.8125, "step": 15220 }, { "epoch": 0.1440823165248341, "grad_norm": 264.3172912597656, "learning_rate": 1.932521081506257e-06, "loss": 23.3125, "step": 15221 }, { "epoch": 0.14409178254654917, "grad_norm": 369.1775207519531, "learning_rate": 1.932510009858349e-06, "loss": 16.9062, "step": 15222 }, { "epoch": 0.14410124856826423, "grad_norm": 391.6280212402344, "learning_rate": 1.932498937333942e-06, "loss": 35.4219, "step": 15223 }, { "epoch": 0.14411071458997926, "grad_norm": 579.010009765625, "learning_rate": 1.932487863933047e-06, "loss": 44.3594, "step": 15224 }, { "epoch": 0.14412018061169432, "grad_norm": 1068.5322265625, "learning_rate": 1.9324767896556735e-06, "loss": 36.9492, "step": 15225 }, { "epoch": 0.14412964663340938, "grad_norm": 933.8759155273438, "learning_rate": 1.9324657145018328e-06, "loss": 67.0625, "step": 15226 }, { "epoch": 0.14413911265512444, "grad_norm": 593.1398315429688, "learning_rate": 1.932454638471535e-06, "loss": 78.2031, "step": 15227 }, { "epoch": 0.14414857867683947, "grad_norm": 197.66744995117188, "learning_rate": 1.9324435615647904e-06, "loss": 13.7109, "step": 15228 }, { "epoch": 0.14415804469855453, "grad_norm": 340.63165283203125, "learning_rate": 1.93243248378161e-06, "loss": 21.25, "step": 15229 }, { "epoch": 0.1441675107202696, "grad_norm": 246.25238037109375, "learning_rate": 1.932421405122003e-06, "loss": 18.2969, "step": 15230 }, { "epoch": 0.14417697674198465, "grad_norm": 308.67901611328125, "learning_rate": 1.9324103255859808e-06, "loss": 17.6992, "step": 15231 }, { "epoch": 0.1441864427636997, "grad_norm": 442.6431579589844, "learning_rate": 1.9323992451735537e-06, "loss": 18.0469, "step": 15232 }, { "epoch": 0.14419590878541474, "grad_norm": 1405.720947265625, "learning_rate": 1.932388163884732e-06, "loss": 43.5234, "step": 15233 }, { "epoch": 0.1442053748071298, "grad_norm": 150.04327392578125, "learning_rate": 1.932377081719526e-06, "loss": 22.6523, "step": 15234 }, { "epoch": 0.14421484082884486, "grad_norm": 427.5689697265625, "learning_rate": 1.932365998677946e-06, "loss": 33.4219, "step": 15235 }, { "epoch": 0.14422430685055992, "grad_norm": 745.6260986328125, "learning_rate": 1.932354914760003e-06, "loss": 64.875, "step": 15236 }, { "epoch": 0.14423377287227496, "grad_norm": 328.2194519042969, "learning_rate": 1.9323438299657067e-06, "loss": 18.7812, "step": 15237 }, { "epoch": 0.14424323889399002, "grad_norm": 241.6074676513672, "learning_rate": 1.932332744295068e-06, "loss": 22.6719, "step": 15238 }, { "epoch": 0.14425270491570508, "grad_norm": 344.4136657714844, "learning_rate": 1.932321657748097e-06, "loss": 33.8906, "step": 15239 }, { "epoch": 0.14426217093742014, "grad_norm": 691.70703125, "learning_rate": 1.9323105703248047e-06, "loss": 29.3203, "step": 15240 }, { "epoch": 0.1442716369591352, "grad_norm": 354.4599914550781, "learning_rate": 1.9322994820252006e-06, "loss": 9.4141, "step": 15241 }, { "epoch": 0.14428110298085023, "grad_norm": 260.04888916015625, "learning_rate": 1.9322883928492957e-06, "loss": 10.3398, "step": 15242 }, { "epoch": 0.1442905690025653, "grad_norm": 509.20556640625, "learning_rate": 1.932277302797101e-06, "loss": 31.5781, "step": 15243 }, { "epoch": 0.14430003502428035, "grad_norm": 2.838557004928589, "learning_rate": 1.9322662118686256e-06, "loss": 0.7949, "step": 15244 }, { "epoch": 0.1443095010459954, "grad_norm": 555.5860595703125, "learning_rate": 1.932255120063881e-06, "loss": 40.1016, "step": 15245 }, { "epoch": 0.14431896706771044, "grad_norm": 715.8921508789062, "learning_rate": 1.932244027382877e-06, "loss": 76.6562, "step": 15246 }, { "epoch": 0.1443284330894255, "grad_norm": 404.9414978027344, "learning_rate": 1.9322329338256244e-06, "loss": 30.1719, "step": 15247 }, { "epoch": 0.14433789911114056, "grad_norm": 597.4869384765625, "learning_rate": 1.932221839392133e-06, "loss": 45.1719, "step": 15248 }, { "epoch": 0.14434736513285562, "grad_norm": 3.5332748889923096, "learning_rate": 1.9322107440824145e-06, "loss": 1.0039, "step": 15249 }, { "epoch": 0.14435683115457068, "grad_norm": 306.0921325683594, "learning_rate": 1.932199647896478e-06, "loss": 37.5781, "step": 15250 }, { "epoch": 0.1443662971762857, "grad_norm": 270.4045104980469, "learning_rate": 1.932188550834335e-06, "loss": 16.3125, "step": 15251 }, { "epoch": 0.14437576319800077, "grad_norm": 477.49835205078125, "learning_rate": 1.9321774528959947e-06, "loss": 54.8594, "step": 15252 }, { "epoch": 0.14438522921971583, "grad_norm": 328.686767578125, "learning_rate": 1.932166354081469e-06, "loss": 25.3125, "step": 15253 }, { "epoch": 0.1443946952414309, "grad_norm": 666.1035766601562, "learning_rate": 1.932155254390767e-06, "loss": 45.6875, "step": 15254 }, { "epoch": 0.14440416126314592, "grad_norm": 153.6597137451172, "learning_rate": 1.9321441538238997e-06, "loss": 9.8906, "step": 15255 }, { "epoch": 0.14441362728486098, "grad_norm": 151.93350219726562, "learning_rate": 1.932133052380878e-06, "loss": 22.0234, "step": 15256 }, { "epoch": 0.14442309330657604, "grad_norm": 507.10968017578125, "learning_rate": 1.9321219500617113e-06, "loss": 30.6094, "step": 15257 }, { "epoch": 0.1444325593282911, "grad_norm": 2.7130303382873535, "learning_rate": 1.932110846866411e-06, "loss": 0.9194, "step": 15258 }, { "epoch": 0.14444202535000616, "grad_norm": 302.67071533203125, "learning_rate": 1.9320997427949872e-06, "loss": 17.3438, "step": 15259 }, { "epoch": 0.1444514913717212, "grad_norm": 160.24560546875, "learning_rate": 1.93208863784745e-06, "loss": 19.4297, "step": 15260 }, { "epoch": 0.14446095739343626, "grad_norm": 208.5304718017578, "learning_rate": 1.9320775320238104e-06, "loss": 10.1543, "step": 15261 }, { "epoch": 0.14447042341515132, "grad_norm": 220.9407196044922, "learning_rate": 1.932066425324078e-06, "loss": 11.0391, "step": 15262 }, { "epoch": 0.14447988943686638, "grad_norm": 304.44000244140625, "learning_rate": 1.9320553177482644e-06, "loss": 28.8672, "step": 15263 }, { "epoch": 0.1444893554585814, "grad_norm": 283.9429016113281, "learning_rate": 1.932044209296379e-06, "loss": 10.3789, "step": 15264 }, { "epoch": 0.14449882148029647, "grad_norm": 613.2890625, "learning_rate": 1.9320330999684333e-06, "loss": 65.6562, "step": 15265 }, { "epoch": 0.14450828750201153, "grad_norm": 611.1076049804688, "learning_rate": 1.932021989764436e-06, "loss": 18.3008, "step": 15266 }, { "epoch": 0.1445177535237266, "grad_norm": 1202.2972412109375, "learning_rate": 1.9320108786844e-06, "loss": 32.1328, "step": 15267 }, { "epoch": 0.14452721954544165, "grad_norm": 408.3705749511719, "learning_rate": 1.9319997667283332e-06, "loss": 33.4609, "step": 15268 }, { "epoch": 0.14453668556715668, "grad_norm": 728.7333374023438, "learning_rate": 1.9319886538962482e-06, "loss": 37.0703, "step": 15269 }, { "epoch": 0.14454615158887174, "grad_norm": 358.7647399902344, "learning_rate": 1.931977540188154e-06, "loss": 37.1875, "step": 15270 }, { "epoch": 0.1445556176105868, "grad_norm": 268.5113830566406, "learning_rate": 1.931966425604062e-06, "loss": 22.1016, "step": 15271 }, { "epoch": 0.14456508363230186, "grad_norm": 2.757086753845215, "learning_rate": 1.931955310143982e-06, "loss": 0.8408, "step": 15272 }, { "epoch": 0.1445745496540169, "grad_norm": 252.75143432617188, "learning_rate": 1.931944193807924e-06, "loss": 17.9883, "step": 15273 }, { "epoch": 0.14458401567573195, "grad_norm": 235.9954833984375, "learning_rate": 1.9319330765958998e-06, "loss": 16.5391, "step": 15274 }, { "epoch": 0.144593481697447, "grad_norm": 206.99440002441406, "learning_rate": 1.9319219585079186e-06, "loss": 20.1641, "step": 15275 }, { "epoch": 0.14460294771916207, "grad_norm": 536.6063232421875, "learning_rate": 1.9319108395439918e-06, "loss": 34.5078, "step": 15276 }, { "epoch": 0.14461241374087713, "grad_norm": 1250.3209228515625, "learning_rate": 1.931899719704129e-06, "loss": 63.7578, "step": 15277 }, { "epoch": 0.14462187976259216, "grad_norm": 217.1771697998047, "learning_rate": 1.9318885989883418e-06, "loss": 17.8594, "step": 15278 }, { "epoch": 0.14463134578430722, "grad_norm": 191.97279357910156, "learning_rate": 1.9318774773966397e-06, "loss": 21.8594, "step": 15279 }, { "epoch": 0.14464081180602228, "grad_norm": 404.46795654296875, "learning_rate": 1.931866354929033e-06, "loss": 56.7109, "step": 15280 }, { "epoch": 0.14465027782773734, "grad_norm": 263.25494384765625, "learning_rate": 1.931855231585533e-06, "loss": 20.2031, "step": 15281 }, { "epoch": 0.14465974384945238, "grad_norm": 989.7471923828125, "learning_rate": 1.9318441073661494e-06, "loss": 49.8789, "step": 15282 }, { "epoch": 0.14466920987116744, "grad_norm": 386.1849060058594, "learning_rate": 1.931832982270893e-06, "loss": 23.7969, "step": 15283 }, { "epoch": 0.1446786758928825, "grad_norm": 1037.1513671875, "learning_rate": 1.9318218562997742e-06, "loss": 41.0625, "step": 15284 }, { "epoch": 0.14468814191459756, "grad_norm": 292.0083923339844, "learning_rate": 1.9318107294528036e-06, "loss": 16.2812, "step": 15285 }, { "epoch": 0.14469760793631262, "grad_norm": 197.6056365966797, "learning_rate": 1.9317996017299912e-06, "loss": 12.5781, "step": 15286 }, { "epoch": 0.14470707395802765, "grad_norm": 3.1623575687408447, "learning_rate": 1.9317884731313484e-06, "loss": 1.0049, "step": 15287 }, { "epoch": 0.1447165399797427, "grad_norm": 288.075927734375, "learning_rate": 1.9317773436568846e-06, "loss": 20.9688, "step": 15288 }, { "epoch": 0.14472600600145777, "grad_norm": 402.6111145019531, "learning_rate": 1.931766213306611e-06, "loss": 33.3047, "step": 15289 }, { "epoch": 0.14473547202317283, "grad_norm": 2.9730167388916016, "learning_rate": 1.9317550820805374e-06, "loss": 0.7891, "step": 15290 }, { "epoch": 0.1447449380448879, "grad_norm": 263.28118896484375, "learning_rate": 1.931743949978675e-06, "loss": 15.6133, "step": 15291 }, { "epoch": 0.14475440406660292, "grad_norm": 2.9477014541625977, "learning_rate": 1.931732817001034e-06, "loss": 0.9399, "step": 15292 }, { "epoch": 0.14476387008831798, "grad_norm": 1176.673583984375, "learning_rate": 1.9317216831476243e-06, "loss": 61.4062, "step": 15293 }, { "epoch": 0.14477333611003304, "grad_norm": 350.12469482421875, "learning_rate": 1.931710548418457e-06, "loss": 22.3438, "step": 15294 }, { "epoch": 0.1447828021317481, "grad_norm": 550.4080200195312, "learning_rate": 1.9316994128135426e-06, "loss": 18.7578, "step": 15295 }, { "epoch": 0.14479226815346313, "grad_norm": 884.8142700195312, "learning_rate": 1.931688276332891e-06, "loss": 39.4688, "step": 15296 }, { "epoch": 0.1448017341751782, "grad_norm": 800.2797241210938, "learning_rate": 1.9316771389765133e-06, "loss": 27.8398, "step": 15297 }, { "epoch": 0.14481120019689325, "grad_norm": 404.05255126953125, "learning_rate": 1.9316660007444195e-06, "loss": 38.8867, "step": 15298 }, { "epoch": 0.1448206662186083, "grad_norm": 339.6365966796875, "learning_rate": 1.9316548616366208e-06, "loss": 37.5234, "step": 15299 }, { "epoch": 0.14483013224032337, "grad_norm": 1048.458984375, "learning_rate": 1.9316437216531263e-06, "loss": 61.7109, "step": 15300 }, { "epoch": 0.1448395982620384, "grad_norm": 328.7762451171875, "learning_rate": 1.931632580793948e-06, "loss": 22.5703, "step": 15301 }, { "epoch": 0.14484906428375346, "grad_norm": 255.71669006347656, "learning_rate": 1.931621439059095e-06, "loss": 17.8438, "step": 15302 }, { "epoch": 0.14485853030546852, "grad_norm": 267.7637634277344, "learning_rate": 1.9316102964485793e-06, "loss": 27.6562, "step": 15303 }, { "epoch": 0.14486799632718358, "grad_norm": 245.42233276367188, "learning_rate": 1.93159915296241e-06, "loss": 14.6016, "step": 15304 }, { "epoch": 0.14487746234889862, "grad_norm": 2.9895009994506836, "learning_rate": 1.9315880086005984e-06, "loss": 0.9517, "step": 15305 }, { "epoch": 0.14488692837061368, "grad_norm": 905.8873901367188, "learning_rate": 1.9315768633631543e-06, "loss": 52.4062, "step": 15306 }, { "epoch": 0.14489639439232874, "grad_norm": 292.3561096191406, "learning_rate": 1.9315657172500885e-06, "loss": 30.6094, "step": 15307 }, { "epoch": 0.1449058604140438, "grad_norm": 180.87374877929688, "learning_rate": 1.931554570261412e-06, "loss": 23.8906, "step": 15308 }, { "epoch": 0.14491532643575886, "grad_norm": 284.0401916503906, "learning_rate": 1.9315434223971346e-06, "loss": 16.0508, "step": 15309 }, { "epoch": 0.1449247924574739, "grad_norm": 400.8475036621094, "learning_rate": 1.9315322736572668e-06, "loss": 50.0469, "step": 15310 }, { "epoch": 0.14493425847918895, "grad_norm": 3.252338171005249, "learning_rate": 1.9315211240418193e-06, "loss": 0.9819, "step": 15311 }, { "epoch": 0.144943724500904, "grad_norm": 643.2327880859375, "learning_rate": 1.9315099735508028e-06, "loss": 30.6953, "step": 15312 }, { "epoch": 0.14495319052261907, "grad_norm": 322.9156494140625, "learning_rate": 1.9314988221842274e-06, "loss": 25.6719, "step": 15313 }, { "epoch": 0.1449626565443341, "grad_norm": 633.7265014648438, "learning_rate": 1.9314876699421036e-06, "loss": 38.8125, "step": 15314 }, { "epoch": 0.14497212256604916, "grad_norm": 311.7604675292969, "learning_rate": 1.931476516824442e-06, "loss": 18.1875, "step": 15315 }, { "epoch": 0.14498158858776422, "grad_norm": 334.6456298828125, "learning_rate": 1.9314653628312536e-06, "loss": 31.5703, "step": 15316 }, { "epoch": 0.14499105460947928, "grad_norm": 475.5924072265625, "learning_rate": 1.9314542079625476e-06, "loss": 36.75, "step": 15317 }, { "epoch": 0.14500052063119434, "grad_norm": 3.6958518028259277, "learning_rate": 1.9314430522183356e-06, "loss": 0.8772, "step": 15318 }, { "epoch": 0.14500998665290937, "grad_norm": 296.15545654296875, "learning_rate": 1.9314318955986274e-06, "loss": 21.0391, "step": 15319 }, { "epoch": 0.14501945267462443, "grad_norm": 274.33612060546875, "learning_rate": 1.9314207381034345e-06, "loss": 9.7383, "step": 15320 }, { "epoch": 0.1450289186963395, "grad_norm": 172.7273712158203, "learning_rate": 1.931409579732766e-06, "loss": 20.8359, "step": 15321 }, { "epoch": 0.14503838471805455, "grad_norm": 515.306884765625, "learning_rate": 1.9313984204866336e-06, "loss": 35.2656, "step": 15322 }, { "epoch": 0.14504785073976958, "grad_norm": 345.9835205078125, "learning_rate": 1.931387260365047e-06, "loss": 43.0312, "step": 15323 }, { "epoch": 0.14505731676148464, "grad_norm": 823.2337646484375, "learning_rate": 1.9313760993680172e-06, "loss": 18.5508, "step": 15324 }, { "epoch": 0.1450667827831997, "grad_norm": 538.2637329101562, "learning_rate": 1.9313649374955537e-06, "loss": 51.9297, "step": 15325 }, { "epoch": 0.14507624880491476, "grad_norm": 324.6404724121094, "learning_rate": 1.931353774747669e-06, "loss": 24.0859, "step": 15326 }, { "epoch": 0.14508571482662982, "grad_norm": 1087.0177001953125, "learning_rate": 1.9313426111243713e-06, "loss": 19.8633, "step": 15327 }, { "epoch": 0.14509518084834486, "grad_norm": 385.94769287109375, "learning_rate": 1.9313314466256723e-06, "loss": 14.5312, "step": 15328 }, { "epoch": 0.14510464687005992, "grad_norm": 218.30252075195312, "learning_rate": 1.9313202812515826e-06, "loss": 25.75, "step": 15329 }, { "epoch": 0.14511411289177498, "grad_norm": 281.4032287597656, "learning_rate": 1.9313091150021123e-06, "loss": 9.6641, "step": 15330 }, { "epoch": 0.14512357891349004, "grad_norm": 728.920654296875, "learning_rate": 1.9312979478772723e-06, "loss": 14.4961, "step": 15331 }, { "epoch": 0.14513304493520507, "grad_norm": 580.5492553710938, "learning_rate": 1.9312867798770726e-06, "loss": 9.0742, "step": 15332 }, { "epoch": 0.14514251095692013, "grad_norm": 377.46063232421875, "learning_rate": 1.931275611001524e-06, "loss": 36.6797, "step": 15333 }, { "epoch": 0.1451519769786352, "grad_norm": 641.3618774414062, "learning_rate": 1.9312644412506363e-06, "loss": 21.5117, "step": 15334 }, { "epoch": 0.14516144300035025, "grad_norm": 268.2963562011719, "learning_rate": 1.9312532706244214e-06, "loss": 25.9219, "step": 15335 }, { "epoch": 0.1451709090220653, "grad_norm": 615.53271484375, "learning_rate": 1.9312420991228886e-06, "loss": 35.7734, "step": 15336 }, { "epoch": 0.14518037504378034, "grad_norm": 586.6865234375, "learning_rate": 1.931230926746049e-06, "loss": 33.7656, "step": 15337 }, { "epoch": 0.1451898410654954, "grad_norm": 1269.653076171875, "learning_rate": 1.9312197534939128e-06, "loss": 28.4805, "step": 15338 }, { "epoch": 0.14519930708721046, "grad_norm": 419.89361572265625, "learning_rate": 1.9312085793664905e-06, "loss": 39.4844, "step": 15339 }, { "epoch": 0.14520877310892552, "grad_norm": 378.0401916503906, "learning_rate": 1.9311974043637928e-06, "loss": 25.6016, "step": 15340 }, { "epoch": 0.14521823913064055, "grad_norm": 341.60369873046875, "learning_rate": 1.9311862284858304e-06, "loss": 42.125, "step": 15341 }, { "epoch": 0.1452277051523556, "grad_norm": 772.61083984375, "learning_rate": 1.9311750517326133e-06, "loss": 52.3984, "step": 15342 }, { "epoch": 0.14523717117407067, "grad_norm": 604.82421875, "learning_rate": 1.931163874104152e-06, "loss": 31.7188, "step": 15343 }, { "epoch": 0.14524663719578573, "grad_norm": 467.3868103027344, "learning_rate": 1.9311526956004577e-06, "loss": 52.8438, "step": 15344 }, { "epoch": 0.1452561032175008, "grad_norm": 153.61558532714844, "learning_rate": 1.93114151622154e-06, "loss": 10.9844, "step": 15345 }, { "epoch": 0.14526556923921582, "grad_norm": 318.7857971191406, "learning_rate": 1.93113033596741e-06, "loss": 36.2812, "step": 15346 }, { "epoch": 0.14527503526093088, "grad_norm": 3.182764768600464, "learning_rate": 1.931119154838078e-06, "loss": 0.8599, "step": 15347 }, { "epoch": 0.14528450128264594, "grad_norm": 288.125, "learning_rate": 1.9311079728335547e-06, "loss": 20.1406, "step": 15348 }, { "epoch": 0.145293967304361, "grad_norm": 625.4888305664062, "learning_rate": 1.9310967899538503e-06, "loss": 50.4844, "step": 15349 }, { "epoch": 0.14530343332607604, "grad_norm": 3.9516122341156006, "learning_rate": 1.9310856061989758e-06, "loss": 1.0571, "step": 15350 }, { "epoch": 0.1453128993477911, "grad_norm": 373.8796691894531, "learning_rate": 1.9310744215689413e-06, "loss": 20.625, "step": 15351 }, { "epoch": 0.14532236536950616, "grad_norm": 364.927734375, "learning_rate": 1.9310632360637575e-06, "loss": 27.7344, "step": 15352 }, { "epoch": 0.14533183139122122, "grad_norm": 1086.2034912109375, "learning_rate": 1.9310520496834345e-06, "loss": 27.9258, "step": 15353 }, { "epoch": 0.14534129741293628, "grad_norm": 535.2662963867188, "learning_rate": 1.9310408624279833e-06, "loss": 40.5391, "step": 15354 }, { "epoch": 0.1453507634346513, "grad_norm": 397.5802307128906, "learning_rate": 1.9310296742974145e-06, "loss": 34.0156, "step": 15355 }, { "epoch": 0.14536022945636637, "grad_norm": 418.2440490722656, "learning_rate": 1.931018485291738e-06, "loss": 21.2266, "step": 15356 }, { "epoch": 0.14536969547808143, "grad_norm": 399.5590515136719, "learning_rate": 1.931007295410965e-06, "loss": 36.0312, "step": 15357 }, { "epoch": 0.1453791614997965, "grad_norm": 3.081341028213501, "learning_rate": 1.9309961046551055e-06, "loss": 1.0566, "step": 15358 }, { "epoch": 0.14538862752151152, "grad_norm": 398.389404296875, "learning_rate": 1.93098491302417e-06, "loss": 48.6719, "step": 15359 }, { "epoch": 0.14539809354322658, "grad_norm": 420.65216064453125, "learning_rate": 1.9309737205181696e-06, "loss": 51.9531, "step": 15360 }, { "epoch": 0.14540755956494164, "grad_norm": 212.00119018554688, "learning_rate": 1.9309625271371142e-06, "loss": 16.6172, "step": 15361 }, { "epoch": 0.1454170255866567, "grad_norm": 145.61851501464844, "learning_rate": 1.930951332881015e-06, "loss": 25.5391, "step": 15362 }, { "epoch": 0.14542649160837176, "grad_norm": 668.5740966796875, "learning_rate": 1.930940137749882e-06, "loss": 43.4844, "step": 15363 }, { "epoch": 0.1454359576300868, "grad_norm": 514.5537109375, "learning_rate": 1.9309289417437257e-06, "loss": 15.9688, "step": 15364 }, { "epoch": 0.14544542365180185, "grad_norm": 398.42425537109375, "learning_rate": 1.930917744862557e-06, "loss": 26.2422, "step": 15365 }, { "epoch": 0.1454548896735169, "grad_norm": 295.0373229980469, "learning_rate": 1.930906547106386e-06, "loss": 23.6875, "step": 15366 }, { "epoch": 0.14546435569523197, "grad_norm": 491.82635498046875, "learning_rate": 1.9308953484752233e-06, "loss": 45.625, "step": 15367 }, { "epoch": 0.145473821716947, "grad_norm": 446.44293212890625, "learning_rate": 1.9308841489690795e-06, "loss": 36.7188, "step": 15368 }, { "epoch": 0.14548328773866206, "grad_norm": 309.190185546875, "learning_rate": 1.930872948587965e-06, "loss": 11.1211, "step": 15369 }, { "epoch": 0.14549275376037712, "grad_norm": 336.8580627441406, "learning_rate": 1.9308617473318906e-06, "loss": 22.6641, "step": 15370 }, { "epoch": 0.14550221978209218, "grad_norm": 295.9739990234375, "learning_rate": 1.930850545200867e-06, "loss": 17.1328, "step": 15371 }, { "epoch": 0.14551168580380724, "grad_norm": 256.2739562988281, "learning_rate": 1.9308393421949044e-06, "loss": 19.8672, "step": 15372 }, { "epoch": 0.14552115182552228, "grad_norm": 418.40032958984375, "learning_rate": 1.9308281383140133e-06, "loss": 48.5938, "step": 15373 }, { "epoch": 0.14553061784723734, "grad_norm": 830.4214477539062, "learning_rate": 1.930816933558204e-06, "loss": 16.2148, "step": 15374 }, { "epoch": 0.1455400838689524, "grad_norm": 304.38629150390625, "learning_rate": 1.930805727927488e-06, "loss": 16.125, "step": 15375 }, { "epoch": 0.14554954989066746, "grad_norm": 542.42724609375, "learning_rate": 1.9307945214218746e-06, "loss": 41.8594, "step": 15376 }, { "epoch": 0.14555901591238252, "grad_norm": 353.9496154785156, "learning_rate": 1.930783314041375e-06, "loss": 11.9414, "step": 15377 }, { "epoch": 0.14556848193409755, "grad_norm": 289.4814453125, "learning_rate": 1.9307721057859997e-06, "loss": 23.1289, "step": 15378 }, { "epoch": 0.1455779479558126, "grad_norm": 2317.79443359375, "learning_rate": 1.930760896655759e-06, "loss": 20.2891, "step": 15379 }, { "epoch": 0.14558741397752767, "grad_norm": 520.6400756835938, "learning_rate": 1.930749686650664e-06, "loss": 50.9531, "step": 15380 }, { "epoch": 0.14559687999924273, "grad_norm": 310.4266357421875, "learning_rate": 1.930738475770725e-06, "loss": 17.2266, "step": 15381 }, { "epoch": 0.14560634602095776, "grad_norm": 138.8509063720703, "learning_rate": 1.930727264015952e-06, "loss": 13.9688, "step": 15382 }, { "epoch": 0.14561581204267282, "grad_norm": 238.36622619628906, "learning_rate": 1.9307160513863558e-06, "loss": 19.7422, "step": 15383 }, { "epoch": 0.14562527806438788, "grad_norm": 172.1270294189453, "learning_rate": 1.930704837881947e-06, "loss": 16.5625, "step": 15384 }, { "epoch": 0.14563474408610294, "grad_norm": 174.24119567871094, "learning_rate": 1.9306936235027363e-06, "loss": 16.4766, "step": 15385 }, { "epoch": 0.145644210107818, "grad_norm": 281.9092712402344, "learning_rate": 1.9306824082487343e-06, "loss": 16.6914, "step": 15386 }, { "epoch": 0.14565367612953303, "grad_norm": 460.2672119140625, "learning_rate": 1.9306711921199514e-06, "loss": 17.0234, "step": 15387 }, { "epoch": 0.1456631421512481, "grad_norm": 348.0470275878906, "learning_rate": 1.930659975116398e-06, "loss": 27.9297, "step": 15388 }, { "epoch": 0.14567260817296315, "grad_norm": 313.9343566894531, "learning_rate": 1.930648757238085e-06, "loss": 14.9219, "step": 15389 }, { "epoch": 0.1456820741946782, "grad_norm": 223.55345153808594, "learning_rate": 1.9306375384850223e-06, "loss": 20.0078, "step": 15390 }, { "epoch": 0.14569154021639324, "grad_norm": 350.4989318847656, "learning_rate": 1.9306263188572212e-06, "loss": 44.5117, "step": 15391 }, { "epoch": 0.1457010062381083, "grad_norm": 232.56321716308594, "learning_rate": 1.9306150983546918e-06, "loss": 12.8008, "step": 15392 }, { "epoch": 0.14571047225982336, "grad_norm": 192.4589385986328, "learning_rate": 1.9306038769774445e-06, "loss": 17.8086, "step": 15393 }, { "epoch": 0.14571993828153842, "grad_norm": 2.910679817199707, "learning_rate": 1.9305926547254905e-06, "loss": 0.9121, "step": 15394 }, { "epoch": 0.14572940430325348, "grad_norm": 1373.914306640625, "learning_rate": 1.93058143159884e-06, "loss": 8.0703, "step": 15395 }, { "epoch": 0.14573887032496852, "grad_norm": 258.8292236328125, "learning_rate": 1.930570207597503e-06, "loss": 20.8672, "step": 15396 }, { "epoch": 0.14574833634668358, "grad_norm": 401.4604797363281, "learning_rate": 1.9305589827214907e-06, "loss": 36.875, "step": 15397 }, { "epoch": 0.14575780236839864, "grad_norm": 487.74658203125, "learning_rate": 1.9305477569708137e-06, "loss": 44.3281, "step": 15398 }, { "epoch": 0.1457672683901137, "grad_norm": 191.20655822753906, "learning_rate": 1.930536530345482e-06, "loss": 20.6484, "step": 15399 }, { "epoch": 0.14577673441182873, "grad_norm": 312.29095458984375, "learning_rate": 1.9305253028455067e-06, "loss": 23.9844, "step": 15400 }, { "epoch": 0.1457862004335438, "grad_norm": 406.6753845214844, "learning_rate": 1.9305140744708982e-06, "loss": 22.25, "step": 15401 }, { "epoch": 0.14579566645525885, "grad_norm": 333.5839538574219, "learning_rate": 1.9305028452216673e-06, "loss": 18.1406, "step": 15402 }, { "epoch": 0.1458051324769739, "grad_norm": 232.5741424560547, "learning_rate": 1.9304916150978236e-06, "loss": 23.4766, "step": 15403 }, { "epoch": 0.14581459849868897, "grad_norm": 337.94671630859375, "learning_rate": 1.930480384099379e-06, "loss": 10.1836, "step": 15404 }, { "epoch": 0.145824064520404, "grad_norm": 2.7893576622009277, "learning_rate": 1.930469152226343e-06, "loss": 0.832, "step": 15405 }, { "epoch": 0.14583353054211906, "grad_norm": 440.8857116699219, "learning_rate": 1.9304579194787264e-06, "loss": 28.7969, "step": 15406 }, { "epoch": 0.14584299656383412, "grad_norm": 587.2883911132812, "learning_rate": 1.93044668585654e-06, "loss": 43.3984, "step": 15407 }, { "epoch": 0.14585246258554918, "grad_norm": 422.9544982910156, "learning_rate": 1.9304354513597943e-06, "loss": 28.2188, "step": 15408 }, { "epoch": 0.1458619286072642, "grad_norm": 320.99261474609375, "learning_rate": 1.9304242159884998e-06, "loss": 24.75, "step": 15409 }, { "epoch": 0.14587139462897927, "grad_norm": 289.92755126953125, "learning_rate": 1.930412979742667e-06, "loss": 19.8594, "step": 15410 }, { "epoch": 0.14588086065069433, "grad_norm": 3.1352522373199463, "learning_rate": 1.9304017426223066e-06, "loss": 0.9209, "step": 15411 }, { "epoch": 0.1458903266724094, "grad_norm": 307.08514404296875, "learning_rate": 1.930390504627429e-06, "loss": 33.5312, "step": 15412 }, { "epoch": 0.14589979269412445, "grad_norm": 446.5391845703125, "learning_rate": 1.930379265758045e-06, "loss": 44.9688, "step": 15413 }, { "epoch": 0.14590925871583948, "grad_norm": 166.76451110839844, "learning_rate": 1.930368026014165e-06, "loss": 14.3906, "step": 15414 }, { "epoch": 0.14591872473755454, "grad_norm": 215.72486877441406, "learning_rate": 1.9303567853957995e-06, "loss": 22.0781, "step": 15415 }, { "epoch": 0.1459281907592696, "grad_norm": 373.6656494140625, "learning_rate": 1.9303455439029593e-06, "loss": 22.1602, "step": 15416 }, { "epoch": 0.14593765678098466, "grad_norm": 659.1682739257812, "learning_rate": 1.9303343015356545e-06, "loss": 27.3906, "step": 15417 }, { "epoch": 0.1459471228026997, "grad_norm": 2.9057343006134033, "learning_rate": 1.9303230582938963e-06, "loss": 0.7339, "step": 15418 }, { "epoch": 0.14595658882441476, "grad_norm": 492.9624938964844, "learning_rate": 1.9303118141776946e-06, "loss": 41.1406, "step": 15419 }, { "epoch": 0.14596605484612982, "grad_norm": 463.8767395019531, "learning_rate": 1.9303005691870607e-06, "loss": 21.0938, "step": 15420 }, { "epoch": 0.14597552086784488, "grad_norm": 355.8436279296875, "learning_rate": 1.9302893233220045e-06, "loss": 27.2656, "step": 15421 }, { "epoch": 0.14598498688955994, "grad_norm": 520.2819213867188, "learning_rate": 1.9302780765825372e-06, "loss": 13.832, "step": 15422 }, { "epoch": 0.14599445291127497, "grad_norm": 332.2615051269531, "learning_rate": 1.9302668289686685e-06, "loss": 26.5312, "step": 15423 }, { "epoch": 0.14600391893299003, "grad_norm": 664.0385131835938, "learning_rate": 1.9302555804804098e-06, "loss": 52.0625, "step": 15424 }, { "epoch": 0.1460133849547051, "grad_norm": 509.80145263671875, "learning_rate": 1.9302443311177716e-06, "loss": 48.7969, "step": 15425 }, { "epoch": 0.14602285097642015, "grad_norm": 518.6346435546875, "learning_rate": 1.930233080880764e-06, "loss": 25.1328, "step": 15426 }, { "epoch": 0.14603231699813518, "grad_norm": 254.66073608398438, "learning_rate": 1.9302218297693973e-06, "loss": 22.8516, "step": 15427 }, { "epoch": 0.14604178301985024, "grad_norm": 369.2476806640625, "learning_rate": 1.9302105777836832e-06, "loss": 23.2734, "step": 15428 }, { "epoch": 0.1460512490415653, "grad_norm": 554.47607421875, "learning_rate": 1.930199324923632e-06, "loss": 39.7422, "step": 15429 }, { "epoch": 0.14606071506328036, "grad_norm": 295.1675720214844, "learning_rate": 1.930188071189253e-06, "loss": 24.3438, "step": 15430 }, { "epoch": 0.14607018108499542, "grad_norm": 303.25250244140625, "learning_rate": 1.9301768165805584e-06, "loss": 18.3516, "step": 15431 }, { "epoch": 0.14607964710671045, "grad_norm": 359.349609375, "learning_rate": 1.9301655610975584e-06, "loss": 22.6172, "step": 15432 }, { "epoch": 0.1460891131284255, "grad_norm": 621.8168334960938, "learning_rate": 1.9301543047402628e-06, "loss": 36.168, "step": 15433 }, { "epoch": 0.14609857915014057, "grad_norm": 683.5079956054688, "learning_rate": 1.9301430475086826e-06, "loss": 36.8516, "step": 15434 }, { "epoch": 0.14610804517185563, "grad_norm": 336.6072082519531, "learning_rate": 1.9301317894028285e-06, "loss": 22.0234, "step": 15435 }, { "epoch": 0.14611751119357066, "grad_norm": 179.83294677734375, "learning_rate": 1.930120530422711e-06, "loss": 16.3672, "step": 15436 }, { "epoch": 0.14612697721528572, "grad_norm": 279.71673583984375, "learning_rate": 1.930109270568341e-06, "loss": 20.1367, "step": 15437 }, { "epoch": 0.14613644323700078, "grad_norm": 222.47967529296875, "learning_rate": 1.9300980098397284e-06, "loss": 22.3359, "step": 15438 }, { "epoch": 0.14614590925871584, "grad_norm": 179.10020446777344, "learning_rate": 1.9300867482368844e-06, "loss": 12.668, "step": 15439 }, { "epoch": 0.1461553752804309, "grad_norm": 341.98583984375, "learning_rate": 1.9300754857598193e-06, "loss": 40.9297, "step": 15440 }, { "epoch": 0.14616484130214594, "grad_norm": 2.586590528488159, "learning_rate": 1.930064222408544e-06, "loss": 0.8491, "step": 15441 }, { "epoch": 0.146174307323861, "grad_norm": 639.0401611328125, "learning_rate": 1.9300529581830686e-06, "loss": 42.3438, "step": 15442 }, { "epoch": 0.14618377334557606, "grad_norm": 287.8226318359375, "learning_rate": 1.9300416930834036e-06, "loss": 24.9141, "step": 15443 }, { "epoch": 0.14619323936729112, "grad_norm": 1024.8544921875, "learning_rate": 1.9300304271095604e-06, "loss": 22.1641, "step": 15444 }, { "epoch": 0.14620270538900615, "grad_norm": 275.9740905761719, "learning_rate": 1.930019160261549e-06, "loss": 40.2578, "step": 15445 }, { "epoch": 0.1462121714107212, "grad_norm": 228.43661499023438, "learning_rate": 1.93000789253938e-06, "loss": 9.4648, "step": 15446 }, { "epoch": 0.14622163743243627, "grad_norm": 169.8919219970703, "learning_rate": 1.929996623943064e-06, "loss": 19.8203, "step": 15447 }, { "epoch": 0.14623110345415133, "grad_norm": 443.593994140625, "learning_rate": 1.929985354472612e-06, "loss": 21.3594, "step": 15448 }, { "epoch": 0.1462405694758664, "grad_norm": 184.52334594726562, "learning_rate": 1.929974084128034e-06, "loss": 20.625, "step": 15449 }, { "epoch": 0.14625003549758142, "grad_norm": 3.3386003971099854, "learning_rate": 1.9299628129093406e-06, "loss": 1.0498, "step": 15450 }, { "epoch": 0.14625950151929648, "grad_norm": 609.5357055664062, "learning_rate": 1.9299515408165433e-06, "loss": 11.7891, "step": 15451 }, { "epoch": 0.14626896754101154, "grad_norm": 659.7943725585938, "learning_rate": 1.9299402678496515e-06, "loss": 46.5938, "step": 15452 }, { "epoch": 0.1462784335627266, "grad_norm": 632.5967407226562, "learning_rate": 1.929928994008677e-06, "loss": 25.4375, "step": 15453 }, { "epoch": 0.14628789958444163, "grad_norm": 461.90093994140625, "learning_rate": 1.929917719293629e-06, "loss": 32.2812, "step": 15454 }, { "epoch": 0.1462973656061567, "grad_norm": 404.2304992675781, "learning_rate": 1.9299064437045194e-06, "loss": 26.7656, "step": 15455 }, { "epoch": 0.14630683162787175, "grad_norm": 210.68089294433594, "learning_rate": 1.9298951672413577e-06, "loss": 24.1406, "step": 15456 }, { "epoch": 0.1463162976495868, "grad_norm": 328.6700134277344, "learning_rate": 1.9298838899041554e-06, "loss": 19.7578, "step": 15457 }, { "epoch": 0.14632576367130187, "grad_norm": 262.3499450683594, "learning_rate": 1.9298726116929227e-06, "loss": 18.1797, "step": 15458 }, { "epoch": 0.1463352296930169, "grad_norm": 338.2308044433594, "learning_rate": 1.92986133260767e-06, "loss": 18.6016, "step": 15459 }, { "epoch": 0.14634469571473196, "grad_norm": 3.009578227996826, "learning_rate": 1.9298500526484084e-06, "loss": 0.7612, "step": 15460 }, { "epoch": 0.14635416173644702, "grad_norm": 243.5596466064453, "learning_rate": 1.9298387718151484e-06, "loss": 10.9531, "step": 15461 }, { "epoch": 0.14636362775816208, "grad_norm": 375.3368835449219, "learning_rate": 1.9298274901079003e-06, "loss": 44.0, "step": 15462 }, { "epoch": 0.14637309377987714, "grad_norm": 189.6853485107422, "learning_rate": 1.9298162075266747e-06, "loss": 22.1562, "step": 15463 }, { "epoch": 0.14638255980159218, "grad_norm": 555.4299926757812, "learning_rate": 1.929804924071482e-06, "loss": 29.0742, "step": 15464 }, { "epoch": 0.14639202582330724, "grad_norm": 420.4630126953125, "learning_rate": 1.9297936397423338e-06, "loss": 35.3281, "step": 15465 }, { "epoch": 0.1464014918450223, "grad_norm": 246.65846252441406, "learning_rate": 1.92978235453924e-06, "loss": 11.4668, "step": 15466 }, { "epoch": 0.14641095786673736, "grad_norm": 376.1831970214844, "learning_rate": 1.929771068462211e-06, "loss": 21.6328, "step": 15467 }, { "epoch": 0.1464204238884524, "grad_norm": 819.1104736328125, "learning_rate": 1.929759781511258e-06, "loss": 39.9531, "step": 15468 }, { "epoch": 0.14642988991016745, "grad_norm": 306.78387451171875, "learning_rate": 1.929748493686391e-06, "loss": 17.2969, "step": 15469 }, { "epoch": 0.1464393559318825, "grad_norm": 556.6681518554688, "learning_rate": 1.9297372049876207e-06, "loss": 50.0234, "step": 15470 }, { "epoch": 0.14644882195359757, "grad_norm": 363.2102355957031, "learning_rate": 1.9297259154149585e-06, "loss": 21.5312, "step": 15471 }, { "epoch": 0.14645828797531263, "grad_norm": 191.63482666015625, "learning_rate": 1.929714624968414e-06, "loss": 15.4219, "step": 15472 }, { "epoch": 0.14646775399702766, "grad_norm": 389.99542236328125, "learning_rate": 1.9297033336479985e-06, "loss": 33.4844, "step": 15473 }, { "epoch": 0.14647722001874272, "grad_norm": 222.23306274414062, "learning_rate": 1.9296920414537223e-06, "loss": 21.6797, "step": 15474 }, { "epoch": 0.14648668604045778, "grad_norm": 272.63897705078125, "learning_rate": 1.929680748385596e-06, "loss": 29.1719, "step": 15475 }, { "epoch": 0.14649615206217284, "grad_norm": 489.9247131347656, "learning_rate": 1.92966945444363e-06, "loss": 42.0312, "step": 15476 }, { "epoch": 0.14650561808388787, "grad_norm": 149.4058380126953, "learning_rate": 1.9296581596278355e-06, "loss": 21.0781, "step": 15477 }, { "epoch": 0.14651508410560293, "grad_norm": 349.3733825683594, "learning_rate": 1.929646863938223e-06, "loss": 48.7344, "step": 15478 }, { "epoch": 0.146524550127318, "grad_norm": 494.1368713378906, "learning_rate": 1.9296355673748024e-06, "loss": 41.7812, "step": 15479 }, { "epoch": 0.14653401614903305, "grad_norm": 3.13472580909729, "learning_rate": 1.929624269937585e-06, "loss": 0.9355, "step": 15480 }, { "epoch": 0.1465434821707481, "grad_norm": 3.3371899127960205, "learning_rate": 1.9296129716265815e-06, "loss": 0.9385, "step": 15481 }, { "epoch": 0.14655294819246314, "grad_norm": 509.9200134277344, "learning_rate": 1.929601672441802e-06, "loss": 40.3984, "step": 15482 }, { "epoch": 0.1465624142141782, "grad_norm": 213.44924926757812, "learning_rate": 1.9295903723832578e-06, "loss": 25.5859, "step": 15483 }, { "epoch": 0.14657188023589326, "grad_norm": 338.3318786621094, "learning_rate": 1.9295790714509588e-06, "loss": 19.7812, "step": 15484 }, { "epoch": 0.14658134625760832, "grad_norm": 960.7919311523438, "learning_rate": 1.929567769644916e-06, "loss": 56.0, "step": 15485 }, { "epoch": 0.14659081227932336, "grad_norm": 770.890380859375, "learning_rate": 1.9295564669651402e-06, "loss": 60.0938, "step": 15486 }, { "epoch": 0.14660027830103842, "grad_norm": 373.2703552246094, "learning_rate": 1.9295451634116414e-06, "loss": 36.8125, "step": 15487 }, { "epoch": 0.14660974432275348, "grad_norm": 2.5276918411254883, "learning_rate": 1.9295338589844307e-06, "loss": 0.8613, "step": 15488 }, { "epoch": 0.14661921034446854, "grad_norm": 182.7940673828125, "learning_rate": 1.929522553683519e-06, "loss": 21.8281, "step": 15489 }, { "epoch": 0.1466286763661836, "grad_norm": 1699.9224853515625, "learning_rate": 1.929511247508916e-06, "loss": 39.8672, "step": 15490 }, { "epoch": 0.14663814238789863, "grad_norm": 473.5752868652344, "learning_rate": 1.929499940460633e-06, "loss": 21.2656, "step": 15491 }, { "epoch": 0.1466476084096137, "grad_norm": 413.7255554199219, "learning_rate": 1.929488632538681e-06, "loss": 16.2031, "step": 15492 }, { "epoch": 0.14665707443132875, "grad_norm": 412.9191589355469, "learning_rate": 1.9294773237430697e-06, "loss": 35.9688, "step": 15493 }, { "epoch": 0.1466665404530438, "grad_norm": 161.6313018798828, "learning_rate": 1.9294660140738105e-06, "loss": 18.0391, "step": 15494 }, { "epoch": 0.14667600647475884, "grad_norm": 223.23245239257812, "learning_rate": 1.9294547035309135e-06, "loss": 23.6719, "step": 15495 }, { "epoch": 0.1466854724964739, "grad_norm": 1236.330810546875, "learning_rate": 1.9294433921143896e-06, "loss": 55.0312, "step": 15496 }, { "epoch": 0.14669493851818896, "grad_norm": 922.1594848632812, "learning_rate": 1.9294320798242496e-06, "loss": 39.3281, "step": 15497 }, { "epoch": 0.14670440453990402, "grad_norm": 327.15203857421875, "learning_rate": 1.929420766660504e-06, "loss": 29.0781, "step": 15498 }, { "epoch": 0.14671387056161908, "grad_norm": 293.8498229980469, "learning_rate": 1.929409452623163e-06, "loss": 20.4375, "step": 15499 }, { "epoch": 0.1467233365833341, "grad_norm": 249.34933471679688, "learning_rate": 1.9293981377122374e-06, "loss": 18.6484, "step": 15500 }, { "epoch": 0.14673280260504917, "grad_norm": 616.675537109375, "learning_rate": 1.9293868219277383e-06, "loss": 41.7031, "step": 15501 }, { "epoch": 0.14674226862676423, "grad_norm": 269.9367980957031, "learning_rate": 1.929375505269676e-06, "loss": 24.5625, "step": 15502 }, { "epoch": 0.1467517346484793, "grad_norm": 517.479736328125, "learning_rate": 1.9293641877380612e-06, "loss": 24.2891, "step": 15503 }, { "epoch": 0.14676120067019433, "grad_norm": 268.4432373046875, "learning_rate": 1.929352869332905e-06, "loss": 29.2656, "step": 15504 }, { "epoch": 0.14677066669190938, "grad_norm": 3.2432141304016113, "learning_rate": 1.9293415500542165e-06, "loss": 1.0088, "step": 15505 }, { "epoch": 0.14678013271362444, "grad_norm": 175.04969787597656, "learning_rate": 1.929330229902008e-06, "loss": 11.0508, "step": 15506 }, { "epoch": 0.1467895987353395, "grad_norm": 416.4031982421875, "learning_rate": 1.9293189088762898e-06, "loss": 28.9062, "step": 15507 }, { "epoch": 0.14679906475705456, "grad_norm": 3.2946012020111084, "learning_rate": 1.9293075869770717e-06, "loss": 1.0474, "step": 15508 }, { "epoch": 0.1468085307787696, "grad_norm": 343.39630126953125, "learning_rate": 1.9292962642043652e-06, "loss": 19.9688, "step": 15509 }, { "epoch": 0.14681799680048466, "grad_norm": 227.02401733398438, "learning_rate": 1.9292849405581806e-06, "loss": 21.5391, "step": 15510 }, { "epoch": 0.14682746282219972, "grad_norm": 259.226318359375, "learning_rate": 1.929273616038529e-06, "loss": 10.6289, "step": 15511 }, { "epoch": 0.14683692884391478, "grad_norm": 603.3012084960938, "learning_rate": 1.92926229064542e-06, "loss": 51.1875, "step": 15512 }, { "epoch": 0.1468463948656298, "grad_norm": 176.41104125976562, "learning_rate": 1.9292509643788654e-06, "loss": 16.3125, "step": 15513 }, { "epoch": 0.14685586088734487, "grad_norm": 246.45257568359375, "learning_rate": 1.9292396372388754e-06, "loss": 13.2656, "step": 15514 }, { "epoch": 0.14686532690905993, "grad_norm": 265.08721923828125, "learning_rate": 1.92922830922546e-06, "loss": 23.6562, "step": 15515 }, { "epoch": 0.146874792930775, "grad_norm": 223.20782470703125, "learning_rate": 1.929216980338631e-06, "loss": 21.9844, "step": 15516 }, { "epoch": 0.14688425895249005, "grad_norm": 394.58245849609375, "learning_rate": 1.929205650578398e-06, "loss": 36.5938, "step": 15517 }, { "epoch": 0.14689372497420508, "grad_norm": 3.25015926361084, "learning_rate": 1.929194319944773e-06, "loss": 0.8867, "step": 15518 }, { "epoch": 0.14690319099592014, "grad_norm": 172.21092224121094, "learning_rate": 1.929182988437765e-06, "loss": 17.0352, "step": 15519 }, { "epoch": 0.1469126570176352, "grad_norm": 796.4215698242188, "learning_rate": 1.929171656057386e-06, "loss": 31.8828, "step": 15520 }, { "epoch": 0.14692212303935026, "grad_norm": 977.5111083984375, "learning_rate": 1.9291603228036453e-06, "loss": 82.3828, "step": 15521 }, { "epoch": 0.1469315890610653, "grad_norm": 3.178448438644409, "learning_rate": 1.929148988676555e-06, "loss": 0.999, "step": 15522 }, { "epoch": 0.14694105508278035, "grad_norm": 387.2662658691406, "learning_rate": 1.929137653676125e-06, "loss": 18.9297, "step": 15523 }, { "epoch": 0.1469505211044954, "grad_norm": 251.8770751953125, "learning_rate": 1.929126317802366e-06, "loss": 20.3203, "step": 15524 }, { "epoch": 0.14695998712621047, "grad_norm": 190.8336944580078, "learning_rate": 1.9291149810552883e-06, "loss": 16.0703, "step": 15525 }, { "epoch": 0.14696945314792553, "grad_norm": 816.9220581054688, "learning_rate": 1.9291036434349036e-06, "loss": 26.6328, "step": 15526 }, { "epoch": 0.14697891916964057, "grad_norm": 372.9020080566406, "learning_rate": 1.9290923049412217e-06, "loss": 23.6406, "step": 15527 }, { "epoch": 0.14698838519135562, "grad_norm": 176.89552307128906, "learning_rate": 1.9290809655742534e-06, "loss": 19.0391, "step": 15528 }, { "epoch": 0.14699785121307068, "grad_norm": 229.40541076660156, "learning_rate": 1.9290696253340094e-06, "loss": 17.9766, "step": 15529 }, { "epoch": 0.14700731723478574, "grad_norm": 316.967041015625, "learning_rate": 1.9290582842205e-06, "loss": 19.1719, "step": 15530 }, { "epoch": 0.14701678325650078, "grad_norm": 700.8687133789062, "learning_rate": 1.929046942233737e-06, "loss": 36.6406, "step": 15531 }, { "epoch": 0.14702624927821584, "grad_norm": 217.62371826171875, "learning_rate": 1.92903559937373e-06, "loss": 16.3672, "step": 15532 }, { "epoch": 0.1470357152999309, "grad_norm": 373.0626525878906, "learning_rate": 1.92902425564049e-06, "loss": 35.375, "step": 15533 }, { "epoch": 0.14704518132164596, "grad_norm": 910.3222045898438, "learning_rate": 1.9290129110340274e-06, "loss": 40.2969, "step": 15534 }, { "epoch": 0.14705464734336102, "grad_norm": 453.47540283203125, "learning_rate": 1.9290015655543536e-06, "loss": 37.8828, "step": 15535 }, { "epoch": 0.14706411336507605, "grad_norm": 291.3238830566406, "learning_rate": 1.9289902192014785e-06, "loss": 28.5703, "step": 15536 }, { "epoch": 0.1470735793867911, "grad_norm": 246.9541015625, "learning_rate": 1.928978871975413e-06, "loss": 22.875, "step": 15537 }, { "epoch": 0.14708304540850617, "grad_norm": 330.1142272949219, "learning_rate": 1.9289675238761676e-06, "loss": 51.3125, "step": 15538 }, { "epoch": 0.14709251143022123, "grad_norm": 329.915771484375, "learning_rate": 1.9289561749037534e-06, "loss": 23.3359, "step": 15539 }, { "epoch": 0.14710197745193626, "grad_norm": 261.9580993652344, "learning_rate": 1.9289448250581807e-06, "loss": 21.7969, "step": 15540 }, { "epoch": 0.14711144347365132, "grad_norm": 499.9205322265625, "learning_rate": 1.9289334743394603e-06, "loss": 11.4609, "step": 15541 }, { "epoch": 0.14712090949536638, "grad_norm": 214.72122192382812, "learning_rate": 1.928922122747603e-06, "loss": 18.8594, "step": 15542 }, { "epoch": 0.14713037551708144, "grad_norm": 388.1897277832031, "learning_rate": 1.9289107702826195e-06, "loss": 36.2578, "step": 15543 }, { "epoch": 0.1471398415387965, "grad_norm": 347.3939208984375, "learning_rate": 1.92889941694452e-06, "loss": 29.4375, "step": 15544 }, { "epoch": 0.14714930756051153, "grad_norm": 382.0763244628906, "learning_rate": 1.9288880627333154e-06, "loss": 24.625, "step": 15545 }, { "epoch": 0.1471587735822266, "grad_norm": 293.2266540527344, "learning_rate": 1.9288767076490167e-06, "loss": 21.1719, "step": 15546 }, { "epoch": 0.14716823960394165, "grad_norm": 554.9200439453125, "learning_rate": 1.9288653516916342e-06, "loss": 30.9531, "step": 15547 }, { "epoch": 0.1471777056256567, "grad_norm": 275.975830078125, "learning_rate": 1.928853994861179e-06, "loss": 17.1367, "step": 15548 }, { "epoch": 0.14718717164737177, "grad_norm": 730.9281616210938, "learning_rate": 1.928842637157661e-06, "loss": 60.0469, "step": 15549 }, { "epoch": 0.1471966376690868, "grad_norm": 239.47274780273438, "learning_rate": 1.9288312785810915e-06, "loss": 18.3359, "step": 15550 }, { "epoch": 0.14720610369080186, "grad_norm": 374.057861328125, "learning_rate": 1.928819919131481e-06, "loss": 39.7969, "step": 15551 }, { "epoch": 0.14721556971251692, "grad_norm": 271.994873046875, "learning_rate": 1.9288085588088402e-06, "loss": 24.7734, "step": 15552 }, { "epoch": 0.14722503573423198, "grad_norm": 442.10125732421875, "learning_rate": 1.9287971976131802e-06, "loss": 30.7891, "step": 15553 }, { "epoch": 0.14723450175594702, "grad_norm": 534.0426025390625, "learning_rate": 1.928785835544511e-06, "loss": 49.9531, "step": 15554 }, { "epoch": 0.14724396777766208, "grad_norm": 542.2315673828125, "learning_rate": 1.9287744726028433e-06, "loss": 30.5781, "step": 15555 }, { "epoch": 0.14725343379937714, "grad_norm": 156.2601318359375, "learning_rate": 1.928763108788188e-06, "loss": 14.2227, "step": 15556 }, { "epoch": 0.1472628998210922, "grad_norm": 358.9439697265625, "learning_rate": 1.928751744100556e-06, "loss": 24.5156, "step": 15557 }, { "epoch": 0.14727236584280726, "grad_norm": 744.9380493164062, "learning_rate": 1.9287403785399577e-06, "loss": 65.4961, "step": 15558 }, { "epoch": 0.1472818318645223, "grad_norm": 351.4787902832031, "learning_rate": 1.928729012106404e-06, "loss": 22.9766, "step": 15559 }, { "epoch": 0.14729129788623735, "grad_norm": 448.8901672363281, "learning_rate": 1.928717644799905e-06, "loss": 50.1719, "step": 15560 }, { "epoch": 0.1473007639079524, "grad_norm": 256.0363464355469, "learning_rate": 1.928706276620472e-06, "loss": 30.3828, "step": 15561 }, { "epoch": 0.14731022992966747, "grad_norm": 299.56402587890625, "learning_rate": 1.928694907568116e-06, "loss": 29.1133, "step": 15562 }, { "epoch": 0.1473196959513825, "grad_norm": 3.7016592025756836, "learning_rate": 1.928683537642847e-06, "loss": 1.0586, "step": 15563 }, { "epoch": 0.14732916197309756, "grad_norm": 281.7708435058594, "learning_rate": 1.928672166844676e-06, "loss": 26.1719, "step": 15564 }, { "epoch": 0.14733862799481262, "grad_norm": 291.1203918457031, "learning_rate": 1.928660795173613e-06, "loss": 7.0918, "step": 15565 }, { "epoch": 0.14734809401652768, "grad_norm": 373.3625793457031, "learning_rate": 1.92864942262967e-06, "loss": 25.6953, "step": 15566 }, { "epoch": 0.14735756003824274, "grad_norm": 137.21490478515625, "learning_rate": 1.9286380492128563e-06, "loss": 13.2969, "step": 15567 }, { "epoch": 0.14736702605995777, "grad_norm": 262.0765075683594, "learning_rate": 1.9286266749231834e-06, "loss": 20.5703, "step": 15568 }, { "epoch": 0.14737649208167283, "grad_norm": 320.77032470703125, "learning_rate": 1.928615299760662e-06, "loss": 25.875, "step": 15569 }, { "epoch": 0.1473859581033879, "grad_norm": 392.58587646484375, "learning_rate": 1.928603923725303e-06, "loss": 34.2969, "step": 15570 }, { "epoch": 0.14739542412510295, "grad_norm": 354.9620056152344, "learning_rate": 1.928592546817116e-06, "loss": 34.0312, "step": 15571 }, { "epoch": 0.14740489014681799, "grad_norm": 317.7339172363281, "learning_rate": 1.928581169036113e-06, "loss": 19.7969, "step": 15572 }, { "epoch": 0.14741435616853305, "grad_norm": 218.7632598876953, "learning_rate": 1.9285697903823036e-06, "loss": 13.0801, "step": 15573 }, { "epoch": 0.1474238221902481, "grad_norm": 363.4873962402344, "learning_rate": 1.9285584108556995e-06, "loss": 23.3672, "step": 15574 }, { "epoch": 0.14743328821196316, "grad_norm": 369.8494567871094, "learning_rate": 1.92854703045631e-06, "loss": 21.2109, "step": 15575 }, { "epoch": 0.14744275423367822, "grad_norm": 267.354736328125, "learning_rate": 1.928535649184148e-06, "loss": 21.0781, "step": 15576 }, { "epoch": 0.14745222025539326, "grad_norm": 463.1070251464844, "learning_rate": 1.928524267039222e-06, "loss": 39.6484, "step": 15577 }, { "epoch": 0.14746168627710832, "grad_norm": 283.15216064453125, "learning_rate": 1.928512884021544e-06, "loss": 8.1289, "step": 15578 }, { "epoch": 0.14747115229882338, "grad_norm": 492.9833679199219, "learning_rate": 1.928501500131124e-06, "loss": 13.2422, "step": 15579 }, { "epoch": 0.14748061832053844, "grad_norm": 155.0847930908203, "learning_rate": 1.928490115367973e-06, "loss": 20.6797, "step": 15580 }, { "epoch": 0.14749008434225347, "grad_norm": 217.97799682617188, "learning_rate": 1.928478729732102e-06, "loss": 10.4551, "step": 15581 }, { "epoch": 0.14749955036396853, "grad_norm": 991.4630737304688, "learning_rate": 1.928467343223521e-06, "loss": 36.1797, "step": 15582 }, { "epoch": 0.1475090163856836, "grad_norm": 557.9981079101562, "learning_rate": 1.928455955842241e-06, "loss": 51.4922, "step": 15583 }, { "epoch": 0.14751848240739865, "grad_norm": 215.77947998046875, "learning_rate": 1.9284445675882733e-06, "loss": 25.5781, "step": 15584 }, { "epoch": 0.1475279484291137, "grad_norm": 507.5409240722656, "learning_rate": 1.928433178461628e-06, "loss": 42.6406, "step": 15585 }, { "epoch": 0.14753741445082874, "grad_norm": 733.7870483398438, "learning_rate": 1.928421788462316e-06, "loss": 50.7969, "step": 15586 }, { "epoch": 0.1475468804725438, "grad_norm": 422.5614318847656, "learning_rate": 1.9284103975903476e-06, "loss": 22.9141, "step": 15587 }, { "epoch": 0.14755634649425886, "grad_norm": 381.3815002441406, "learning_rate": 1.9283990058457337e-06, "loss": 33.875, "step": 15588 }, { "epoch": 0.14756581251597392, "grad_norm": 469.9471435546875, "learning_rate": 1.9283876132284853e-06, "loss": 34.9688, "step": 15589 }, { "epoch": 0.14757527853768895, "grad_norm": 700.4280395507812, "learning_rate": 1.9283762197386134e-06, "loss": 54.168, "step": 15590 }, { "epoch": 0.147584744559404, "grad_norm": 601.268798828125, "learning_rate": 1.928364825376128e-06, "loss": 30.9844, "step": 15591 }, { "epoch": 0.14759421058111907, "grad_norm": 264.1831359863281, "learning_rate": 1.9283534301410398e-06, "loss": 9.4668, "step": 15592 }, { "epoch": 0.14760367660283413, "grad_norm": 472.2716979980469, "learning_rate": 1.92834203403336e-06, "loss": 29.0508, "step": 15593 }, { "epoch": 0.1476131426245492, "grad_norm": 378.13470458984375, "learning_rate": 1.928330637053099e-06, "loss": 28.7695, "step": 15594 }, { "epoch": 0.14762260864626423, "grad_norm": 363.9725646972656, "learning_rate": 1.9283192392002676e-06, "loss": 18.8438, "step": 15595 }, { "epoch": 0.14763207466797929, "grad_norm": 508.1999206542969, "learning_rate": 1.9283078404748766e-06, "loss": 9.9883, "step": 15596 }, { "epoch": 0.14764154068969434, "grad_norm": 434.307861328125, "learning_rate": 1.928296440876936e-06, "loss": 51.9844, "step": 15597 }, { "epoch": 0.1476510067114094, "grad_norm": 992.5032348632812, "learning_rate": 1.928285040406458e-06, "loss": 37.8125, "step": 15598 }, { "epoch": 0.14766047273312444, "grad_norm": 3.153144359588623, "learning_rate": 1.928273639063452e-06, "loss": 0.8877, "step": 15599 }, { "epoch": 0.1476699387548395, "grad_norm": 780.2010498046875, "learning_rate": 1.928262236847929e-06, "loss": 46.8359, "step": 15600 }, { "epoch": 0.14767940477655456, "grad_norm": 606.2879028320312, "learning_rate": 1.9282508337599004e-06, "loss": 33.9727, "step": 15601 }, { "epoch": 0.14768887079826962, "grad_norm": 313.943115234375, "learning_rate": 1.928239429799376e-06, "loss": 47.2031, "step": 15602 }, { "epoch": 0.14769833681998468, "grad_norm": 306.6687316894531, "learning_rate": 1.9282280249663672e-06, "loss": 18.5938, "step": 15603 }, { "epoch": 0.1477078028416997, "grad_norm": 648.8347778320312, "learning_rate": 1.9282166192608843e-06, "loss": 36.4531, "step": 15604 }, { "epoch": 0.14771726886341477, "grad_norm": 280.39544677734375, "learning_rate": 1.928205212682938e-06, "loss": 32.4062, "step": 15605 }, { "epoch": 0.14772673488512983, "grad_norm": 613.4972534179688, "learning_rate": 1.9281938052325397e-06, "loss": 45.0469, "step": 15606 }, { "epoch": 0.1477362009068449, "grad_norm": 347.4449768066406, "learning_rate": 1.928182396909699e-06, "loss": 32.8125, "step": 15607 }, { "epoch": 0.14774566692855992, "grad_norm": 507.0713195800781, "learning_rate": 1.9281709877144277e-06, "loss": 10.7246, "step": 15608 }, { "epoch": 0.14775513295027498, "grad_norm": 280.8185729980469, "learning_rate": 1.9281595776467357e-06, "loss": 18.3438, "step": 15609 }, { "epoch": 0.14776459897199004, "grad_norm": 802.9524536132812, "learning_rate": 1.9281481667066345e-06, "loss": 20.0469, "step": 15610 }, { "epoch": 0.1477740649937051, "grad_norm": 1142.871826171875, "learning_rate": 1.928136754894134e-06, "loss": 46.6797, "step": 15611 }, { "epoch": 0.14778353101542016, "grad_norm": 625.1661376953125, "learning_rate": 1.9281253422092456e-06, "loss": 21.1602, "step": 15612 }, { "epoch": 0.1477929970371352, "grad_norm": 2.60447096824646, "learning_rate": 1.9281139286519794e-06, "loss": 0.854, "step": 15613 }, { "epoch": 0.14780246305885025, "grad_norm": 928.193603515625, "learning_rate": 1.9281025142223464e-06, "loss": 18.875, "step": 15614 }, { "epoch": 0.1478119290805653, "grad_norm": 990.1611938476562, "learning_rate": 1.928091098920358e-06, "loss": 31.0234, "step": 15615 }, { "epoch": 0.14782139510228037, "grad_norm": 3.0132157802581787, "learning_rate": 1.928079682746024e-06, "loss": 0.9497, "step": 15616 }, { "epoch": 0.1478308611239954, "grad_norm": 220.1630859375, "learning_rate": 1.9280682656993555e-06, "loss": 18.6797, "step": 15617 }, { "epoch": 0.14784032714571047, "grad_norm": 217.16139221191406, "learning_rate": 1.9280568477803635e-06, "loss": 9.8965, "step": 15618 }, { "epoch": 0.14784979316742553, "grad_norm": 477.3331298828125, "learning_rate": 1.928045428989058e-06, "loss": 22.3047, "step": 15619 }, { "epoch": 0.14785925918914058, "grad_norm": 240.98468017578125, "learning_rate": 1.9280340093254504e-06, "loss": 17.1797, "step": 15620 }, { "epoch": 0.14786872521085564, "grad_norm": 952.88232421875, "learning_rate": 1.928022588789551e-06, "loss": 43.2734, "step": 15621 }, { "epoch": 0.14787819123257068, "grad_norm": 206.64207458496094, "learning_rate": 1.928011167381371e-06, "loss": 16.2109, "step": 15622 }, { "epoch": 0.14788765725428574, "grad_norm": 404.6994323730469, "learning_rate": 1.927999745100921e-06, "loss": 19.8359, "step": 15623 }, { "epoch": 0.1478971232760008, "grad_norm": 1819.6339111328125, "learning_rate": 1.9279883219482113e-06, "loss": 19.25, "step": 15624 }, { "epoch": 0.14790658929771586, "grad_norm": 334.6298828125, "learning_rate": 1.9279768979232527e-06, "loss": 22.3359, "step": 15625 }, { "epoch": 0.1479160553194309, "grad_norm": 352.8998718261719, "learning_rate": 1.9279654730260564e-06, "loss": 22.5859, "step": 15626 }, { "epoch": 0.14792552134114595, "grad_norm": 554.5792236328125, "learning_rate": 1.927954047256633e-06, "loss": 57.9688, "step": 15627 }, { "epoch": 0.147934987362861, "grad_norm": 3.091740608215332, "learning_rate": 1.927942620614993e-06, "loss": 0.96, "step": 15628 }, { "epoch": 0.14794445338457607, "grad_norm": 618.4608154296875, "learning_rate": 1.9279311931011478e-06, "loss": 32.8359, "step": 15629 }, { "epoch": 0.14795391940629113, "grad_norm": 316.1160583496094, "learning_rate": 1.9279197647151075e-06, "loss": 7.5391, "step": 15630 }, { "epoch": 0.14796338542800616, "grad_norm": 373.1421203613281, "learning_rate": 1.9279083354568824e-06, "loss": 12.5938, "step": 15631 }, { "epoch": 0.14797285144972122, "grad_norm": 447.1734313964844, "learning_rate": 1.9278969053264845e-06, "loss": 45.1875, "step": 15632 }, { "epoch": 0.14798231747143628, "grad_norm": 388.899169921875, "learning_rate": 1.927885474323924e-06, "loss": 35.1328, "step": 15633 }, { "epoch": 0.14799178349315134, "grad_norm": 3.2035765647888184, "learning_rate": 1.9278740424492107e-06, "loss": 0.9385, "step": 15634 }, { "epoch": 0.1480012495148664, "grad_norm": 425.2375183105469, "learning_rate": 1.9278626097023567e-06, "loss": 46.625, "step": 15635 }, { "epoch": 0.14801071553658143, "grad_norm": 286.401611328125, "learning_rate": 1.9278511760833723e-06, "loss": 25.3242, "step": 15636 }, { "epoch": 0.1480201815582965, "grad_norm": 281.6573486328125, "learning_rate": 1.927839741592268e-06, "loss": 56.1094, "step": 15637 }, { "epoch": 0.14802964758001155, "grad_norm": 276.7017517089844, "learning_rate": 1.927828306229054e-06, "loss": 17.8438, "step": 15638 }, { "epoch": 0.1480391136017266, "grad_norm": 626.1773681640625, "learning_rate": 1.927816869993743e-06, "loss": 65.3984, "step": 15639 }, { "epoch": 0.14804857962344165, "grad_norm": 193.36138916015625, "learning_rate": 1.927805432886344e-06, "loss": 17.6719, "step": 15640 }, { "epoch": 0.1480580456451567, "grad_norm": 634.9346923828125, "learning_rate": 1.927793994906868e-06, "loss": 46.5703, "step": 15641 }, { "epoch": 0.14806751166687177, "grad_norm": 409.6676330566406, "learning_rate": 1.9277825560553263e-06, "loss": 18.5781, "step": 15642 }, { "epoch": 0.14807697768858682, "grad_norm": 454.13916015625, "learning_rate": 1.927771116331729e-06, "loss": 23.1094, "step": 15643 }, { "epoch": 0.14808644371030188, "grad_norm": 222.09632873535156, "learning_rate": 1.9277596757360872e-06, "loss": 22.6484, "step": 15644 }, { "epoch": 0.14809590973201692, "grad_norm": 206.30885314941406, "learning_rate": 1.927748234268412e-06, "loss": 19.2148, "step": 15645 }, { "epoch": 0.14810537575373198, "grad_norm": 3.4163424968719482, "learning_rate": 1.927736791928714e-06, "loss": 1.0234, "step": 15646 }, { "epoch": 0.14811484177544704, "grad_norm": 3.13472056388855, "learning_rate": 1.9277253487170035e-06, "loss": 0.8164, "step": 15647 }, { "epoch": 0.1481243077971621, "grad_norm": 485.8520812988281, "learning_rate": 1.9277139046332916e-06, "loss": 52.4375, "step": 15648 }, { "epoch": 0.14813377381887713, "grad_norm": 188.07386779785156, "learning_rate": 1.927702459677589e-06, "loss": 11.4453, "step": 15649 }, { "epoch": 0.1481432398405922, "grad_norm": 290.31549072265625, "learning_rate": 1.9276910138499058e-06, "loss": 38.4844, "step": 15650 }, { "epoch": 0.14815270586230725, "grad_norm": 404.0642395019531, "learning_rate": 1.927679567150254e-06, "loss": 37.9141, "step": 15651 }, { "epoch": 0.1481621718840223, "grad_norm": 665.4826049804688, "learning_rate": 1.9276681195786436e-06, "loss": 53.375, "step": 15652 }, { "epoch": 0.14817163790573737, "grad_norm": 434.3675842285156, "learning_rate": 1.927656671135086e-06, "loss": 34.3281, "step": 15653 }, { "epoch": 0.1481811039274524, "grad_norm": 541.2559204101562, "learning_rate": 1.9276452218195908e-06, "loss": 26.9531, "step": 15654 }, { "epoch": 0.14819056994916746, "grad_norm": 528.1937866210938, "learning_rate": 1.9276337716321697e-06, "loss": 29.5938, "step": 15655 }, { "epoch": 0.14820003597088252, "grad_norm": 394.1620178222656, "learning_rate": 1.927622320572833e-06, "loss": 37.2734, "step": 15656 }, { "epoch": 0.14820950199259758, "grad_norm": 227.65679931640625, "learning_rate": 1.927610868641592e-06, "loss": 18.3477, "step": 15657 }, { "epoch": 0.1482189680143126, "grad_norm": 343.5088806152344, "learning_rate": 1.9275994158384572e-06, "loss": 36.1953, "step": 15658 }, { "epoch": 0.14822843403602767, "grad_norm": 227.80215454101562, "learning_rate": 1.927587962163439e-06, "loss": 19.0625, "step": 15659 }, { "epoch": 0.14823790005774273, "grad_norm": 317.0726623535156, "learning_rate": 1.9275765076165485e-06, "loss": 45.4844, "step": 15660 }, { "epoch": 0.1482473660794578, "grad_norm": 424.9021301269531, "learning_rate": 1.9275650521977964e-06, "loss": 43.0625, "step": 15661 }, { "epoch": 0.14825683210117285, "grad_norm": 1661.7786865234375, "learning_rate": 1.9275535959071938e-06, "loss": 16.75, "step": 15662 }, { "epoch": 0.14826629812288789, "grad_norm": 3.449712038040161, "learning_rate": 1.9275421387447507e-06, "loss": 1.0107, "step": 15663 }, { "epoch": 0.14827576414460295, "grad_norm": 179.31814575195312, "learning_rate": 1.9275306807104786e-06, "loss": 23.9531, "step": 15664 }, { "epoch": 0.148285230166318, "grad_norm": 195.76657104492188, "learning_rate": 1.927519221804388e-06, "loss": 19.9141, "step": 15665 }, { "epoch": 0.14829469618803306, "grad_norm": 594.778564453125, "learning_rate": 1.92750776202649e-06, "loss": 38.0234, "step": 15666 }, { "epoch": 0.1483041622097481, "grad_norm": 247.01730346679688, "learning_rate": 1.9274963013767943e-06, "loss": 30.6953, "step": 15667 }, { "epoch": 0.14831362823146316, "grad_norm": 374.70166015625, "learning_rate": 1.927484839855313e-06, "loss": 34.4688, "step": 15668 }, { "epoch": 0.14832309425317822, "grad_norm": 222.84719848632812, "learning_rate": 1.927473377462056e-06, "loss": 14.4062, "step": 15669 }, { "epoch": 0.14833256027489328, "grad_norm": 440.94354248046875, "learning_rate": 1.9274619141970346e-06, "loss": 34.5938, "step": 15670 }, { "epoch": 0.14834202629660834, "grad_norm": 277.5548400878906, "learning_rate": 1.927450450060259e-06, "loss": 23.0156, "step": 15671 }, { "epoch": 0.14835149231832337, "grad_norm": 1092.1798095703125, "learning_rate": 1.9274389850517404e-06, "loss": 45.2852, "step": 15672 }, { "epoch": 0.14836095834003843, "grad_norm": 561.7603759765625, "learning_rate": 1.9274275191714894e-06, "loss": 20.0547, "step": 15673 }, { "epoch": 0.1483704243617535, "grad_norm": 473.4435119628906, "learning_rate": 1.927416052419517e-06, "loss": 38.2812, "step": 15674 }, { "epoch": 0.14837989038346855, "grad_norm": 742.7647705078125, "learning_rate": 1.927404584795834e-06, "loss": 61.7188, "step": 15675 }, { "epoch": 0.14838935640518358, "grad_norm": 243.6381378173828, "learning_rate": 1.927393116300451e-06, "loss": 22.0703, "step": 15676 }, { "epoch": 0.14839882242689864, "grad_norm": 243.02879333496094, "learning_rate": 1.9273816469333787e-06, "loss": 26.6094, "step": 15677 }, { "epoch": 0.1484082884486137, "grad_norm": 461.3034362792969, "learning_rate": 1.9273701766946278e-06, "loss": 51.8281, "step": 15678 }, { "epoch": 0.14841775447032876, "grad_norm": 239.20848083496094, "learning_rate": 1.9273587055842096e-06, "loss": 18.4297, "step": 15679 }, { "epoch": 0.14842722049204382, "grad_norm": 1007.4185180664062, "learning_rate": 1.9273472336021344e-06, "loss": 47.6406, "step": 15680 }, { "epoch": 0.14843668651375885, "grad_norm": 457.3825988769531, "learning_rate": 1.927335760748413e-06, "loss": 10.4297, "step": 15681 }, { "epoch": 0.1484461525354739, "grad_norm": 456.6561584472656, "learning_rate": 1.9273242870230565e-06, "loss": 29.0938, "step": 15682 }, { "epoch": 0.14845561855718897, "grad_norm": 386.7440490722656, "learning_rate": 1.9273128124260754e-06, "loss": 25.4375, "step": 15683 }, { "epoch": 0.14846508457890403, "grad_norm": 799.71630859375, "learning_rate": 1.927301336957481e-06, "loss": 87.5312, "step": 15684 }, { "epoch": 0.14847455060061907, "grad_norm": 322.6341247558594, "learning_rate": 1.927289860617283e-06, "loss": 20.9453, "step": 15685 }, { "epoch": 0.14848401662233413, "grad_norm": 2.724047899246216, "learning_rate": 1.9272783834054935e-06, "loss": 0.9248, "step": 15686 }, { "epoch": 0.14849348264404919, "grad_norm": 209.15220642089844, "learning_rate": 1.927266905322122e-06, "loss": 22.375, "step": 15687 }, { "epoch": 0.14850294866576425, "grad_norm": 433.30780029296875, "learning_rate": 1.9272554263671804e-06, "loss": 8.2734, "step": 15688 }, { "epoch": 0.1485124146874793, "grad_norm": 2.940467357635498, "learning_rate": 1.927243946540679e-06, "loss": 1.002, "step": 15689 }, { "epoch": 0.14852188070919434, "grad_norm": 308.1149597167969, "learning_rate": 1.927232465842628e-06, "loss": 8.3945, "step": 15690 }, { "epoch": 0.1485313467309094, "grad_norm": 920.0921020507812, "learning_rate": 1.927220984273039e-06, "loss": 48.2344, "step": 15691 }, { "epoch": 0.14854081275262446, "grad_norm": 386.56982421875, "learning_rate": 1.927209501831923e-06, "loss": 31.0, "step": 15692 }, { "epoch": 0.14855027877433952, "grad_norm": 2.923671245574951, "learning_rate": 1.9271980185192902e-06, "loss": 0.8994, "step": 15693 }, { "epoch": 0.14855974479605455, "grad_norm": 428.05804443359375, "learning_rate": 1.927186534335152e-06, "loss": 10.0195, "step": 15694 }, { "epoch": 0.1485692108177696, "grad_norm": 893.1290283203125, "learning_rate": 1.9271750492795183e-06, "loss": 27.7656, "step": 15695 }, { "epoch": 0.14857867683948467, "grad_norm": 205.18759155273438, "learning_rate": 1.9271635633524004e-06, "loss": 19.9062, "step": 15696 }, { "epoch": 0.14858814286119973, "grad_norm": 385.5495300292969, "learning_rate": 1.927152076553809e-06, "loss": 19.8203, "step": 15697 }, { "epoch": 0.1485976088829148, "grad_norm": 436.5403747558594, "learning_rate": 1.9271405888837552e-06, "loss": 43.5312, "step": 15698 }, { "epoch": 0.14860707490462982, "grad_norm": 260.28863525390625, "learning_rate": 1.9271291003422494e-06, "loss": 15.8398, "step": 15699 }, { "epoch": 0.14861654092634488, "grad_norm": 338.907958984375, "learning_rate": 1.9271176109293026e-06, "loss": 39.0938, "step": 15700 }, { "epoch": 0.14862600694805994, "grad_norm": 908.3390502929688, "learning_rate": 1.9271061206449257e-06, "loss": 26.3281, "step": 15701 }, { "epoch": 0.148635472969775, "grad_norm": 180.68116760253906, "learning_rate": 1.927094629489129e-06, "loss": 21.2422, "step": 15702 }, { "epoch": 0.14864493899149003, "grad_norm": 218.04051208496094, "learning_rate": 1.927083137461924e-06, "loss": 24.9922, "step": 15703 }, { "epoch": 0.1486544050132051, "grad_norm": 716.41162109375, "learning_rate": 1.927071644563321e-06, "loss": 35.0469, "step": 15704 }, { "epoch": 0.14866387103492015, "grad_norm": 3470.238525390625, "learning_rate": 1.927060150793331e-06, "loss": 23.1875, "step": 15705 }, { "epoch": 0.1486733370566352, "grad_norm": 3.616055488586426, "learning_rate": 1.9270486561519647e-06, "loss": 0.9697, "step": 15706 }, { "epoch": 0.14868280307835027, "grad_norm": 273.7857971191406, "learning_rate": 1.9270371606392334e-06, "loss": 16.8555, "step": 15707 }, { "epoch": 0.1486922691000653, "grad_norm": 459.5173645019531, "learning_rate": 1.927025664255147e-06, "loss": 47.7266, "step": 15708 }, { "epoch": 0.14870173512178037, "grad_norm": 258.4297790527344, "learning_rate": 1.927014166999717e-06, "loss": 24.1094, "step": 15709 }, { "epoch": 0.14871120114349543, "grad_norm": 291.7412414550781, "learning_rate": 1.9270026688729537e-06, "loss": 26.0547, "step": 15710 }, { "epoch": 0.14872066716521049, "grad_norm": 469.84332275390625, "learning_rate": 1.9269911698748685e-06, "loss": 23.2109, "step": 15711 }, { "epoch": 0.14873013318692552, "grad_norm": 408.1236267089844, "learning_rate": 1.9269796700054715e-06, "loss": 16.9141, "step": 15712 }, { "epoch": 0.14873959920864058, "grad_norm": 352.71014404296875, "learning_rate": 1.9269681692647744e-06, "loss": 32.0469, "step": 15713 }, { "epoch": 0.14874906523035564, "grad_norm": 562.0230102539062, "learning_rate": 1.9269566676527875e-06, "loss": 26.4062, "step": 15714 }, { "epoch": 0.1487585312520707, "grad_norm": 566.742431640625, "learning_rate": 1.9269451651695213e-06, "loss": 48.25, "step": 15715 }, { "epoch": 0.14876799727378576, "grad_norm": 681.0260009765625, "learning_rate": 1.9269336618149873e-06, "loss": 45.3359, "step": 15716 }, { "epoch": 0.1487774632955008, "grad_norm": 439.9293212890625, "learning_rate": 1.9269221575891956e-06, "loss": 49.3516, "step": 15717 }, { "epoch": 0.14878692931721585, "grad_norm": 584.7760009765625, "learning_rate": 1.9269106524921577e-06, "loss": 16.8984, "step": 15718 }, { "epoch": 0.1487963953389309, "grad_norm": 325.67449951171875, "learning_rate": 1.9268991465238837e-06, "loss": 29.3828, "step": 15719 }, { "epoch": 0.14880586136064597, "grad_norm": 400.5621032714844, "learning_rate": 1.926887639684385e-06, "loss": 26.3867, "step": 15720 }, { "epoch": 0.14881532738236103, "grad_norm": 967.1906127929688, "learning_rate": 1.9268761319736725e-06, "loss": 56.1953, "step": 15721 }, { "epoch": 0.14882479340407606, "grad_norm": 184.33966064453125, "learning_rate": 1.9268646233917564e-06, "loss": 21.5156, "step": 15722 }, { "epoch": 0.14883425942579112, "grad_norm": 369.4918518066406, "learning_rate": 1.9268531139386482e-06, "loss": 45.3906, "step": 15723 }, { "epoch": 0.14884372544750618, "grad_norm": 213.08914184570312, "learning_rate": 1.926841603614358e-06, "loss": 24.9688, "step": 15724 }, { "epoch": 0.14885319146922124, "grad_norm": 749.3993530273438, "learning_rate": 1.926830092418897e-06, "loss": 18.0117, "step": 15725 }, { "epoch": 0.14886265749093627, "grad_norm": 463.123291015625, "learning_rate": 1.926818580352276e-06, "loss": 16.6797, "step": 15726 }, { "epoch": 0.14887212351265133, "grad_norm": 433.26605224609375, "learning_rate": 1.926807067414506e-06, "loss": 34.7031, "step": 15727 }, { "epoch": 0.1488815895343664, "grad_norm": 402.1501159667969, "learning_rate": 1.9267955536055974e-06, "loss": 44.4062, "step": 15728 }, { "epoch": 0.14889105555608145, "grad_norm": 227.80194091796875, "learning_rate": 1.9267840389255616e-06, "loss": 15.2812, "step": 15729 }, { "epoch": 0.1489005215777965, "grad_norm": 567.0005493164062, "learning_rate": 1.926772523374409e-06, "loss": 53.1641, "step": 15730 }, { "epoch": 0.14890998759951155, "grad_norm": 563.1014404296875, "learning_rate": 1.9267610069521503e-06, "loss": 34.4688, "step": 15731 }, { "epoch": 0.1489194536212266, "grad_norm": 354.1593933105469, "learning_rate": 1.926749489658797e-06, "loss": 42.75, "step": 15732 }, { "epoch": 0.14892891964294167, "grad_norm": 622.53076171875, "learning_rate": 1.926737971494359e-06, "loss": 45.9688, "step": 15733 }, { "epoch": 0.14893838566465673, "grad_norm": 834.5885009765625, "learning_rate": 1.926726452458848e-06, "loss": 38.0859, "step": 15734 }, { "epoch": 0.14894785168637176, "grad_norm": 534.9523315429688, "learning_rate": 1.926714932552274e-06, "loss": 54.3594, "step": 15735 }, { "epoch": 0.14895731770808682, "grad_norm": 621.267822265625, "learning_rate": 1.9267034117746484e-06, "loss": 57.5586, "step": 15736 }, { "epoch": 0.14896678372980188, "grad_norm": 717.8208618164062, "learning_rate": 1.9266918901259823e-06, "loss": 36.25, "step": 15737 }, { "epoch": 0.14897624975151694, "grad_norm": 252.95150756835938, "learning_rate": 1.9266803676062856e-06, "loss": 25.2969, "step": 15738 }, { "epoch": 0.148985715773232, "grad_norm": 522.353515625, "learning_rate": 1.9266688442155696e-06, "loss": 40.1719, "step": 15739 }, { "epoch": 0.14899518179494703, "grad_norm": 509.0024108886719, "learning_rate": 1.9266573199538455e-06, "loss": 37.4375, "step": 15740 }, { "epoch": 0.1490046478166621, "grad_norm": 348.7193603515625, "learning_rate": 1.9266457948211233e-06, "loss": 45.2578, "step": 15741 }, { "epoch": 0.14901411383837715, "grad_norm": 2.797466278076172, "learning_rate": 1.926634268817415e-06, "loss": 0.8765, "step": 15742 }, { "epoch": 0.1490235798600922, "grad_norm": 539.4781494140625, "learning_rate": 1.9266227419427304e-06, "loss": 22.7266, "step": 15743 }, { "epoch": 0.14903304588180724, "grad_norm": 233.60662841796875, "learning_rate": 1.9266112141970806e-06, "loss": 10.1992, "step": 15744 }, { "epoch": 0.1490425119035223, "grad_norm": 383.2189025878906, "learning_rate": 1.9265996855804767e-06, "loss": 25.6797, "step": 15745 }, { "epoch": 0.14905197792523736, "grad_norm": 226.02166748046875, "learning_rate": 1.9265881560929294e-06, "loss": 15.8359, "step": 15746 }, { "epoch": 0.14906144394695242, "grad_norm": 483.8922119140625, "learning_rate": 1.9265766257344493e-06, "loss": 17.6797, "step": 15747 }, { "epoch": 0.14907090996866748, "grad_norm": 504.23272705078125, "learning_rate": 1.9265650945050478e-06, "loss": 32.1094, "step": 15748 }, { "epoch": 0.1490803759903825, "grad_norm": 439.8804931640625, "learning_rate": 1.9265535624047354e-06, "loss": 26.9062, "step": 15749 }, { "epoch": 0.14908984201209757, "grad_norm": 707.2577514648438, "learning_rate": 1.9265420294335225e-06, "loss": 43.7969, "step": 15750 }, { "epoch": 0.14909930803381263, "grad_norm": 361.3864440917969, "learning_rate": 1.9265304955914207e-06, "loss": 52.9453, "step": 15751 }, { "epoch": 0.1491087740555277, "grad_norm": 440.5207214355469, "learning_rate": 1.9265189608784404e-06, "loss": 36.6406, "step": 15752 }, { "epoch": 0.14911824007724273, "grad_norm": 420.8001403808594, "learning_rate": 1.9265074252945924e-06, "loss": 42.0781, "step": 15753 }, { "epoch": 0.14912770609895779, "grad_norm": 317.609619140625, "learning_rate": 1.926495888839888e-06, "loss": 19.4531, "step": 15754 }, { "epoch": 0.14913717212067285, "grad_norm": 323.0169677734375, "learning_rate": 1.9264843515143377e-06, "loss": 23.2969, "step": 15755 }, { "epoch": 0.1491466381423879, "grad_norm": 479.4408874511719, "learning_rate": 1.926472813317952e-06, "loss": 48.3281, "step": 15756 }, { "epoch": 0.14915610416410297, "grad_norm": 1234.7120361328125, "learning_rate": 1.9264612742507425e-06, "loss": 16.9453, "step": 15757 }, { "epoch": 0.149165570185818, "grad_norm": 871.9913330078125, "learning_rate": 1.9264497343127196e-06, "loss": 60.9062, "step": 15758 }, { "epoch": 0.14917503620753306, "grad_norm": 483.66180419921875, "learning_rate": 1.9264381935038942e-06, "loss": 37.1016, "step": 15759 }, { "epoch": 0.14918450222924812, "grad_norm": 385.2890625, "learning_rate": 1.9264266518242775e-06, "loss": 8.0703, "step": 15760 }, { "epoch": 0.14919396825096318, "grad_norm": 423.07977294921875, "learning_rate": 1.9264151092738796e-06, "loss": 29.7578, "step": 15761 }, { "epoch": 0.1492034342726782, "grad_norm": 315.2047119140625, "learning_rate": 1.926403565852712e-06, "loss": 25.6641, "step": 15762 }, { "epoch": 0.14921290029439327, "grad_norm": 295.0118713378906, "learning_rate": 1.926392021560785e-06, "loss": 25.6719, "step": 15763 }, { "epoch": 0.14922236631610833, "grad_norm": 1949.8148193359375, "learning_rate": 1.92638047639811e-06, "loss": 8.332, "step": 15764 }, { "epoch": 0.1492318323378234, "grad_norm": 1088.4554443359375, "learning_rate": 1.9263689303646973e-06, "loss": 36.0859, "step": 15765 }, { "epoch": 0.14924129835953845, "grad_norm": 396.4223327636719, "learning_rate": 1.926357383460558e-06, "loss": 21.3281, "step": 15766 }, { "epoch": 0.14925076438125348, "grad_norm": 396.4221496582031, "learning_rate": 1.9263458356857037e-06, "loss": 16.1133, "step": 15767 }, { "epoch": 0.14926023040296854, "grad_norm": 349.9913024902344, "learning_rate": 1.9263342870401443e-06, "loss": 31.0781, "step": 15768 }, { "epoch": 0.1492696964246836, "grad_norm": 247.81800842285156, "learning_rate": 1.9263227375238906e-06, "loss": 16.0312, "step": 15769 }, { "epoch": 0.14927916244639866, "grad_norm": 3.2522964477539062, "learning_rate": 1.9263111871369543e-06, "loss": 0.8857, "step": 15770 }, { "epoch": 0.1492886284681137, "grad_norm": 498.37335205078125, "learning_rate": 1.9262996358793453e-06, "loss": 24.6914, "step": 15771 }, { "epoch": 0.14929809448982875, "grad_norm": 561.4940795898438, "learning_rate": 1.9262880837510753e-06, "loss": 22.3203, "step": 15772 }, { "epoch": 0.1493075605115438, "grad_norm": 426.8276062011719, "learning_rate": 1.9262765307521545e-06, "loss": 36.5312, "step": 15773 }, { "epoch": 0.14931702653325887, "grad_norm": 1703.7567138671875, "learning_rate": 1.926264976882594e-06, "loss": 28.8594, "step": 15774 }, { "epoch": 0.14932649255497393, "grad_norm": 236.64950561523438, "learning_rate": 1.9262534221424048e-06, "loss": 20.7109, "step": 15775 }, { "epoch": 0.14933595857668897, "grad_norm": 220.9412384033203, "learning_rate": 1.9262418665315978e-06, "loss": 7.3945, "step": 15776 }, { "epoch": 0.14934542459840403, "grad_norm": 764.82470703125, "learning_rate": 1.9262303100501832e-06, "loss": 22.8281, "step": 15777 }, { "epoch": 0.14935489062011909, "grad_norm": 170.78890991210938, "learning_rate": 1.9262187526981725e-06, "loss": 18.3438, "step": 15778 }, { "epoch": 0.14936435664183415, "grad_norm": 390.1445617675781, "learning_rate": 1.9262071944755767e-06, "loss": 36.2344, "step": 15779 }, { "epoch": 0.14937382266354918, "grad_norm": 973.1336669921875, "learning_rate": 1.9261956353824064e-06, "loss": 22.2422, "step": 15780 }, { "epoch": 0.14938328868526424, "grad_norm": 344.7187194824219, "learning_rate": 1.926184075418672e-06, "loss": 21.9531, "step": 15781 }, { "epoch": 0.1493927547069793, "grad_norm": 614.89697265625, "learning_rate": 1.9261725145843853e-06, "loss": 11.1719, "step": 15782 }, { "epoch": 0.14940222072869436, "grad_norm": 207.30282592773438, "learning_rate": 1.9261609528795566e-06, "loss": 19.9688, "step": 15783 }, { "epoch": 0.14941168675040942, "grad_norm": 1096.509521484375, "learning_rate": 1.926149390304197e-06, "loss": 45.6484, "step": 15784 }, { "epoch": 0.14942115277212445, "grad_norm": 415.6158752441406, "learning_rate": 1.9261378268583166e-06, "loss": 34.2344, "step": 15785 }, { "epoch": 0.1494306187938395, "grad_norm": 439.1882019042969, "learning_rate": 1.9261262625419273e-06, "loss": 29.1172, "step": 15786 }, { "epoch": 0.14944008481555457, "grad_norm": 482.66015625, "learning_rate": 1.9261146973550397e-06, "loss": 34.2344, "step": 15787 }, { "epoch": 0.14944955083726963, "grad_norm": 390.1897277832031, "learning_rate": 1.9261031312976644e-06, "loss": 19.4844, "step": 15788 }, { "epoch": 0.14945901685898466, "grad_norm": 551.6903686523438, "learning_rate": 1.9260915643698124e-06, "loss": 36.9688, "step": 15789 }, { "epoch": 0.14946848288069972, "grad_norm": 523.8162841796875, "learning_rate": 1.9260799965714948e-06, "loss": 42.4688, "step": 15790 }, { "epoch": 0.14947794890241478, "grad_norm": 450.2486572265625, "learning_rate": 1.9260684279027216e-06, "loss": 12.2344, "step": 15791 }, { "epoch": 0.14948741492412984, "grad_norm": 248.080322265625, "learning_rate": 1.926056858363505e-06, "loss": 26.1328, "step": 15792 }, { "epoch": 0.1494968809458449, "grad_norm": 289.0199890136719, "learning_rate": 1.9260452879538547e-06, "loss": 14.8047, "step": 15793 }, { "epoch": 0.14950634696755993, "grad_norm": 985.5465698242188, "learning_rate": 1.9260337166737823e-06, "loss": 41.3164, "step": 15794 }, { "epoch": 0.149515812989275, "grad_norm": 324.4724426269531, "learning_rate": 1.9260221445232988e-06, "loss": 14.9336, "step": 15795 }, { "epoch": 0.14952527901099005, "grad_norm": 210.2920379638672, "learning_rate": 1.9260105715024144e-06, "loss": 7.1875, "step": 15796 }, { "epoch": 0.1495347450327051, "grad_norm": 538.3289794921875, "learning_rate": 1.92599899761114e-06, "loss": 16.1719, "step": 15797 }, { "epoch": 0.14954421105442015, "grad_norm": 251.3007354736328, "learning_rate": 1.9259874228494874e-06, "loss": 17.8047, "step": 15798 }, { "epoch": 0.1495536770761352, "grad_norm": 750.9677734375, "learning_rate": 1.9259758472174664e-06, "loss": 35.2812, "step": 15799 }, { "epoch": 0.14956314309785027, "grad_norm": 620.5933227539062, "learning_rate": 1.9259642707150886e-06, "loss": 38.8906, "step": 15800 }, { "epoch": 0.14957260911956533, "grad_norm": 630.4015502929688, "learning_rate": 1.9259526933423645e-06, "loss": 34.3125, "step": 15801 }, { "epoch": 0.14958207514128039, "grad_norm": 225.30072021484375, "learning_rate": 1.925941115099305e-06, "loss": 22.3281, "step": 15802 }, { "epoch": 0.14959154116299542, "grad_norm": 468.62457275390625, "learning_rate": 1.925929535985921e-06, "loss": 60.0156, "step": 15803 }, { "epoch": 0.14960100718471048, "grad_norm": 444.92633056640625, "learning_rate": 1.925917956002224e-06, "loss": 51.7031, "step": 15804 }, { "epoch": 0.14961047320642554, "grad_norm": 438.3891906738281, "learning_rate": 1.925906375148224e-06, "loss": 42.9688, "step": 15805 }, { "epoch": 0.1496199392281406, "grad_norm": 4.241981506347656, "learning_rate": 1.925894793423932e-06, "loss": 1.0259, "step": 15806 }, { "epoch": 0.14962940524985566, "grad_norm": 262.8886413574219, "learning_rate": 1.9258832108293595e-06, "loss": 22.6797, "step": 15807 }, { "epoch": 0.1496388712715707, "grad_norm": 647.5609741210938, "learning_rate": 1.925871627364517e-06, "loss": 35.125, "step": 15808 }, { "epoch": 0.14964833729328575, "grad_norm": 521.3070068359375, "learning_rate": 1.925860043029415e-06, "loss": 55.3125, "step": 15809 }, { "epoch": 0.1496578033150008, "grad_norm": 285.0725402832031, "learning_rate": 1.9258484578240654e-06, "loss": 50.6562, "step": 15810 }, { "epoch": 0.14966726933671587, "grad_norm": 192.86045837402344, "learning_rate": 1.925836871748478e-06, "loss": 24.0781, "step": 15811 }, { "epoch": 0.1496767353584309, "grad_norm": 661.7197265625, "learning_rate": 1.925825284802664e-06, "loss": 16.9609, "step": 15812 }, { "epoch": 0.14968620138014596, "grad_norm": 371.3602600097656, "learning_rate": 1.9258136969866354e-06, "loss": 38.6094, "step": 15813 }, { "epoch": 0.14969566740186102, "grad_norm": 445.8435363769531, "learning_rate": 1.9258021083004014e-06, "loss": 20.2266, "step": 15814 }, { "epoch": 0.14970513342357608, "grad_norm": 184.66587829589844, "learning_rate": 1.925790518743974e-06, "loss": 19.3672, "step": 15815 }, { "epoch": 0.14971459944529114, "grad_norm": 353.6419982910156, "learning_rate": 1.9257789283173634e-06, "loss": 20.3359, "step": 15816 }, { "epoch": 0.14972406546700617, "grad_norm": 285.8191223144531, "learning_rate": 1.925767337020581e-06, "loss": 35.0625, "step": 15817 }, { "epoch": 0.14973353148872123, "grad_norm": 378.7111511230469, "learning_rate": 1.9257557448536375e-06, "loss": 23.6719, "step": 15818 }, { "epoch": 0.1497429975104363, "grad_norm": 303.0896301269531, "learning_rate": 1.925744151816544e-06, "loss": 19.4766, "step": 15819 }, { "epoch": 0.14975246353215135, "grad_norm": 352.67694091796875, "learning_rate": 1.925732557909311e-06, "loss": 16.4375, "step": 15820 }, { "epoch": 0.14976192955386639, "grad_norm": 359.3232421875, "learning_rate": 1.9257209631319495e-06, "loss": 39.4688, "step": 15821 }, { "epoch": 0.14977139557558145, "grad_norm": 225.13934326171875, "learning_rate": 1.9257093674844705e-06, "loss": 17.1875, "step": 15822 }, { "epoch": 0.1497808615972965, "grad_norm": 3.3164539337158203, "learning_rate": 1.9256977709668854e-06, "loss": 0.916, "step": 15823 }, { "epoch": 0.14979032761901157, "grad_norm": 209.1502227783203, "learning_rate": 1.9256861735792043e-06, "loss": 20.7656, "step": 15824 }, { "epoch": 0.14979979364072663, "grad_norm": 205.46340942382812, "learning_rate": 1.9256745753214382e-06, "loss": 19.5234, "step": 15825 }, { "epoch": 0.14980925966244166, "grad_norm": 588.6609497070312, "learning_rate": 1.9256629761935987e-06, "loss": 8.3203, "step": 15826 }, { "epoch": 0.14981872568415672, "grad_norm": 302.0202331542969, "learning_rate": 1.9256513761956962e-06, "loss": 35.3594, "step": 15827 }, { "epoch": 0.14982819170587178, "grad_norm": 3.3157570362091064, "learning_rate": 1.925639775327741e-06, "loss": 1.0293, "step": 15828 }, { "epoch": 0.14983765772758684, "grad_norm": 419.89495849609375, "learning_rate": 1.9256281735897453e-06, "loss": 38.4219, "step": 15829 }, { "epoch": 0.14984712374930187, "grad_norm": 651.988525390625, "learning_rate": 1.925616570981719e-06, "loss": 57.4375, "step": 15830 }, { "epoch": 0.14985658977101693, "grad_norm": 402.02642822265625, "learning_rate": 1.9256049675036735e-06, "loss": 30.2188, "step": 15831 }, { "epoch": 0.149866055792732, "grad_norm": 203.0720672607422, "learning_rate": 1.9255933631556194e-06, "loss": 23.3281, "step": 15832 }, { "epoch": 0.14987552181444705, "grad_norm": 4.478382587432861, "learning_rate": 1.9255817579375677e-06, "loss": 0.9048, "step": 15833 }, { "epoch": 0.1498849878361621, "grad_norm": 609.2659301757812, "learning_rate": 1.9255701518495292e-06, "loss": 28.9688, "step": 15834 }, { "epoch": 0.14989445385787714, "grad_norm": 672.2642822265625, "learning_rate": 1.9255585448915155e-06, "loss": 49.0938, "step": 15835 }, { "epoch": 0.1499039198795922, "grad_norm": 622.2691040039062, "learning_rate": 1.9255469370635367e-06, "loss": 29.25, "step": 15836 }, { "epoch": 0.14991338590130726, "grad_norm": 328.4287414550781, "learning_rate": 1.925535328365604e-06, "loss": 21.7891, "step": 15837 }, { "epoch": 0.14992285192302232, "grad_norm": 306.4164123535156, "learning_rate": 1.9255237187977283e-06, "loss": 23.3477, "step": 15838 }, { "epoch": 0.14993231794473735, "grad_norm": 756.99267578125, "learning_rate": 1.9255121083599203e-06, "loss": 25.8359, "step": 15839 }, { "epoch": 0.1499417839664524, "grad_norm": 601.7548828125, "learning_rate": 1.9255004970521917e-06, "loss": 30.2578, "step": 15840 }, { "epoch": 0.14995124998816747, "grad_norm": 385.9080505371094, "learning_rate": 1.925488884874552e-06, "loss": 28.2734, "step": 15841 }, { "epoch": 0.14996071600988253, "grad_norm": 361.01812744140625, "learning_rate": 1.9254772718270136e-06, "loss": 20.3984, "step": 15842 }, { "epoch": 0.1499701820315976, "grad_norm": 455.7162170410156, "learning_rate": 1.9254656579095866e-06, "loss": 24.2266, "step": 15843 }, { "epoch": 0.14997964805331263, "grad_norm": 350.27362060546875, "learning_rate": 1.925454043122282e-06, "loss": 20.1719, "step": 15844 }, { "epoch": 0.14998911407502769, "grad_norm": 304.2891540527344, "learning_rate": 1.925442427465111e-06, "loss": 30.6875, "step": 15845 }, { "epoch": 0.14999858009674275, "grad_norm": 179.34400939941406, "learning_rate": 1.9254308109380843e-06, "loss": 19.4141, "step": 15846 }, { "epoch": 0.1500080461184578, "grad_norm": 913.3694458007812, "learning_rate": 1.925419193541213e-06, "loss": 37.75, "step": 15847 }, { "epoch": 0.15001751214017284, "grad_norm": 137.0207977294922, "learning_rate": 1.9254075752745076e-06, "loss": 15.4531, "step": 15848 }, { "epoch": 0.1500269781618879, "grad_norm": 283.70111083984375, "learning_rate": 1.9253959561379795e-06, "loss": 12.5938, "step": 15849 }, { "epoch": 0.15003644418360296, "grad_norm": 342.1932678222656, "learning_rate": 1.925384336131639e-06, "loss": 20.5703, "step": 15850 }, { "epoch": 0.15004591020531802, "grad_norm": 272.4956359863281, "learning_rate": 1.9253727152554974e-06, "loss": 20.2188, "step": 15851 }, { "epoch": 0.15005537622703308, "grad_norm": 604.15966796875, "learning_rate": 1.9253610935095657e-06, "loss": 51.5977, "step": 15852 }, { "epoch": 0.1500648422487481, "grad_norm": 179.6300811767578, "learning_rate": 1.9253494708938553e-06, "loss": 8.3828, "step": 15853 }, { "epoch": 0.15007430827046317, "grad_norm": 520.267578125, "learning_rate": 1.925337847408376e-06, "loss": 45.3281, "step": 15854 }, { "epoch": 0.15008377429217823, "grad_norm": 582.6358642578125, "learning_rate": 1.92532622305314e-06, "loss": 48.7812, "step": 15855 }, { "epoch": 0.1500932403138933, "grad_norm": 471.0153503417969, "learning_rate": 1.9253145978281567e-06, "loss": 21.9609, "step": 15856 }, { "epoch": 0.15010270633560832, "grad_norm": 239.2001953125, "learning_rate": 1.9253029717334385e-06, "loss": 21.8047, "step": 15857 }, { "epoch": 0.15011217235732338, "grad_norm": 264.435302734375, "learning_rate": 1.9252913447689958e-06, "loss": 21.125, "step": 15858 }, { "epoch": 0.15012163837903844, "grad_norm": 346.58489990234375, "learning_rate": 1.9252797169348386e-06, "loss": 25.4531, "step": 15859 }, { "epoch": 0.1501311044007535, "grad_norm": 2.855743646621704, "learning_rate": 1.9252680882309794e-06, "loss": 0.9292, "step": 15860 }, { "epoch": 0.15014057042246856, "grad_norm": 783.0327758789062, "learning_rate": 1.925256458657428e-06, "loss": 60.9062, "step": 15861 }, { "epoch": 0.1501500364441836, "grad_norm": 204.0355987548828, "learning_rate": 1.9252448282141958e-06, "loss": 21.9688, "step": 15862 }, { "epoch": 0.15015950246589865, "grad_norm": 265.5856628417969, "learning_rate": 1.925233196901294e-06, "loss": 14.8906, "step": 15863 }, { "epoch": 0.1501689684876137, "grad_norm": 546.90087890625, "learning_rate": 1.925221564718733e-06, "loss": 22.1172, "step": 15864 }, { "epoch": 0.15017843450932877, "grad_norm": 505.1416931152344, "learning_rate": 1.9252099316665234e-06, "loss": 20.6562, "step": 15865 }, { "epoch": 0.1501879005310438, "grad_norm": 451.2748718261719, "learning_rate": 1.9251982977446774e-06, "loss": 52.6484, "step": 15866 }, { "epoch": 0.15019736655275887, "grad_norm": 168.306640625, "learning_rate": 1.9251866629532046e-06, "loss": 22.4688, "step": 15867 }, { "epoch": 0.15020683257447393, "grad_norm": 3.0663516521453857, "learning_rate": 1.925175027292117e-06, "loss": 0.9976, "step": 15868 }, { "epoch": 0.15021629859618899, "grad_norm": 564.333984375, "learning_rate": 1.9251633907614248e-06, "loss": 17.3555, "step": 15869 }, { "epoch": 0.15022576461790405, "grad_norm": 250.57960510253906, "learning_rate": 1.9251517533611393e-06, "loss": 19.2031, "step": 15870 }, { "epoch": 0.15023523063961908, "grad_norm": 512.9970092773438, "learning_rate": 1.9251401150912715e-06, "loss": 40.7031, "step": 15871 }, { "epoch": 0.15024469666133414, "grad_norm": 209.144287109375, "learning_rate": 1.925128475951832e-06, "loss": 17.7109, "step": 15872 }, { "epoch": 0.1502541626830492, "grad_norm": 1180.8203125, "learning_rate": 1.9251168359428317e-06, "loss": 45.6875, "step": 15873 }, { "epoch": 0.15026362870476426, "grad_norm": 299.4532775878906, "learning_rate": 1.925105195064282e-06, "loss": 36.2656, "step": 15874 }, { "epoch": 0.1502730947264793, "grad_norm": 346.56121826171875, "learning_rate": 1.925093553316194e-06, "loss": 31.8047, "step": 15875 }, { "epoch": 0.15028256074819435, "grad_norm": 596.4038696289062, "learning_rate": 1.9250819106985775e-06, "loss": 32.2188, "step": 15876 }, { "epoch": 0.1502920267699094, "grad_norm": 334.33587646484375, "learning_rate": 1.9250702672114442e-06, "loss": 19.7031, "step": 15877 }, { "epoch": 0.15030149279162447, "grad_norm": 486.9529724121094, "learning_rate": 1.9250586228548058e-06, "loss": 64.0625, "step": 15878 }, { "epoch": 0.15031095881333953, "grad_norm": 658.6571044921875, "learning_rate": 1.925046977628672e-06, "loss": 39.0586, "step": 15879 }, { "epoch": 0.15032042483505456, "grad_norm": 339.92755126953125, "learning_rate": 1.925035331533054e-06, "loss": 42.4688, "step": 15880 }, { "epoch": 0.15032989085676962, "grad_norm": 902.2074584960938, "learning_rate": 1.925023684567963e-06, "loss": 43.3281, "step": 15881 }, { "epoch": 0.15033935687848468, "grad_norm": 505.3840637207031, "learning_rate": 1.92501203673341e-06, "loss": 22.4688, "step": 15882 }, { "epoch": 0.15034882290019974, "grad_norm": 345.2973327636719, "learning_rate": 1.925000388029406e-06, "loss": 26.6641, "step": 15883 }, { "epoch": 0.15035828892191477, "grad_norm": 716.7442626953125, "learning_rate": 1.924988738455962e-06, "loss": 47.3438, "step": 15884 }, { "epoch": 0.15036775494362983, "grad_norm": 580.680419921875, "learning_rate": 1.9249770880130885e-06, "loss": 25.0781, "step": 15885 }, { "epoch": 0.1503772209653449, "grad_norm": 1043.8282470703125, "learning_rate": 1.9249654367007967e-06, "loss": 38.8906, "step": 15886 }, { "epoch": 0.15038668698705995, "grad_norm": 556.4962158203125, "learning_rate": 1.9249537845190976e-06, "loss": 30.8125, "step": 15887 }, { "epoch": 0.150396153008775, "grad_norm": 815.1170043945312, "learning_rate": 1.924942131468002e-06, "loss": 20.3438, "step": 15888 }, { "epoch": 0.15040561903049005, "grad_norm": 529.4838256835938, "learning_rate": 1.924930477547521e-06, "loss": 50.9688, "step": 15889 }, { "epoch": 0.1504150850522051, "grad_norm": 3.266963005065918, "learning_rate": 1.9249188227576658e-06, "loss": 0.9951, "step": 15890 }, { "epoch": 0.15042455107392017, "grad_norm": 363.1669006347656, "learning_rate": 1.9249071670984467e-06, "loss": 45.625, "step": 15891 }, { "epoch": 0.15043401709563523, "grad_norm": 598.1331787109375, "learning_rate": 1.9248955105698753e-06, "loss": 33.5469, "step": 15892 }, { "epoch": 0.15044348311735026, "grad_norm": 1090.7269287109375, "learning_rate": 1.9248838531719623e-06, "loss": 16.1016, "step": 15893 }, { "epoch": 0.15045294913906532, "grad_norm": 182.5230712890625, "learning_rate": 1.924872194904718e-06, "loss": 11.3008, "step": 15894 }, { "epoch": 0.15046241516078038, "grad_norm": 749.8870239257812, "learning_rate": 1.9248605357681548e-06, "loss": 12.25, "step": 15895 }, { "epoch": 0.15047188118249544, "grad_norm": 683.7249145507812, "learning_rate": 1.9248488757622828e-06, "loss": 26.9531, "step": 15896 }, { "epoch": 0.1504813472042105, "grad_norm": 539.680908203125, "learning_rate": 1.9248372148871127e-06, "loss": 32.6719, "step": 15897 }, { "epoch": 0.15049081322592553, "grad_norm": 341.26513671875, "learning_rate": 1.9248255531426556e-06, "loss": 24.1094, "step": 15898 }, { "epoch": 0.1505002792476406, "grad_norm": 340.134521484375, "learning_rate": 1.9248138905289233e-06, "loss": 21.0, "step": 15899 }, { "epoch": 0.15050974526935565, "grad_norm": 180.4652862548828, "learning_rate": 1.9248022270459256e-06, "loss": 12.7188, "step": 15900 }, { "epoch": 0.1505192112910707, "grad_norm": 3.4405643939971924, "learning_rate": 1.9247905626936743e-06, "loss": 0.8613, "step": 15901 }, { "epoch": 0.15052867731278577, "grad_norm": 244.41018676757812, "learning_rate": 1.92477889747218e-06, "loss": 19.7969, "step": 15902 }, { "epoch": 0.1505381433345008, "grad_norm": 186.23745727539062, "learning_rate": 1.9247672313814533e-06, "loss": 16.2656, "step": 15903 }, { "epoch": 0.15054760935621586, "grad_norm": 385.86920166015625, "learning_rate": 1.924755564421506e-06, "loss": 42.0703, "step": 15904 }, { "epoch": 0.15055707537793092, "grad_norm": 613.3121948242188, "learning_rate": 1.9247438965923483e-06, "loss": 12.3594, "step": 15905 }, { "epoch": 0.15056654139964598, "grad_norm": 364.0429382324219, "learning_rate": 1.9247322278939917e-06, "loss": 22.8984, "step": 15906 }, { "epoch": 0.150576007421361, "grad_norm": 165.5546112060547, "learning_rate": 1.924720558326447e-06, "loss": 17.1172, "step": 15907 }, { "epoch": 0.15058547344307607, "grad_norm": 653.9844360351562, "learning_rate": 1.9247088878897252e-06, "loss": 22.2344, "step": 15908 }, { "epoch": 0.15059493946479113, "grad_norm": 313.48382568359375, "learning_rate": 1.9246972165838366e-06, "loss": 29.4648, "step": 15909 }, { "epoch": 0.1506044054865062, "grad_norm": 516.813720703125, "learning_rate": 1.9246855444087934e-06, "loss": 20.5391, "step": 15910 }, { "epoch": 0.15061387150822125, "grad_norm": 234.4116668701172, "learning_rate": 1.9246738713646055e-06, "loss": 21.6328, "step": 15911 }, { "epoch": 0.15062333752993629, "grad_norm": 585.2155151367188, "learning_rate": 1.9246621974512847e-06, "loss": 22.9609, "step": 15912 }, { "epoch": 0.15063280355165135, "grad_norm": 358.904052734375, "learning_rate": 1.9246505226688416e-06, "loss": 43.7031, "step": 15913 }, { "epoch": 0.1506422695733664, "grad_norm": 352.25506591796875, "learning_rate": 1.924638847017287e-06, "loss": 30.5, "step": 15914 }, { "epoch": 0.15065173559508147, "grad_norm": 378.2925109863281, "learning_rate": 1.9246271704966318e-06, "loss": 36.1719, "step": 15915 }, { "epoch": 0.1506612016167965, "grad_norm": 309.8816223144531, "learning_rate": 1.9246154931068875e-06, "loss": 26.0625, "step": 15916 }, { "epoch": 0.15067066763851156, "grad_norm": 439.50531005859375, "learning_rate": 1.924603814848065e-06, "loss": 44.4297, "step": 15917 }, { "epoch": 0.15068013366022662, "grad_norm": 314.3935852050781, "learning_rate": 1.9245921357201746e-06, "loss": 19.1094, "step": 15918 }, { "epoch": 0.15068959968194168, "grad_norm": 607.9560546875, "learning_rate": 1.924580455723228e-06, "loss": 57.0312, "step": 15919 }, { "epoch": 0.15069906570365674, "grad_norm": 414.8005676269531, "learning_rate": 1.924568774857236e-06, "loss": 33.3828, "step": 15920 }, { "epoch": 0.15070853172537177, "grad_norm": 186.07579040527344, "learning_rate": 1.9245570931222093e-06, "loss": 17.3125, "step": 15921 }, { "epoch": 0.15071799774708683, "grad_norm": 759.4568481445312, "learning_rate": 1.924545410518159e-06, "loss": 33.7812, "step": 15922 }, { "epoch": 0.1507274637688019, "grad_norm": 264.6676940917969, "learning_rate": 1.9245337270450965e-06, "loss": 27.5469, "step": 15923 }, { "epoch": 0.15073692979051695, "grad_norm": 251.0618896484375, "learning_rate": 1.9245220427030323e-06, "loss": 19.0469, "step": 15924 }, { "epoch": 0.15074639581223198, "grad_norm": 167.65887451171875, "learning_rate": 1.9245103574919776e-06, "loss": 20.3125, "step": 15925 }, { "epoch": 0.15075586183394704, "grad_norm": 591.0492553710938, "learning_rate": 1.924498671411943e-06, "loss": 51.0625, "step": 15926 }, { "epoch": 0.1507653278556621, "grad_norm": 579.18115234375, "learning_rate": 1.9244869844629397e-06, "loss": 44.9375, "step": 15927 }, { "epoch": 0.15077479387737716, "grad_norm": 382.61492919921875, "learning_rate": 1.9244752966449793e-06, "loss": 25.6016, "step": 15928 }, { "epoch": 0.15078425989909222, "grad_norm": 598.7092895507812, "learning_rate": 1.924463607958072e-06, "loss": 46.4688, "step": 15929 }, { "epoch": 0.15079372592080725, "grad_norm": 266.32391357421875, "learning_rate": 1.924451918402229e-06, "loss": 25.0078, "step": 15930 }, { "epoch": 0.1508031919425223, "grad_norm": 520.501953125, "learning_rate": 1.9244402279774613e-06, "loss": 38.3906, "step": 15931 }, { "epoch": 0.15081265796423737, "grad_norm": 1033.274169921875, "learning_rate": 1.9244285366837804e-06, "loss": 39.1797, "step": 15932 }, { "epoch": 0.15082212398595243, "grad_norm": 161.0829315185547, "learning_rate": 1.924416844521196e-06, "loss": 8.5195, "step": 15933 }, { "epoch": 0.15083159000766747, "grad_norm": 275.91595458984375, "learning_rate": 1.924405151489721e-06, "loss": 26.6875, "step": 15934 }, { "epoch": 0.15084105602938253, "grad_norm": 325.6798400878906, "learning_rate": 1.9243934575893643e-06, "loss": 25.9453, "step": 15935 }, { "epoch": 0.15085052205109759, "grad_norm": 562.7462768554688, "learning_rate": 1.9243817628201382e-06, "loss": 28.5391, "step": 15936 }, { "epoch": 0.15085998807281265, "grad_norm": 292.0237121582031, "learning_rate": 1.9243700671820533e-06, "loss": 29.7031, "step": 15937 }, { "epoch": 0.1508694540945277, "grad_norm": 380.80743408203125, "learning_rate": 1.924358370675121e-06, "loss": 26.8281, "step": 15938 }, { "epoch": 0.15087892011624274, "grad_norm": 356.9715881347656, "learning_rate": 1.9243466732993518e-06, "loss": 32.7266, "step": 15939 }, { "epoch": 0.1508883861379578, "grad_norm": 747.6587524414062, "learning_rate": 1.9243349750547567e-06, "loss": 48.0312, "step": 15940 }, { "epoch": 0.15089785215967286, "grad_norm": 3.2325010299682617, "learning_rate": 1.924323275941347e-06, "loss": 1.0132, "step": 15941 }, { "epoch": 0.15090731818138792, "grad_norm": 1070.19091796875, "learning_rate": 1.9243115759591336e-06, "loss": 31.9062, "step": 15942 }, { "epoch": 0.15091678420310295, "grad_norm": 538.5552978515625, "learning_rate": 1.924299875108127e-06, "loss": 50.1445, "step": 15943 }, { "epoch": 0.150926250224818, "grad_norm": 1286.1748046875, "learning_rate": 1.9242881733883393e-06, "loss": 30.9375, "step": 15944 }, { "epoch": 0.15093571624653307, "grad_norm": 228.91351318359375, "learning_rate": 1.9242764707997803e-06, "loss": 19.0938, "step": 15945 }, { "epoch": 0.15094518226824813, "grad_norm": 228.2642822265625, "learning_rate": 1.924264767342462e-06, "loss": 26.2891, "step": 15946 }, { "epoch": 0.1509546482899632, "grad_norm": 384.0343017578125, "learning_rate": 1.9242530630163944e-06, "loss": 33.1406, "step": 15947 }, { "epoch": 0.15096411431167822, "grad_norm": 650.8197021484375, "learning_rate": 1.9242413578215895e-06, "loss": 61.5312, "step": 15948 }, { "epoch": 0.15097358033339328, "grad_norm": 315.7159118652344, "learning_rate": 1.924229651758058e-06, "loss": 38.1406, "step": 15949 }, { "epoch": 0.15098304635510834, "grad_norm": 3.356628656387329, "learning_rate": 1.9242179448258106e-06, "loss": 0.9814, "step": 15950 }, { "epoch": 0.1509925123768234, "grad_norm": 500.4942626953125, "learning_rate": 1.924206237024858e-06, "loss": 42.1562, "step": 15951 }, { "epoch": 0.15100197839853843, "grad_norm": 653.0972290039062, "learning_rate": 1.924194528355212e-06, "loss": 36.4531, "step": 15952 }, { "epoch": 0.1510114444202535, "grad_norm": 3.8596463203430176, "learning_rate": 1.9241828188168835e-06, "loss": 1.0127, "step": 15953 }, { "epoch": 0.15102091044196855, "grad_norm": 887.1087646484375, "learning_rate": 1.924171108409883e-06, "loss": 28.6484, "step": 15954 }, { "epoch": 0.1510303764636836, "grad_norm": 370.3211364746094, "learning_rate": 1.924159397134222e-06, "loss": 44.7344, "step": 15955 }, { "epoch": 0.15103984248539867, "grad_norm": 332.69976806640625, "learning_rate": 1.9241476849899106e-06, "loss": 19.4297, "step": 15956 }, { "epoch": 0.1510493085071137, "grad_norm": 181.0024871826172, "learning_rate": 1.924135971976961e-06, "loss": 17.75, "step": 15957 }, { "epoch": 0.15105877452882877, "grad_norm": 2012.956787109375, "learning_rate": 1.924124258095384e-06, "loss": 66.2188, "step": 15958 }, { "epoch": 0.15106824055054383, "grad_norm": 527.802490234375, "learning_rate": 1.9241125433451903e-06, "loss": 24.8359, "step": 15959 }, { "epoch": 0.15107770657225889, "grad_norm": 428.0569763183594, "learning_rate": 1.9241008277263905e-06, "loss": 30.1875, "step": 15960 }, { "epoch": 0.15108717259397392, "grad_norm": 592.8944091796875, "learning_rate": 1.924089111238996e-06, "loss": 45.5156, "step": 15961 }, { "epoch": 0.15109663861568898, "grad_norm": 997.6264038085938, "learning_rate": 1.9240773938830183e-06, "loss": 40.1328, "step": 15962 }, { "epoch": 0.15110610463740404, "grad_norm": 277.46209716796875, "learning_rate": 1.9240656756584674e-06, "loss": 22.9297, "step": 15963 }, { "epoch": 0.1511155706591191, "grad_norm": 290.4744567871094, "learning_rate": 1.9240539565653553e-06, "loss": 16.3828, "step": 15964 }, { "epoch": 0.15112503668083416, "grad_norm": 221.81922912597656, "learning_rate": 1.9240422366036927e-06, "loss": 19.7812, "step": 15965 }, { "epoch": 0.1511345027025492, "grad_norm": 197.1556396484375, "learning_rate": 1.92403051577349e-06, "loss": 17.3203, "step": 15966 }, { "epoch": 0.15114396872426425, "grad_norm": 3.9623067378997803, "learning_rate": 1.924018794074759e-06, "loss": 0.8921, "step": 15967 }, { "epoch": 0.1511534347459793, "grad_norm": 1010.3749389648438, "learning_rate": 1.9240070715075104e-06, "loss": 40.1641, "step": 15968 }, { "epoch": 0.15116290076769437, "grad_norm": 1194.725341796875, "learning_rate": 1.9239953480717553e-06, "loss": 35.9453, "step": 15969 }, { "epoch": 0.1511723667894094, "grad_norm": 708.308837890625, "learning_rate": 1.9239836237675047e-06, "loss": 53.4453, "step": 15970 }, { "epoch": 0.15118183281112446, "grad_norm": 1056.927001953125, "learning_rate": 1.9239718985947696e-06, "loss": 30.9375, "step": 15971 }, { "epoch": 0.15119129883283952, "grad_norm": 235.1652374267578, "learning_rate": 1.923960172553561e-06, "loss": 21.875, "step": 15972 }, { "epoch": 0.15120076485455458, "grad_norm": 514.7023315429688, "learning_rate": 1.92394844564389e-06, "loss": 23.0625, "step": 15973 }, { "epoch": 0.15121023087626964, "grad_norm": 4.495181560516357, "learning_rate": 1.9239367178657676e-06, "loss": 0.895, "step": 15974 }, { "epoch": 0.15121969689798467, "grad_norm": 936.2716674804688, "learning_rate": 1.9239249892192047e-06, "loss": 29.6953, "step": 15975 }, { "epoch": 0.15122916291969973, "grad_norm": 167.79531860351562, "learning_rate": 1.9239132597042124e-06, "loss": 14.9766, "step": 15976 }, { "epoch": 0.1512386289414148, "grad_norm": 514.2603149414062, "learning_rate": 1.923901529320802e-06, "loss": 53.7812, "step": 15977 }, { "epoch": 0.15124809496312985, "grad_norm": 179.00308227539062, "learning_rate": 1.923889798068984e-06, "loss": 20.2266, "step": 15978 }, { "epoch": 0.15125756098484489, "grad_norm": 509.60382080078125, "learning_rate": 1.92387806594877e-06, "loss": 49.0703, "step": 15979 }, { "epoch": 0.15126702700655995, "grad_norm": 446.52850341796875, "learning_rate": 1.9238663329601705e-06, "loss": 39.3906, "step": 15980 }, { "epoch": 0.151276493028275, "grad_norm": 496.1611633300781, "learning_rate": 1.9238545991031966e-06, "loss": 17.6406, "step": 15981 }, { "epoch": 0.15128595904999007, "grad_norm": 315.08013916015625, "learning_rate": 1.9238428643778595e-06, "loss": 8.9238, "step": 15982 }, { "epoch": 0.15129542507170513, "grad_norm": 170.89340209960938, "learning_rate": 1.9238311287841703e-06, "loss": 15.707, "step": 15983 }, { "epoch": 0.15130489109342016, "grad_norm": 897.1996459960938, "learning_rate": 1.92381939232214e-06, "loss": 90.125, "step": 15984 }, { "epoch": 0.15131435711513522, "grad_norm": 590.6878662109375, "learning_rate": 1.9238076549917796e-06, "loss": 44.2031, "step": 15985 }, { "epoch": 0.15132382313685028, "grad_norm": 310.3890686035156, "learning_rate": 1.9237959167931e-06, "loss": 16.1133, "step": 15986 }, { "epoch": 0.15133328915856534, "grad_norm": 160.8904266357422, "learning_rate": 1.9237841777261124e-06, "loss": 17.4141, "step": 15987 }, { "epoch": 0.1513427551802804, "grad_norm": 303.73681640625, "learning_rate": 1.9237724377908276e-06, "loss": 12.1914, "step": 15988 }, { "epoch": 0.15135222120199543, "grad_norm": 199.0948028564453, "learning_rate": 1.923760696987257e-06, "loss": 19.9844, "step": 15989 }, { "epoch": 0.1513616872237105, "grad_norm": 570.3359375, "learning_rate": 1.9237489553154115e-06, "loss": 42.1719, "step": 15990 }, { "epoch": 0.15137115324542555, "grad_norm": 548.3731079101562, "learning_rate": 1.9237372127753017e-06, "loss": 43.75, "step": 15991 }, { "epoch": 0.1513806192671406, "grad_norm": 721.8799438476562, "learning_rate": 1.923725469366939e-06, "loss": 36.3633, "step": 15992 }, { "epoch": 0.15139008528885564, "grad_norm": 191.91952514648438, "learning_rate": 1.923713725090335e-06, "loss": 20.3984, "step": 15993 }, { "epoch": 0.1513995513105707, "grad_norm": 203.49057006835938, "learning_rate": 1.9237019799455e-06, "loss": 21.2773, "step": 15994 }, { "epoch": 0.15140901733228576, "grad_norm": 326.1747741699219, "learning_rate": 1.923690233932445e-06, "loss": 27.6875, "step": 15995 }, { "epoch": 0.15141848335400082, "grad_norm": 334.3539123535156, "learning_rate": 1.9236784870511814e-06, "loss": 44.8047, "step": 15996 }, { "epoch": 0.15142794937571588, "grad_norm": 728.9762573242188, "learning_rate": 1.92366673930172e-06, "loss": 27.4375, "step": 15997 }, { "epoch": 0.1514374153974309, "grad_norm": 218.830810546875, "learning_rate": 1.923654990684072e-06, "loss": 15.4297, "step": 15998 }, { "epoch": 0.15144688141914597, "grad_norm": 405.2086181640625, "learning_rate": 1.9236432411982486e-06, "loss": 34.8125, "step": 15999 }, { "epoch": 0.15145634744086103, "grad_norm": 364.3819580078125, "learning_rate": 1.9236314908442604e-06, "loss": 19.2148, "step": 16000 }, { "epoch": 0.1514658134625761, "grad_norm": 215.13108825683594, "learning_rate": 1.923619739622119e-06, "loss": 9.0039, "step": 16001 }, { "epoch": 0.15147527948429113, "grad_norm": 272.7998962402344, "learning_rate": 1.923607987531835e-06, "loss": 16.6328, "step": 16002 }, { "epoch": 0.15148474550600619, "grad_norm": 687.15966796875, "learning_rate": 1.9235962345734195e-06, "loss": 18.3359, "step": 16003 }, { "epoch": 0.15149421152772125, "grad_norm": 516.9276733398438, "learning_rate": 1.9235844807468837e-06, "loss": 38.4297, "step": 16004 }, { "epoch": 0.1515036775494363, "grad_norm": 324.189697265625, "learning_rate": 1.9235727260522383e-06, "loss": 37.0312, "step": 16005 }, { "epoch": 0.15151314357115137, "grad_norm": 505.9603271484375, "learning_rate": 1.923560970489495e-06, "loss": 41.7734, "step": 16006 }, { "epoch": 0.1515226095928664, "grad_norm": 319.7236022949219, "learning_rate": 1.923549214058664e-06, "loss": 28.4297, "step": 16007 }, { "epoch": 0.15153207561458146, "grad_norm": 603.7570190429688, "learning_rate": 1.923537456759757e-06, "loss": 27.832, "step": 16008 }, { "epoch": 0.15154154163629652, "grad_norm": 667.630615234375, "learning_rate": 1.923525698592785e-06, "loss": 32.5234, "step": 16009 }, { "epoch": 0.15155100765801158, "grad_norm": 193.77467346191406, "learning_rate": 1.923513939557759e-06, "loss": 23.0156, "step": 16010 }, { "epoch": 0.1515604736797266, "grad_norm": 580.5731811523438, "learning_rate": 1.92350217965469e-06, "loss": 33.7734, "step": 16011 }, { "epoch": 0.15156993970144167, "grad_norm": 330.9787292480469, "learning_rate": 1.923490418883589e-06, "loss": 20.6875, "step": 16012 }, { "epoch": 0.15157940572315673, "grad_norm": 349.6717834472656, "learning_rate": 1.9234786572444667e-06, "loss": 43.1406, "step": 16013 }, { "epoch": 0.1515888717448718, "grad_norm": 433.4192810058594, "learning_rate": 1.923466894737335e-06, "loss": 36.75, "step": 16014 }, { "epoch": 0.15159833776658685, "grad_norm": 250.643310546875, "learning_rate": 1.9234551313622043e-06, "loss": 16.2656, "step": 16015 }, { "epoch": 0.15160780378830188, "grad_norm": 447.3929138183594, "learning_rate": 1.9234433671190856e-06, "loss": 49.5703, "step": 16016 }, { "epoch": 0.15161726981001694, "grad_norm": 213.32827758789062, "learning_rate": 1.9234316020079906e-06, "loss": 16.1016, "step": 16017 }, { "epoch": 0.151626735831732, "grad_norm": 261.9588928222656, "learning_rate": 1.92341983602893e-06, "loss": 18.8828, "step": 16018 }, { "epoch": 0.15163620185344706, "grad_norm": 846.6092529296875, "learning_rate": 1.9234080691819148e-06, "loss": 49.4375, "step": 16019 }, { "epoch": 0.1516456678751621, "grad_norm": 2.830075740814209, "learning_rate": 1.923396301466956e-06, "loss": 0.8174, "step": 16020 }, { "epoch": 0.15165513389687715, "grad_norm": 469.00860595703125, "learning_rate": 1.9233845328840643e-06, "loss": 26.4844, "step": 16021 }, { "epoch": 0.1516645999185922, "grad_norm": 202.45687866210938, "learning_rate": 1.923372763433252e-06, "loss": 21.0938, "step": 16022 }, { "epoch": 0.15167406594030727, "grad_norm": 953.3057250976562, "learning_rate": 1.9233609931145285e-06, "loss": 44.6562, "step": 16023 }, { "epoch": 0.15168353196202233, "grad_norm": 512.30517578125, "learning_rate": 1.9233492219279064e-06, "loss": 24.5234, "step": 16024 }, { "epoch": 0.15169299798373737, "grad_norm": 524.45556640625, "learning_rate": 1.9233374498733957e-06, "loss": 27.5859, "step": 16025 }, { "epoch": 0.15170246400545243, "grad_norm": 715.0831298828125, "learning_rate": 1.923325676951008e-06, "loss": 40.3359, "step": 16026 }, { "epoch": 0.15171193002716749, "grad_norm": 685.8060302734375, "learning_rate": 1.9233139031607546e-06, "loss": 42.875, "step": 16027 }, { "epoch": 0.15172139604888255, "grad_norm": 302.92828369140625, "learning_rate": 1.923302128502646e-06, "loss": 20.0664, "step": 16028 }, { "epoch": 0.15173086207059758, "grad_norm": 623.8862915039062, "learning_rate": 1.9232903529766934e-06, "loss": 17.9375, "step": 16029 }, { "epoch": 0.15174032809231264, "grad_norm": 735.4859619140625, "learning_rate": 1.923278576582908e-06, "loss": 50.5, "step": 16030 }, { "epoch": 0.1517497941140277, "grad_norm": 2.821641445159912, "learning_rate": 1.9232667993213006e-06, "loss": 0.9407, "step": 16031 }, { "epoch": 0.15175926013574276, "grad_norm": 304.4107666015625, "learning_rate": 1.9232550211918827e-06, "loss": 20.0781, "step": 16032 }, { "epoch": 0.15176872615745782, "grad_norm": 181.06155395507812, "learning_rate": 1.9232432421946647e-06, "loss": 15.9531, "step": 16033 }, { "epoch": 0.15177819217917285, "grad_norm": 786.2135620117188, "learning_rate": 1.9232314623296585e-06, "loss": 15.0938, "step": 16034 }, { "epoch": 0.1517876582008879, "grad_norm": 217.8589630126953, "learning_rate": 1.9232196815968747e-06, "loss": 15.1719, "step": 16035 }, { "epoch": 0.15179712422260297, "grad_norm": 630.968505859375, "learning_rate": 1.9232078999963242e-06, "loss": 37.5547, "step": 16036 }, { "epoch": 0.15180659024431803, "grad_norm": 209.85504150390625, "learning_rate": 1.9231961175280186e-06, "loss": 20.332, "step": 16037 }, { "epoch": 0.15181605626603306, "grad_norm": 162.24441528320312, "learning_rate": 1.9231843341919688e-06, "loss": 17.6797, "step": 16038 }, { "epoch": 0.15182552228774812, "grad_norm": 321.9690856933594, "learning_rate": 1.923172549988186e-06, "loss": 10.4648, "step": 16039 }, { "epoch": 0.15183498830946318, "grad_norm": 313.15557861328125, "learning_rate": 1.9231607649166807e-06, "loss": 20.2422, "step": 16040 }, { "epoch": 0.15184445433117824, "grad_norm": 223.81187438964844, "learning_rate": 1.923148978977464e-06, "loss": 15.4023, "step": 16041 }, { "epoch": 0.1518539203528933, "grad_norm": 111.59276580810547, "learning_rate": 1.923137192170548e-06, "loss": 13.5, "step": 16042 }, { "epoch": 0.15186338637460833, "grad_norm": 208.4658203125, "learning_rate": 1.9231254044959425e-06, "loss": 16.5742, "step": 16043 }, { "epoch": 0.1518728523963234, "grad_norm": 426.67547607421875, "learning_rate": 1.9231136159536598e-06, "loss": 15.9219, "step": 16044 }, { "epoch": 0.15188231841803845, "grad_norm": 358.7205505371094, "learning_rate": 1.92310182654371e-06, "loss": 25.4258, "step": 16045 }, { "epoch": 0.1518917844397535, "grad_norm": 424.1452941894531, "learning_rate": 1.9230900362661044e-06, "loss": 30.6328, "step": 16046 }, { "epoch": 0.15190125046146855, "grad_norm": 435.9208679199219, "learning_rate": 1.9230782451208547e-06, "loss": 20.6406, "step": 16047 }, { "epoch": 0.1519107164831836, "grad_norm": 459.1526794433594, "learning_rate": 1.923066453107971e-06, "loss": 19.4766, "step": 16048 }, { "epoch": 0.15192018250489867, "grad_norm": 467.2306823730469, "learning_rate": 1.923054660227465e-06, "loss": 42.7344, "step": 16049 }, { "epoch": 0.15192964852661373, "grad_norm": 168.11265563964844, "learning_rate": 1.9230428664793477e-06, "loss": 19.125, "step": 16050 }, { "epoch": 0.15193911454832879, "grad_norm": 856.0687866210938, "learning_rate": 1.9230310718636303e-06, "loss": 63.4688, "step": 16051 }, { "epoch": 0.15194858057004382, "grad_norm": 448.9898681640625, "learning_rate": 1.9230192763803234e-06, "loss": 21.5703, "step": 16052 }, { "epoch": 0.15195804659175888, "grad_norm": 391.4226989746094, "learning_rate": 1.9230074800294387e-06, "loss": 47.5469, "step": 16053 }, { "epoch": 0.15196751261347394, "grad_norm": 364.10992431640625, "learning_rate": 1.922995682810987e-06, "loss": 35.8203, "step": 16054 }, { "epoch": 0.151976978635189, "grad_norm": 184.00637817382812, "learning_rate": 1.9229838847249793e-06, "loss": 9.3086, "step": 16055 }, { "epoch": 0.15198644465690403, "grad_norm": 336.3006591796875, "learning_rate": 1.9229720857714267e-06, "loss": 18.0664, "step": 16056 }, { "epoch": 0.1519959106786191, "grad_norm": 550.2630615234375, "learning_rate": 1.9229602859503408e-06, "loss": 36.0, "step": 16057 }, { "epoch": 0.15200537670033415, "grad_norm": 451.956298828125, "learning_rate": 1.9229484852617318e-06, "loss": 31.5938, "step": 16058 }, { "epoch": 0.1520148427220492, "grad_norm": 652.0521240234375, "learning_rate": 1.9229366837056115e-06, "loss": 32.0781, "step": 16059 }, { "epoch": 0.15202430874376427, "grad_norm": 209.29946899414062, "learning_rate": 1.922924881281991e-06, "loss": 10.3281, "step": 16060 }, { "epoch": 0.1520337747654793, "grad_norm": 439.3474426269531, "learning_rate": 1.922913077990881e-06, "loss": 57.7422, "step": 16061 }, { "epoch": 0.15204324078719436, "grad_norm": 302.68524169921875, "learning_rate": 1.9229012738322926e-06, "loss": 17.2812, "step": 16062 }, { "epoch": 0.15205270680890942, "grad_norm": 167.45062255859375, "learning_rate": 1.922889468806237e-06, "loss": 15.8477, "step": 16063 }, { "epoch": 0.15206217283062448, "grad_norm": 170.8251953125, "learning_rate": 1.9228776629127253e-06, "loss": 14.6016, "step": 16064 }, { "epoch": 0.1520716388523395, "grad_norm": 292.1324157714844, "learning_rate": 1.922865856151769e-06, "loss": 27.3906, "step": 16065 }, { "epoch": 0.15208110487405457, "grad_norm": 164.26805114746094, "learning_rate": 1.9228540485233783e-06, "loss": 14.0742, "step": 16066 }, { "epoch": 0.15209057089576963, "grad_norm": 178.5732879638672, "learning_rate": 1.9228422400275654e-06, "loss": 19.3516, "step": 16067 }, { "epoch": 0.1521000369174847, "grad_norm": 267.71063232421875, "learning_rate": 1.9228304306643403e-06, "loss": 25.1406, "step": 16068 }, { "epoch": 0.15210950293919975, "grad_norm": 336.5518493652344, "learning_rate": 1.922818620433715e-06, "loss": 29.2344, "step": 16069 }, { "epoch": 0.15211896896091479, "grad_norm": 836.1954956054688, "learning_rate": 1.9228068093357e-06, "loss": 57.0, "step": 16070 }, { "epoch": 0.15212843498262985, "grad_norm": 218.77040100097656, "learning_rate": 1.9227949973703067e-06, "loss": 26.8594, "step": 16071 }, { "epoch": 0.1521379010043449, "grad_norm": 298.8296813964844, "learning_rate": 1.922783184537546e-06, "loss": 20.4844, "step": 16072 }, { "epoch": 0.15214736702605997, "grad_norm": 353.08892822265625, "learning_rate": 1.9227713708374294e-06, "loss": 28.1172, "step": 16073 }, { "epoch": 0.15215683304777503, "grad_norm": 387.21868896484375, "learning_rate": 1.922759556269968e-06, "loss": 19.2461, "step": 16074 }, { "epoch": 0.15216629906949006, "grad_norm": 545.0728149414062, "learning_rate": 1.922747740835172e-06, "loss": 43.9219, "step": 16075 }, { "epoch": 0.15217576509120512, "grad_norm": 176.4123077392578, "learning_rate": 1.9227359245330536e-06, "loss": 17.7891, "step": 16076 }, { "epoch": 0.15218523111292018, "grad_norm": 205.753662109375, "learning_rate": 1.9227241073636233e-06, "loss": 15.4805, "step": 16077 }, { "epoch": 0.15219469713463524, "grad_norm": 1785.8316650390625, "learning_rate": 1.9227122893268923e-06, "loss": 15.6875, "step": 16078 }, { "epoch": 0.15220416315635027, "grad_norm": 291.2571105957031, "learning_rate": 1.922700470422872e-06, "loss": 15.9297, "step": 16079 }, { "epoch": 0.15221362917806533, "grad_norm": 276.522705078125, "learning_rate": 1.922688650651573e-06, "loss": 25.6719, "step": 16080 }, { "epoch": 0.1522230951997804, "grad_norm": 509.0711364746094, "learning_rate": 1.922676830013007e-06, "loss": 41.4062, "step": 16081 }, { "epoch": 0.15223256122149545, "grad_norm": 342.739501953125, "learning_rate": 1.9226650085071846e-06, "loss": 23.1367, "step": 16082 }, { "epoch": 0.1522420272432105, "grad_norm": 390.4176940917969, "learning_rate": 1.9226531861341167e-06, "loss": 20.3438, "step": 16083 }, { "epoch": 0.15225149326492554, "grad_norm": 246.78805541992188, "learning_rate": 1.9226413628938154e-06, "loss": 19.6172, "step": 16084 }, { "epoch": 0.1522609592866406, "grad_norm": 205.09378051757812, "learning_rate": 1.922629538786291e-06, "loss": 18.125, "step": 16085 }, { "epoch": 0.15227042530835566, "grad_norm": 332.6913146972656, "learning_rate": 1.922617713811555e-06, "loss": 17.5469, "step": 16086 }, { "epoch": 0.15227989133007072, "grad_norm": 215.2665557861328, "learning_rate": 1.9226058879696184e-06, "loss": 20.4531, "step": 16087 }, { "epoch": 0.15228935735178575, "grad_norm": 195.63018798828125, "learning_rate": 1.9225940612604923e-06, "loss": 17.2344, "step": 16088 }, { "epoch": 0.1522988233735008, "grad_norm": 317.62255859375, "learning_rate": 1.9225822336841877e-06, "loss": 24.9531, "step": 16089 }, { "epoch": 0.15230828939521587, "grad_norm": 3.6642847061157227, "learning_rate": 1.922570405240716e-06, "loss": 1.0938, "step": 16090 }, { "epoch": 0.15231775541693093, "grad_norm": 367.3928527832031, "learning_rate": 1.922558575930088e-06, "loss": 22.8672, "step": 16091 }, { "epoch": 0.152327221438646, "grad_norm": 383.02264404296875, "learning_rate": 1.9225467457523146e-06, "loss": 16.7422, "step": 16092 }, { "epoch": 0.15233668746036103, "grad_norm": 480.639892578125, "learning_rate": 1.9225349147074076e-06, "loss": 38.4375, "step": 16093 }, { "epoch": 0.15234615348207609, "grad_norm": 614.7803955078125, "learning_rate": 1.922523082795378e-06, "loss": 70.3125, "step": 16094 }, { "epoch": 0.15235561950379115, "grad_norm": 306.5180358886719, "learning_rate": 1.9225112500162365e-06, "loss": 33.8594, "step": 16095 }, { "epoch": 0.1523650855255062, "grad_norm": 178.33486938476562, "learning_rate": 1.9224994163699944e-06, "loss": 13.4492, "step": 16096 }, { "epoch": 0.15237455154722124, "grad_norm": 337.2597351074219, "learning_rate": 1.9224875818566633e-06, "loss": 46.3828, "step": 16097 }, { "epoch": 0.1523840175689363, "grad_norm": 637.857177734375, "learning_rate": 1.9224757464762533e-06, "loss": 39.1016, "step": 16098 }, { "epoch": 0.15239348359065136, "grad_norm": 545.854736328125, "learning_rate": 1.922463910228776e-06, "loss": 51.8242, "step": 16099 }, { "epoch": 0.15240294961236642, "grad_norm": 387.1495666503906, "learning_rate": 1.922452073114243e-06, "loss": 8.3652, "step": 16100 }, { "epoch": 0.15241241563408148, "grad_norm": 581.1470336914062, "learning_rate": 1.9224402351326652e-06, "loss": 18.3906, "step": 16101 }, { "epoch": 0.1524218816557965, "grad_norm": 235.78826904296875, "learning_rate": 1.9224283962840534e-06, "loss": 27.1016, "step": 16102 }, { "epoch": 0.15243134767751157, "grad_norm": 2.889896869659424, "learning_rate": 1.9224165565684187e-06, "loss": 0.8589, "step": 16103 }, { "epoch": 0.15244081369922663, "grad_norm": 3.652980327606201, "learning_rate": 1.9224047159857726e-06, "loss": 1.022, "step": 16104 }, { "epoch": 0.1524502797209417, "grad_norm": 361.1519470214844, "learning_rate": 1.9223928745361263e-06, "loss": 17.9141, "step": 16105 }, { "epoch": 0.15245974574265672, "grad_norm": 153.5265655517578, "learning_rate": 1.9223810322194906e-06, "loss": 18.625, "step": 16106 }, { "epoch": 0.15246921176437178, "grad_norm": 697.6231689453125, "learning_rate": 1.9223691890358764e-06, "loss": 32.4688, "step": 16107 }, { "epoch": 0.15247867778608684, "grad_norm": 725.5969848632812, "learning_rate": 1.9223573449852957e-06, "loss": 20.1797, "step": 16108 }, { "epoch": 0.1524881438078019, "grad_norm": 504.4724426269531, "learning_rate": 1.9223455000677585e-06, "loss": 21.4141, "step": 16109 }, { "epoch": 0.15249760982951696, "grad_norm": 222.5875244140625, "learning_rate": 1.9223336542832767e-06, "loss": 10.9766, "step": 16110 }, { "epoch": 0.152507075851232, "grad_norm": 492.6831359863281, "learning_rate": 1.922321807631862e-06, "loss": 22.1641, "step": 16111 }, { "epoch": 0.15251654187294705, "grad_norm": 337.513671875, "learning_rate": 1.922309960113524e-06, "loss": 17.9688, "step": 16112 }, { "epoch": 0.1525260078946621, "grad_norm": 271.1631774902344, "learning_rate": 1.922298111728275e-06, "loss": 19.6328, "step": 16113 }, { "epoch": 0.15253547391637717, "grad_norm": 429.91986083984375, "learning_rate": 1.9222862624761254e-06, "loss": 52.4688, "step": 16114 }, { "epoch": 0.1525449399380922, "grad_norm": 237.381591796875, "learning_rate": 1.9222744123570868e-06, "loss": 22.3438, "step": 16115 }, { "epoch": 0.15255440595980727, "grad_norm": 217.96609497070312, "learning_rate": 1.9222625613711705e-06, "loss": 17.1562, "step": 16116 }, { "epoch": 0.15256387198152233, "grad_norm": 242.33111572265625, "learning_rate": 1.9222507095183876e-06, "loss": 18.8828, "step": 16117 }, { "epoch": 0.15257333800323739, "grad_norm": 180.04541015625, "learning_rate": 1.9222388567987486e-06, "loss": 14.0938, "step": 16118 }, { "epoch": 0.15258280402495245, "grad_norm": 2.954066038131714, "learning_rate": 1.922227003212265e-06, "loss": 0.8506, "step": 16119 }, { "epoch": 0.15259227004666748, "grad_norm": 215.73353576660156, "learning_rate": 1.922215148758948e-06, "loss": 23.6875, "step": 16120 }, { "epoch": 0.15260173606838254, "grad_norm": 3.434169292449951, "learning_rate": 1.922203293438809e-06, "loss": 0.9253, "step": 16121 }, { "epoch": 0.1526112020900976, "grad_norm": 428.19354248046875, "learning_rate": 1.9221914372518595e-06, "loss": 8.6875, "step": 16122 }, { "epoch": 0.15262066811181266, "grad_norm": 520.5916748046875, "learning_rate": 1.922179580198109e-06, "loss": 45.0625, "step": 16123 }, { "epoch": 0.1526301341335277, "grad_norm": 234.01443481445312, "learning_rate": 1.92216772227757e-06, "loss": 19.4922, "step": 16124 }, { "epoch": 0.15263960015524275, "grad_norm": 174.78030395507812, "learning_rate": 1.9221558634902534e-06, "loss": 20.3867, "step": 16125 }, { "epoch": 0.1526490661769578, "grad_norm": 421.09710693359375, "learning_rate": 1.9221440038361704e-06, "loss": 13.8633, "step": 16126 }, { "epoch": 0.15265853219867287, "grad_norm": 442.8919372558594, "learning_rate": 1.9221321433153317e-06, "loss": 43.9375, "step": 16127 }, { "epoch": 0.15266799822038793, "grad_norm": 379.0863342285156, "learning_rate": 1.9221202819277492e-06, "loss": 22.8594, "step": 16128 }, { "epoch": 0.15267746424210296, "grad_norm": 533.1328735351562, "learning_rate": 1.9221084196734335e-06, "loss": 18.7578, "step": 16129 }, { "epoch": 0.15268693026381802, "grad_norm": 217.66603088378906, "learning_rate": 1.922096556552396e-06, "loss": 18.5625, "step": 16130 }, { "epoch": 0.15269639628553308, "grad_norm": 368.08917236328125, "learning_rate": 1.922084692564647e-06, "loss": 30.75, "step": 16131 }, { "epoch": 0.15270586230724814, "grad_norm": 292.1047668457031, "learning_rate": 1.922072827710199e-06, "loss": 19.2969, "step": 16132 }, { "epoch": 0.15271532832896317, "grad_norm": 3.2242071628570557, "learning_rate": 1.9220609619890625e-06, "loss": 0.9419, "step": 16133 }, { "epoch": 0.15272479435067823, "grad_norm": 463.46075439453125, "learning_rate": 1.9220490954012483e-06, "loss": 51.2344, "step": 16134 }, { "epoch": 0.1527342603723933, "grad_norm": 311.667236328125, "learning_rate": 1.9220372279467686e-06, "loss": 26.3594, "step": 16135 }, { "epoch": 0.15274372639410835, "grad_norm": 337.0829162597656, "learning_rate": 1.9220253596256335e-06, "loss": 19.7891, "step": 16136 }, { "epoch": 0.1527531924158234, "grad_norm": 3.1618921756744385, "learning_rate": 1.9220134904378546e-06, "loss": 0.9922, "step": 16137 }, { "epoch": 0.15276265843753845, "grad_norm": 785.076171875, "learning_rate": 1.922001620383443e-06, "loss": 41.6406, "step": 16138 }, { "epoch": 0.1527721244592535, "grad_norm": 591.9149169921875, "learning_rate": 1.9219897494624096e-06, "loss": 30.9297, "step": 16139 }, { "epoch": 0.15278159048096857, "grad_norm": 396.8208312988281, "learning_rate": 1.921977877674766e-06, "loss": 46.5312, "step": 16140 }, { "epoch": 0.15279105650268363, "grad_norm": 490.5712585449219, "learning_rate": 1.9219660050205234e-06, "loss": 27.3477, "step": 16141 }, { "epoch": 0.15280052252439866, "grad_norm": 569.3952026367188, "learning_rate": 1.9219541314996924e-06, "loss": 34.9219, "step": 16142 }, { "epoch": 0.15280998854611372, "grad_norm": 470.43212890625, "learning_rate": 1.921942257112285e-06, "loss": 15.8594, "step": 16143 }, { "epoch": 0.15281945456782878, "grad_norm": 242.21405029296875, "learning_rate": 1.9219303818583114e-06, "loss": 23.0586, "step": 16144 }, { "epoch": 0.15282892058954384, "grad_norm": 144.14479064941406, "learning_rate": 1.921918505737783e-06, "loss": 19.2617, "step": 16145 }, { "epoch": 0.1528383866112589, "grad_norm": 456.53961181640625, "learning_rate": 1.921906628750712e-06, "loss": 30.8359, "step": 16146 }, { "epoch": 0.15284785263297393, "grad_norm": 659.1858520507812, "learning_rate": 1.921894750897108e-06, "loss": 36.3828, "step": 16147 }, { "epoch": 0.152857318654689, "grad_norm": 363.5350646972656, "learning_rate": 1.921882872176983e-06, "loss": 16.625, "step": 16148 }, { "epoch": 0.15286678467640405, "grad_norm": 581.6115112304688, "learning_rate": 1.921870992590348e-06, "loss": 27.5469, "step": 16149 }, { "epoch": 0.1528762506981191, "grad_norm": 463.8111267089844, "learning_rate": 1.9218591121372143e-06, "loss": 37.9688, "step": 16150 }, { "epoch": 0.15288571671983414, "grad_norm": 401.6366271972656, "learning_rate": 1.9218472308175934e-06, "loss": 40.1719, "step": 16151 }, { "epoch": 0.1528951827415492, "grad_norm": 195.3497772216797, "learning_rate": 1.921835348631496e-06, "loss": 18.3555, "step": 16152 }, { "epoch": 0.15290464876326426, "grad_norm": 156.3960418701172, "learning_rate": 1.921823465578933e-06, "loss": 22.875, "step": 16153 }, { "epoch": 0.15291411478497932, "grad_norm": 572.1671752929688, "learning_rate": 1.921811581659916e-06, "loss": 74.6562, "step": 16154 }, { "epoch": 0.15292358080669438, "grad_norm": 222.97964477539062, "learning_rate": 1.921799696874456e-06, "loss": 17.3359, "step": 16155 }, { "epoch": 0.1529330468284094, "grad_norm": 902.150634765625, "learning_rate": 1.9217878112225646e-06, "loss": 18.1406, "step": 16156 }, { "epoch": 0.15294251285012447, "grad_norm": 190.6318817138672, "learning_rate": 1.9217759247042523e-06, "loss": 16.3203, "step": 16157 }, { "epoch": 0.15295197887183953, "grad_norm": 600.290771484375, "learning_rate": 1.921764037319531e-06, "loss": 8.4707, "step": 16158 }, { "epoch": 0.1529614448935546, "grad_norm": 775.888671875, "learning_rate": 1.921752149068411e-06, "loss": 27.5703, "step": 16159 }, { "epoch": 0.15297091091526965, "grad_norm": 394.05804443359375, "learning_rate": 1.921740259950904e-06, "loss": 9.5, "step": 16160 }, { "epoch": 0.15298037693698469, "grad_norm": 193.1033935546875, "learning_rate": 1.9217283699670216e-06, "loss": 16.2578, "step": 16161 }, { "epoch": 0.15298984295869975, "grad_norm": 3.0666539669036865, "learning_rate": 1.921716479116774e-06, "loss": 0.9268, "step": 16162 }, { "epoch": 0.1529993089804148, "grad_norm": 3.2971949577331543, "learning_rate": 1.921704587400173e-06, "loss": 0.8623, "step": 16163 }, { "epoch": 0.15300877500212987, "grad_norm": 269.1973571777344, "learning_rate": 1.92169269481723e-06, "loss": 23.3594, "step": 16164 }, { "epoch": 0.1530182410238449, "grad_norm": 2.9364142417907715, "learning_rate": 1.9216808013679552e-06, "loss": 0.8398, "step": 16165 }, { "epoch": 0.15302770704555996, "grad_norm": 2290.124267578125, "learning_rate": 1.9216689070523608e-06, "loss": 15.0156, "step": 16166 }, { "epoch": 0.15303717306727502, "grad_norm": 942.3045043945312, "learning_rate": 1.9216570118704574e-06, "loss": 67.8281, "step": 16167 }, { "epoch": 0.15304663908899008, "grad_norm": 241.48245239257812, "learning_rate": 1.9216451158222566e-06, "loss": 22.2617, "step": 16168 }, { "epoch": 0.15305610511070514, "grad_norm": 402.2320556640625, "learning_rate": 1.921633218907769e-06, "loss": 46.9922, "step": 16169 }, { "epoch": 0.15306557113242017, "grad_norm": 854.984130859375, "learning_rate": 1.9216213211270063e-06, "loss": 59.4844, "step": 16170 }, { "epoch": 0.15307503715413523, "grad_norm": 536.5677490234375, "learning_rate": 1.9216094224799797e-06, "loss": 7.293, "step": 16171 }, { "epoch": 0.1530845031758503, "grad_norm": 4.172063827514648, "learning_rate": 1.9215975229667e-06, "loss": 1.0195, "step": 16172 }, { "epoch": 0.15309396919756535, "grad_norm": 855.91845703125, "learning_rate": 1.9215856225871786e-06, "loss": 65.7344, "step": 16173 }, { "epoch": 0.15310343521928038, "grad_norm": 284.8402404785156, "learning_rate": 1.921573721341427e-06, "loss": 9.0781, "step": 16174 }, { "epoch": 0.15311290124099544, "grad_norm": 312.882568359375, "learning_rate": 1.9215618192294553e-06, "loss": 24.25, "step": 16175 }, { "epoch": 0.1531223672627105, "grad_norm": 194.31137084960938, "learning_rate": 1.921549916251276e-06, "loss": 7.3008, "step": 16176 }, { "epoch": 0.15313183328442556, "grad_norm": 638.7959594726562, "learning_rate": 1.9215380124068998e-06, "loss": 22.5938, "step": 16177 }, { "epoch": 0.15314129930614062, "grad_norm": 287.4122009277344, "learning_rate": 1.9215261076963374e-06, "loss": 10.418, "step": 16178 }, { "epoch": 0.15315076532785565, "grad_norm": 606.76904296875, "learning_rate": 1.921514202119601e-06, "loss": 11.3398, "step": 16179 }, { "epoch": 0.1531602313495707, "grad_norm": 338.3473815917969, "learning_rate": 1.9215022956767007e-06, "loss": 20.9297, "step": 16180 }, { "epoch": 0.15316969737128577, "grad_norm": 3.516780376434326, "learning_rate": 1.9214903883676483e-06, "loss": 0.9868, "step": 16181 }, { "epoch": 0.15317916339300083, "grad_norm": 260.16229248046875, "learning_rate": 1.9214784801924552e-06, "loss": 17.7559, "step": 16182 }, { "epoch": 0.15318862941471587, "grad_norm": 760.2420043945312, "learning_rate": 1.9214665711511316e-06, "loss": 23.9453, "step": 16183 }, { "epoch": 0.15319809543643093, "grad_norm": 240.76004028320312, "learning_rate": 1.92145466124369e-06, "loss": 20.6523, "step": 16184 }, { "epoch": 0.15320756145814599, "grad_norm": 363.9151611328125, "learning_rate": 1.9214427504701403e-06, "loss": 20.4336, "step": 16185 }, { "epoch": 0.15321702747986105, "grad_norm": 241.7029571533203, "learning_rate": 1.9214308388304953e-06, "loss": 22.0625, "step": 16186 }, { "epoch": 0.1532264935015761, "grad_norm": 165.103271484375, "learning_rate": 1.9214189263247645e-06, "loss": 16.6719, "step": 16187 }, { "epoch": 0.15323595952329114, "grad_norm": 3.057293653488159, "learning_rate": 1.9214070129529603e-06, "loss": 0.9565, "step": 16188 }, { "epoch": 0.1532454255450062, "grad_norm": 366.55706787109375, "learning_rate": 1.921395098715093e-06, "loss": 46.8281, "step": 16189 }, { "epoch": 0.15325489156672126, "grad_norm": 463.0946960449219, "learning_rate": 1.9213831836111745e-06, "loss": 35.3516, "step": 16190 }, { "epoch": 0.15326435758843632, "grad_norm": 757.4456787109375, "learning_rate": 1.921371267641216e-06, "loss": 42.5547, "step": 16191 }, { "epoch": 0.15327382361015135, "grad_norm": 275.6822814941406, "learning_rate": 1.921359350805228e-06, "loss": 19.6953, "step": 16192 }, { "epoch": 0.1532832896318664, "grad_norm": 3.023874521255493, "learning_rate": 1.9213474331032225e-06, "loss": 1.0605, "step": 16193 }, { "epoch": 0.15329275565358147, "grad_norm": 375.99749755859375, "learning_rate": 1.92133551453521e-06, "loss": 46.6211, "step": 16194 }, { "epoch": 0.15330222167529653, "grad_norm": 3.2069520950317383, "learning_rate": 1.921323595101202e-06, "loss": 0.8772, "step": 16195 }, { "epoch": 0.1533116876970116, "grad_norm": 195.20980834960938, "learning_rate": 1.92131167480121e-06, "loss": 22.25, "step": 16196 }, { "epoch": 0.15332115371872662, "grad_norm": 207.5913543701172, "learning_rate": 1.921299753635245e-06, "loss": 16.6016, "step": 16197 }, { "epoch": 0.15333061974044168, "grad_norm": 974.5494384765625, "learning_rate": 1.9212878316033184e-06, "loss": 48.6758, "step": 16198 }, { "epoch": 0.15334008576215674, "grad_norm": 392.8874816894531, "learning_rate": 1.921275908705441e-06, "loss": 34.5469, "step": 16199 }, { "epoch": 0.1533495517838718, "grad_norm": 3.514676570892334, "learning_rate": 1.921263984941624e-06, "loss": 0.9365, "step": 16200 }, { "epoch": 0.15335901780558683, "grad_norm": 328.82684326171875, "learning_rate": 1.9212520603118787e-06, "loss": 56.4219, "step": 16201 }, { "epoch": 0.1533684838273019, "grad_norm": 474.8507995605469, "learning_rate": 1.9212401348162166e-06, "loss": 48.8281, "step": 16202 }, { "epoch": 0.15337794984901695, "grad_norm": 260.08056640625, "learning_rate": 1.921228208454649e-06, "loss": 24.3984, "step": 16203 }, { "epoch": 0.153387415870732, "grad_norm": 691.4534301757812, "learning_rate": 1.921216281227186e-06, "loss": 54.7891, "step": 16204 }, { "epoch": 0.15339688189244707, "grad_norm": 646.875, "learning_rate": 1.92120435313384e-06, "loss": 53.7188, "step": 16205 }, { "epoch": 0.1534063479141621, "grad_norm": 172.63705444335938, "learning_rate": 1.921192424174622e-06, "loss": 16.5625, "step": 16206 }, { "epoch": 0.15341581393587717, "grad_norm": 2.845731496810913, "learning_rate": 1.921180494349543e-06, "loss": 0.8745, "step": 16207 }, { "epoch": 0.15342527995759223, "grad_norm": 427.71417236328125, "learning_rate": 1.921168563658614e-06, "loss": 21.1953, "step": 16208 }, { "epoch": 0.15343474597930729, "grad_norm": 276.7975158691406, "learning_rate": 1.921156632101847e-06, "loss": 20.7656, "step": 16209 }, { "epoch": 0.15344421200102232, "grad_norm": 842.2060546875, "learning_rate": 1.9211446996792524e-06, "loss": 56.0, "step": 16210 }, { "epoch": 0.15345367802273738, "grad_norm": 237.14230346679688, "learning_rate": 1.921132766390842e-06, "loss": 20.3828, "step": 16211 }, { "epoch": 0.15346314404445244, "grad_norm": 315.85504150390625, "learning_rate": 1.921120832236626e-06, "loss": 24.9375, "step": 16212 }, { "epoch": 0.1534726100661675, "grad_norm": 595.228759765625, "learning_rate": 1.921108897216617e-06, "loss": 24.8047, "step": 16213 }, { "epoch": 0.15348207608788256, "grad_norm": 499.7102355957031, "learning_rate": 1.9210969613308252e-06, "loss": 18.7305, "step": 16214 }, { "epoch": 0.1534915421095976, "grad_norm": 541.4906616210938, "learning_rate": 1.9210850245792625e-06, "loss": 21.3984, "step": 16215 }, { "epoch": 0.15350100813131265, "grad_norm": 403.3624572753906, "learning_rate": 1.9210730869619394e-06, "loss": 42.8281, "step": 16216 }, { "epoch": 0.1535104741530277, "grad_norm": 838.8551025390625, "learning_rate": 1.9210611484788678e-06, "loss": 43.6406, "step": 16217 }, { "epoch": 0.15351994017474277, "grad_norm": 588.9100341796875, "learning_rate": 1.9210492091300585e-06, "loss": 59.0938, "step": 16218 }, { "epoch": 0.1535294061964578, "grad_norm": 252.0417022705078, "learning_rate": 1.921037268915523e-06, "loss": 16.3203, "step": 16219 }, { "epoch": 0.15353887221817286, "grad_norm": 462.49920654296875, "learning_rate": 1.9210253278352723e-06, "loss": 16.1172, "step": 16220 }, { "epoch": 0.15354833823988792, "grad_norm": 349.31475830078125, "learning_rate": 1.9210133858893177e-06, "loss": 15.8359, "step": 16221 }, { "epoch": 0.15355780426160298, "grad_norm": 286.87310791015625, "learning_rate": 1.9210014430776705e-06, "loss": 16.9688, "step": 16222 }, { "epoch": 0.15356727028331804, "grad_norm": 375.7095947265625, "learning_rate": 1.920989499400342e-06, "loss": 36.5039, "step": 16223 }, { "epoch": 0.15357673630503307, "grad_norm": 366.8863830566406, "learning_rate": 1.920977554857343e-06, "loss": 17.4844, "step": 16224 }, { "epoch": 0.15358620232674813, "grad_norm": 235.6692352294922, "learning_rate": 1.920965609448685e-06, "loss": 9.4141, "step": 16225 }, { "epoch": 0.1535956683484632, "grad_norm": 2.9796533584594727, "learning_rate": 1.9209536631743796e-06, "loss": 0.8682, "step": 16226 }, { "epoch": 0.15360513437017825, "grad_norm": 264.8631896972656, "learning_rate": 1.9209417160344376e-06, "loss": 18.3906, "step": 16227 }, { "epoch": 0.15361460039189329, "grad_norm": 562.6543579101562, "learning_rate": 1.92092976802887e-06, "loss": 50.0781, "step": 16228 }, { "epoch": 0.15362406641360835, "grad_norm": 415.6836242675781, "learning_rate": 1.9209178191576884e-06, "loss": 18.3711, "step": 16229 }, { "epoch": 0.1536335324353234, "grad_norm": 177.12525939941406, "learning_rate": 1.920905869420904e-06, "loss": 16.2578, "step": 16230 }, { "epoch": 0.15364299845703847, "grad_norm": 349.83013916015625, "learning_rate": 1.9208939188185276e-06, "loss": 30.7969, "step": 16231 }, { "epoch": 0.15365246447875353, "grad_norm": 162.96603393554688, "learning_rate": 1.9208819673505717e-06, "loss": 21.7227, "step": 16232 }, { "epoch": 0.15366193050046856, "grad_norm": 223.2879180908203, "learning_rate": 1.920870015017046e-06, "loss": 19.5312, "step": 16233 }, { "epoch": 0.15367139652218362, "grad_norm": 495.13311767578125, "learning_rate": 1.9208580618179627e-06, "loss": 31.9688, "step": 16234 }, { "epoch": 0.15368086254389868, "grad_norm": 248.5853729248047, "learning_rate": 1.9208461077533324e-06, "loss": 19.8867, "step": 16235 }, { "epoch": 0.15369032856561374, "grad_norm": 524.27099609375, "learning_rate": 1.920834152823167e-06, "loss": 15.8359, "step": 16236 }, { "epoch": 0.15369979458732877, "grad_norm": 370.98968505859375, "learning_rate": 1.920822197027477e-06, "loss": 25.6016, "step": 16237 }, { "epoch": 0.15370926060904383, "grad_norm": 709.5507202148438, "learning_rate": 1.9208102403662746e-06, "loss": 36.9531, "step": 16238 }, { "epoch": 0.1537187266307589, "grad_norm": 791.4130859375, "learning_rate": 1.92079828283957e-06, "loss": 50.7969, "step": 16239 }, { "epoch": 0.15372819265247395, "grad_norm": 485.9677429199219, "learning_rate": 1.920786324447375e-06, "loss": 37.4531, "step": 16240 }, { "epoch": 0.153737658674189, "grad_norm": 386.917724609375, "learning_rate": 1.920774365189701e-06, "loss": 27.6719, "step": 16241 }, { "epoch": 0.15374712469590404, "grad_norm": 3.443755626678467, "learning_rate": 1.9207624050665588e-06, "loss": 1.0396, "step": 16242 }, { "epoch": 0.1537565907176191, "grad_norm": 204.73666381835938, "learning_rate": 1.92075044407796e-06, "loss": 20.4062, "step": 16243 }, { "epoch": 0.15376605673933416, "grad_norm": 294.8785705566406, "learning_rate": 1.9207384822239154e-06, "loss": 25.2031, "step": 16244 }, { "epoch": 0.15377552276104922, "grad_norm": 438.7674255371094, "learning_rate": 1.9207265195044368e-06, "loss": 42.5234, "step": 16245 }, { "epoch": 0.15378498878276428, "grad_norm": 441.4047546386719, "learning_rate": 1.9207145559195352e-06, "loss": 24.3906, "step": 16246 }, { "epoch": 0.1537944548044793, "grad_norm": 215.48118591308594, "learning_rate": 1.920702591469222e-06, "loss": 20.1016, "step": 16247 }, { "epoch": 0.15380392082619437, "grad_norm": 203.98814392089844, "learning_rate": 1.9206906261535075e-06, "loss": 26.6953, "step": 16248 }, { "epoch": 0.15381338684790943, "grad_norm": 286.5159912109375, "learning_rate": 1.9206786599724043e-06, "loss": 23.0156, "step": 16249 }, { "epoch": 0.1538228528696245, "grad_norm": 674.1764526367188, "learning_rate": 1.9206666929259226e-06, "loss": 30.6172, "step": 16250 }, { "epoch": 0.15383231889133953, "grad_norm": 316.13922119140625, "learning_rate": 1.9206547250140744e-06, "loss": 17.7734, "step": 16251 }, { "epoch": 0.15384178491305459, "grad_norm": 277.71868896484375, "learning_rate": 1.9206427562368706e-06, "loss": 31.7656, "step": 16252 }, { "epoch": 0.15385125093476965, "grad_norm": 1113.766357421875, "learning_rate": 1.9206307865943227e-06, "loss": 36.9062, "step": 16253 }, { "epoch": 0.1538607169564847, "grad_norm": 407.31500244140625, "learning_rate": 1.9206188160864418e-06, "loss": 31.293, "step": 16254 }, { "epoch": 0.15387018297819977, "grad_norm": 241.64736938476562, "learning_rate": 1.9206068447132387e-06, "loss": 24.5234, "step": 16255 }, { "epoch": 0.1538796489999148, "grad_norm": 392.5383605957031, "learning_rate": 1.9205948724747255e-06, "loss": 25.0898, "step": 16256 }, { "epoch": 0.15388911502162986, "grad_norm": 475.2239074707031, "learning_rate": 1.920582899370913e-06, "loss": 32.7969, "step": 16257 }, { "epoch": 0.15389858104334492, "grad_norm": 567.0758666992188, "learning_rate": 1.920570925401812e-06, "loss": 29.7188, "step": 16258 }, { "epoch": 0.15390804706505998, "grad_norm": 343.9632873535156, "learning_rate": 1.9205589505674344e-06, "loss": 42.6094, "step": 16259 }, { "epoch": 0.153917513086775, "grad_norm": 340.0948486328125, "learning_rate": 1.920546974867791e-06, "loss": 24.5, "step": 16260 }, { "epoch": 0.15392697910849007, "grad_norm": 476.4005432128906, "learning_rate": 1.920534998302894e-06, "loss": 34.6953, "step": 16261 }, { "epoch": 0.15393644513020513, "grad_norm": 1711.4124755859375, "learning_rate": 1.9205230208727533e-06, "loss": 26.6953, "step": 16262 }, { "epoch": 0.1539459111519202, "grad_norm": 487.4019775390625, "learning_rate": 1.9205110425773814e-06, "loss": 41.8125, "step": 16263 }, { "epoch": 0.15395537717363525, "grad_norm": 299.5681457519531, "learning_rate": 1.9204990634167887e-06, "loss": 31.0156, "step": 16264 }, { "epoch": 0.15396484319535028, "grad_norm": 455.4039611816406, "learning_rate": 1.920487083390987e-06, "loss": 58.3125, "step": 16265 }, { "epoch": 0.15397430921706534, "grad_norm": 572.2139892578125, "learning_rate": 1.9204751024999872e-06, "loss": 32.25, "step": 16266 }, { "epoch": 0.1539837752387804, "grad_norm": 487.3323059082031, "learning_rate": 1.9204631207438005e-06, "loss": 21.0781, "step": 16267 }, { "epoch": 0.15399324126049546, "grad_norm": 1247.009033203125, "learning_rate": 1.9204511381224387e-06, "loss": 17.4531, "step": 16268 }, { "epoch": 0.1540027072822105, "grad_norm": 189.97430419921875, "learning_rate": 1.9204391546359123e-06, "loss": 7.7344, "step": 16269 }, { "epoch": 0.15401217330392555, "grad_norm": 298.3226013183594, "learning_rate": 1.9204271702842333e-06, "loss": 31.3828, "step": 16270 }, { "epoch": 0.1540216393256406, "grad_norm": 3.246371030807495, "learning_rate": 1.9204151850674126e-06, "loss": 0.8481, "step": 16271 }, { "epoch": 0.15403110534735567, "grad_norm": 306.8942565917969, "learning_rate": 1.9204031989854612e-06, "loss": 15.8906, "step": 16272 }, { "epoch": 0.15404057136907073, "grad_norm": 806.4746704101562, "learning_rate": 1.920391212038391e-06, "loss": 36.4297, "step": 16273 }, { "epoch": 0.15405003739078577, "grad_norm": 214.61260986328125, "learning_rate": 1.9203792242262127e-06, "loss": 13.0469, "step": 16274 }, { "epoch": 0.15405950341250083, "grad_norm": 253.20541381835938, "learning_rate": 1.920367235548938e-06, "loss": 17.1562, "step": 16275 }, { "epoch": 0.15406896943421589, "grad_norm": 478.59991455078125, "learning_rate": 1.920355246006578e-06, "loss": 18.1641, "step": 16276 }, { "epoch": 0.15407843545593095, "grad_norm": 218.8177032470703, "learning_rate": 1.920343255599144e-06, "loss": 31.5781, "step": 16277 }, { "epoch": 0.15408790147764598, "grad_norm": 182.7889862060547, "learning_rate": 1.920331264326647e-06, "loss": 13.3984, "step": 16278 }, { "epoch": 0.15409736749936104, "grad_norm": 638.2752685546875, "learning_rate": 1.9203192721890985e-06, "loss": 63.6719, "step": 16279 }, { "epoch": 0.1541068335210761, "grad_norm": 620.19921875, "learning_rate": 1.9203072791865098e-06, "loss": 41.0312, "step": 16280 }, { "epoch": 0.15411629954279116, "grad_norm": 1515.0430908203125, "learning_rate": 1.920295285318892e-06, "loss": 62.4766, "step": 16281 }, { "epoch": 0.15412576556450622, "grad_norm": 693.3588256835938, "learning_rate": 1.9202832905862567e-06, "loss": 42.9297, "step": 16282 }, { "epoch": 0.15413523158622125, "grad_norm": 322.6294860839844, "learning_rate": 1.920271294988615e-06, "loss": 38.4219, "step": 16283 }, { "epoch": 0.1541446976079363, "grad_norm": 284.6968078613281, "learning_rate": 1.920259298525978e-06, "loss": 17.4219, "step": 16284 }, { "epoch": 0.15415416362965137, "grad_norm": 370.7577209472656, "learning_rate": 1.920247301198357e-06, "loss": 31.0469, "step": 16285 }, { "epoch": 0.15416362965136643, "grad_norm": 748.88134765625, "learning_rate": 1.9202353030057637e-06, "loss": 38.9375, "step": 16286 }, { "epoch": 0.15417309567308146, "grad_norm": 217.0111846923828, "learning_rate": 1.920223303948209e-06, "loss": 17.6953, "step": 16287 }, { "epoch": 0.15418256169479652, "grad_norm": 873.84423828125, "learning_rate": 1.9202113040257043e-06, "loss": 27.8594, "step": 16288 }, { "epoch": 0.15419202771651158, "grad_norm": 290.7265625, "learning_rate": 1.920199303238261e-06, "loss": 26.9219, "step": 16289 }, { "epoch": 0.15420149373822664, "grad_norm": 475.5423278808594, "learning_rate": 1.92018730158589e-06, "loss": 21.7969, "step": 16290 }, { "epoch": 0.1542109597599417, "grad_norm": 910.0374145507812, "learning_rate": 1.9201752990686026e-06, "loss": 32.3906, "step": 16291 }, { "epoch": 0.15422042578165673, "grad_norm": 3.294243097305298, "learning_rate": 1.9201632956864103e-06, "loss": 0.9409, "step": 16292 }, { "epoch": 0.1542298918033718, "grad_norm": 433.8780212402344, "learning_rate": 1.9201512914393245e-06, "loss": 7.6855, "step": 16293 }, { "epoch": 0.15423935782508685, "grad_norm": 236.78524780273438, "learning_rate": 1.9201392863273563e-06, "loss": 18.5703, "step": 16294 }, { "epoch": 0.1542488238468019, "grad_norm": 630.2100830078125, "learning_rate": 1.9201272803505174e-06, "loss": 34.3359, "step": 16295 }, { "epoch": 0.15425828986851695, "grad_norm": 397.4283752441406, "learning_rate": 1.920115273508818e-06, "loss": 28.4609, "step": 16296 }, { "epoch": 0.154267755890232, "grad_norm": 590.8140869140625, "learning_rate": 1.920103265802271e-06, "loss": 58.1562, "step": 16297 }, { "epoch": 0.15427722191194707, "grad_norm": 254.15878295898438, "learning_rate": 1.920091257230886e-06, "loss": 18.8242, "step": 16298 }, { "epoch": 0.15428668793366213, "grad_norm": 282.3572082519531, "learning_rate": 1.9200792477946756e-06, "loss": 20.3281, "step": 16299 }, { "epoch": 0.15429615395537719, "grad_norm": 1193.404296875, "learning_rate": 1.92006723749365e-06, "loss": 66.8203, "step": 16300 }, { "epoch": 0.15430561997709222, "grad_norm": 526.6163940429688, "learning_rate": 1.9200552263278217e-06, "loss": 23.4062, "step": 16301 }, { "epoch": 0.15431508599880728, "grad_norm": 338.5609130859375, "learning_rate": 1.920043214297201e-06, "loss": 43.5781, "step": 16302 }, { "epoch": 0.15432455202052234, "grad_norm": 203.55650329589844, "learning_rate": 1.9200312014017992e-06, "loss": 22.5938, "step": 16303 }, { "epoch": 0.1543340180422374, "grad_norm": 237.24232482910156, "learning_rate": 1.9200191876416284e-06, "loss": 16.2031, "step": 16304 }, { "epoch": 0.15434348406395243, "grad_norm": 459.88873291015625, "learning_rate": 1.920007173016699e-06, "loss": 18.2344, "step": 16305 }, { "epoch": 0.1543529500856675, "grad_norm": 278.4903564453125, "learning_rate": 1.9199951575270234e-06, "loss": 18.4141, "step": 16306 }, { "epoch": 0.15436241610738255, "grad_norm": 403.04034423828125, "learning_rate": 1.919983141172612e-06, "loss": 34.9922, "step": 16307 }, { "epoch": 0.1543718821290976, "grad_norm": 156.9527130126953, "learning_rate": 1.919971123953476e-06, "loss": 5.9395, "step": 16308 }, { "epoch": 0.15438134815081267, "grad_norm": 182.5918731689453, "learning_rate": 1.919959105869627e-06, "loss": 20.2812, "step": 16309 }, { "epoch": 0.1543908141725277, "grad_norm": 337.80682373046875, "learning_rate": 1.9199470869210763e-06, "loss": 18.4453, "step": 16310 }, { "epoch": 0.15440028019424276, "grad_norm": 2.7976958751678467, "learning_rate": 1.919935067107835e-06, "loss": 0.9351, "step": 16311 }, { "epoch": 0.15440974621595782, "grad_norm": 233.53773498535156, "learning_rate": 1.9199230464299154e-06, "loss": 17.4609, "step": 16312 }, { "epoch": 0.15441921223767288, "grad_norm": 1171.8572998046875, "learning_rate": 1.919911024887327e-06, "loss": 51.7734, "step": 16313 }, { "epoch": 0.1544286782593879, "grad_norm": 213.5642852783203, "learning_rate": 1.9198990024800826e-06, "loss": 17.1875, "step": 16314 }, { "epoch": 0.15443814428110297, "grad_norm": 304.3841247558594, "learning_rate": 1.919886979208193e-06, "loss": 23.4453, "step": 16315 }, { "epoch": 0.15444761030281803, "grad_norm": 379.419921875, "learning_rate": 1.9198749550716695e-06, "loss": 47.7188, "step": 16316 }, { "epoch": 0.1544570763245331, "grad_norm": 230.04103088378906, "learning_rate": 1.919862930070523e-06, "loss": 22.6406, "step": 16317 }, { "epoch": 0.15446654234624815, "grad_norm": 710.1038818359375, "learning_rate": 1.9198509042047654e-06, "loss": 48.9922, "step": 16318 }, { "epoch": 0.15447600836796319, "grad_norm": 527.7965698242188, "learning_rate": 1.919838877474408e-06, "loss": 44.5625, "step": 16319 }, { "epoch": 0.15448547438967825, "grad_norm": 1157.0361328125, "learning_rate": 1.9198268498794617e-06, "loss": 26.8203, "step": 16320 }, { "epoch": 0.1544949404113933, "grad_norm": 535.78173828125, "learning_rate": 1.9198148214199383e-06, "loss": 15.0391, "step": 16321 }, { "epoch": 0.15450440643310837, "grad_norm": 450.09906005859375, "learning_rate": 1.9198027920958483e-06, "loss": 36.0625, "step": 16322 }, { "epoch": 0.1545138724548234, "grad_norm": 372.124267578125, "learning_rate": 1.9197907619072037e-06, "loss": 8.5469, "step": 16323 }, { "epoch": 0.15452333847653846, "grad_norm": 480.92022705078125, "learning_rate": 1.919778730854016e-06, "loss": 53.6406, "step": 16324 }, { "epoch": 0.15453280449825352, "grad_norm": 312.5768737792969, "learning_rate": 1.9197666989362953e-06, "loss": 24.0391, "step": 16325 }, { "epoch": 0.15454227051996858, "grad_norm": 383.5633239746094, "learning_rate": 1.9197546661540544e-06, "loss": 37.375, "step": 16326 }, { "epoch": 0.15455173654168364, "grad_norm": 619.4132080078125, "learning_rate": 1.9197426325073038e-06, "loss": 8.0273, "step": 16327 }, { "epoch": 0.15456120256339867, "grad_norm": 230.07249450683594, "learning_rate": 1.9197305979960548e-06, "loss": 16.2891, "step": 16328 }, { "epoch": 0.15457066858511373, "grad_norm": 172.0858154296875, "learning_rate": 1.9197185626203193e-06, "loss": 17.3594, "step": 16329 }, { "epoch": 0.1545801346068288, "grad_norm": 493.27178955078125, "learning_rate": 1.919706526380108e-06, "loss": 39.5781, "step": 16330 }, { "epoch": 0.15458960062854385, "grad_norm": 225.18907165527344, "learning_rate": 1.919694489275432e-06, "loss": 20.5859, "step": 16331 }, { "epoch": 0.1545990666502589, "grad_norm": 181.7891387939453, "learning_rate": 1.919682451306303e-06, "loss": 18.8047, "step": 16332 }, { "epoch": 0.15460853267197394, "grad_norm": 472.5028076171875, "learning_rate": 1.919670412472733e-06, "loss": 44.6328, "step": 16333 }, { "epoch": 0.154617998693689, "grad_norm": 1273.8048095703125, "learning_rate": 1.9196583727747325e-06, "loss": 21.4375, "step": 16334 }, { "epoch": 0.15462746471540406, "grad_norm": 322.2464294433594, "learning_rate": 1.9196463322123124e-06, "loss": 26.4062, "step": 16335 }, { "epoch": 0.15463693073711912, "grad_norm": 451.5979309082031, "learning_rate": 1.919634290785485e-06, "loss": 22.6562, "step": 16336 }, { "epoch": 0.15464639675883415, "grad_norm": 246.59523010253906, "learning_rate": 1.919622248494261e-06, "loss": 20.1016, "step": 16337 }, { "epoch": 0.1546558627805492, "grad_norm": 154.71571350097656, "learning_rate": 1.919610205338652e-06, "loss": 17.4805, "step": 16338 }, { "epoch": 0.15466532880226427, "grad_norm": 334.3149108886719, "learning_rate": 1.9195981613186693e-06, "loss": 45.0938, "step": 16339 }, { "epoch": 0.15467479482397933, "grad_norm": 343.0191345214844, "learning_rate": 1.919586116434324e-06, "loss": 23.2266, "step": 16340 }, { "epoch": 0.1546842608456944, "grad_norm": 345.5526123046875, "learning_rate": 1.919574070685628e-06, "loss": 21.8438, "step": 16341 }, { "epoch": 0.15469372686740943, "grad_norm": 465.7305603027344, "learning_rate": 1.9195620240725917e-06, "loss": 37.5469, "step": 16342 }, { "epoch": 0.15470319288912449, "grad_norm": 778.9481811523438, "learning_rate": 1.919549976595227e-06, "loss": 43.8516, "step": 16343 }, { "epoch": 0.15471265891083955, "grad_norm": 472.4157409667969, "learning_rate": 1.9195379282535453e-06, "loss": 38.0391, "step": 16344 }, { "epoch": 0.1547221249325546, "grad_norm": 545.0217895507812, "learning_rate": 1.9195258790475575e-06, "loss": 16.9922, "step": 16345 }, { "epoch": 0.15473159095426964, "grad_norm": 192.59535217285156, "learning_rate": 1.9195138289772755e-06, "loss": 17.0859, "step": 16346 }, { "epoch": 0.1547410569759847, "grad_norm": 241.14108276367188, "learning_rate": 1.9195017780427103e-06, "loss": 30.4922, "step": 16347 }, { "epoch": 0.15475052299769976, "grad_norm": 191.71517944335938, "learning_rate": 1.919489726243873e-06, "loss": 18.7578, "step": 16348 }, { "epoch": 0.15475998901941482, "grad_norm": 447.7306823730469, "learning_rate": 1.9194776735807756e-06, "loss": 23.2266, "step": 16349 }, { "epoch": 0.15476945504112988, "grad_norm": 167.48573303222656, "learning_rate": 1.9194656200534285e-06, "loss": 9.6914, "step": 16350 }, { "epoch": 0.1547789210628449, "grad_norm": 390.9641418457031, "learning_rate": 1.9194535656618436e-06, "loss": 51.0312, "step": 16351 }, { "epoch": 0.15478838708455997, "grad_norm": 187.15635681152344, "learning_rate": 1.9194415104060322e-06, "loss": 24.1641, "step": 16352 }, { "epoch": 0.15479785310627503, "grad_norm": 3.0754077434539795, "learning_rate": 1.9194294542860063e-06, "loss": 0.8623, "step": 16353 }, { "epoch": 0.1548073191279901, "grad_norm": 187.8343505859375, "learning_rate": 1.9194173973017755e-06, "loss": 17.5938, "step": 16354 }, { "epoch": 0.15481678514970512, "grad_norm": 518.291748046875, "learning_rate": 1.9194053394533526e-06, "loss": 36.4453, "step": 16355 }, { "epoch": 0.15482625117142018, "grad_norm": 602.2943115234375, "learning_rate": 1.9193932807407485e-06, "loss": 54.8906, "step": 16356 }, { "epoch": 0.15483571719313524, "grad_norm": 343.6151428222656, "learning_rate": 1.9193812211639747e-06, "loss": 8.2715, "step": 16357 }, { "epoch": 0.1548451832148503, "grad_norm": 319.1268005371094, "learning_rate": 1.919369160723042e-06, "loss": 6.582, "step": 16358 }, { "epoch": 0.15485464923656536, "grad_norm": 187.23049926757812, "learning_rate": 1.9193570994179626e-06, "loss": 22.3281, "step": 16359 }, { "epoch": 0.1548641152582804, "grad_norm": 3.2234842777252197, "learning_rate": 1.9193450372487467e-06, "loss": 0.8984, "step": 16360 }, { "epoch": 0.15487358127999545, "grad_norm": 520.8546752929688, "learning_rate": 1.919332974215407e-06, "loss": 17.8359, "step": 16361 }, { "epoch": 0.1548830473017105, "grad_norm": 382.053955078125, "learning_rate": 1.919320910317953e-06, "loss": 42.8438, "step": 16362 }, { "epoch": 0.15489251332342557, "grad_norm": 716.7582397460938, "learning_rate": 1.9193088455563984e-06, "loss": 44.0078, "step": 16363 }, { "epoch": 0.1549019793451406, "grad_norm": 506.0771484375, "learning_rate": 1.9192967799307523e-06, "loss": 28.875, "step": 16364 }, { "epoch": 0.15491144536685567, "grad_norm": 452.3298645019531, "learning_rate": 1.9192847134410275e-06, "loss": 31.1406, "step": 16365 }, { "epoch": 0.15492091138857073, "grad_norm": 350.7535095214844, "learning_rate": 1.919272646087235e-06, "loss": 31.5078, "step": 16366 }, { "epoch": 0.15493037741028579, "grad_norm": 187.03021240234375, "learning_rate": 1.9192605778693857e-06, "loss": 21.1641, "step": 16367 }, { "epoch": 0.15493984343200085, "grad_norm": 182.61865234375, "learning_rate": 1.9192485087874916e-06, "loss": 26.2266, "step": 16368 }, { "epoch": 0.15494930945371588, "grad_norm": 493.9795837402344, "learning_rate": 1.9192364388415633e-06, "loss": 39.3906, "step": 16369 }, { "epoch": 0.15495877547543094, "grad_norm": 273.8565368652344, "learning_rate": 1.919224368031613e-06, "loss": 21.3438, "step": 16370 }, { "epoch": 0.154968241497146, "grad_norm": 190.79254150390625, "learning_rate": 1.919212296357651e-06, "loss": 8.6719, "step": 16371 }, { "epoch": 0.15497770751886106, "grad_norm": 375.5628662109375, "learning_rate": 1.9192002238196897e-06, "loss": 47.5156, "step": 16372 }, { "epoch": 0.1549871735405761, "grad_norm": 539.9116821289062, "learning_rate": 1.91918815041774e-06, "loss": 11.8203, "step": 16373 }, { "epoch": 0.15499663956229115, "grad_norm": 3.299675226211548, "learning_rate": 1.919176076151813e-06, "loss": 0.9844, "step": 16374 }, { "epoch": 0.1550061055840062, "grad_norm": 248.8745574951172, "learning_rate": 1.9191640010219207e-06, "loss": 15.7578, "step": 16375 }, { "epoch": 0.15501557160572127, "grad_norm": 439.4442138671875, "learning_rate": 1.9191519250280734e-06, "loss": 23.4453, "step": 16376 }, { "epoch": 0.15502503762743633, "grad_norm": 440.1876525878906, "learning_rate": 1.919139848170284e-06, "loss": 44.3906, "step": 16377 }, { "epoch": 0.15503450364915136, "grad_norm": 841.2374877929688, "learning_rate": 1.919127770448562e-06, "loss": 47.25, "step": 16378 }, { "epoch": 0.15504396967086642, "grad_norm": 932.9439697265625, "learning_rate": 1.9191156918629197e-06, "loss": 45.8438, "step": 16379 }, { "epoch": 0.15505343569258148, "grad_norm": 378.23980712890625, "learning_rate": 1.919103612413369e-06, "loss": 19.6875, "step": 16380 }, { "epoch": 0.15506290171429654, "grad_norm": 611.8768920898438, "learning_rate": 1.9190915320999204e-06, "loss": 21.9922, "step": 16381 }, { "epoch": 0.15507236773601157, "grad_norm": 879.3230590820312, "learning_rate": 1.919079450922586e-06, "loss": 67.457, "step": 16382 }, { "epoch": 0.15508183375772663, "grad_norm": 330.8801574707031, "learning_rate": 1.919067368881376e-06, "loss": 41.6875, "step": 16383 }, { "epoch": 0.1550912997794417, "grad_norm": 207.39588928222656, "learning_rate": 1.9190552859763028e-06, "loss": 8.9961, "step": 16384 }, { "epoch": 0.15510076580115675, "grad_norm": 261.1646423339844, "learning_rate": 1.9190432022073774e-06, "loss": 16.8633, "step": 16385 }, { "epoch": 0.1551102318228718, "grad_norm": 430.41510009765625, "learning_rate": 1.9190311175746114e-06, "loss": 28.9062, "step": 16386 }, { "epoch": 0.15511969784458685, "grad_norm": 647.6255493164062, "learning_rate": 1.9190190320780157e-06, "loss": 34.1719, "step": 16387 }, { "epoch": 0.1551291638663019, "grad_norm": 364.625, "learning_rate": 1.919006945717602e-06, "loss": 21.6953, "step": 16388 }, { "epoch": 0.15513862988801697, "grad_norm": 308.61419677734375, "learning_rate": 1.9189948584933814e-06, "loss": 22.5938, "step": 16389 }, { "epoch": 0.15514809590973203, "grad_norm": 531.9016723632812, "learning_rate": 1.9189827704053655e-06, "loss": 25.6953, "step": 16390 }, { "epoch": 0.15515756193144706, "grad_norm": 815.558837890625, "learning_rate": 1.918970681453566e-06, "loss": 50.7188, "step": 16391 }, { "epoch": 0.15516702795316212, "grad_norm": 353.7265319824219, "learning_rate": 1.9189585916379933e-06, "loss": 16.1797, "step": 16392 }, { "epoch": 0.15517649397487718, "grad_norm": 302.4866943359375, "learning_rate": 1.9189465009586593e-06, "loss": 23.1562, "step": 16393 }, { "epoch": 0.15518595999659224, "grad_norm": 420.8631286621094, "learning_rate": 1.9189344094155753e-06, "loss": 53.7188, "step": 16394 }, { "epoch": 0.1551954260183073, "grad_norm": 488.3992614746094, "learning_rate": 1.918922317008753e-06, "loss": 45.6562, "step": 16395 }, { "epoch": 0.15520489204002233, "grad_norm": 165.83763122558594, "learning_rate": 1.9189102237382035e-06, "loss": 15.0469, "step": 16396 }, { "epoch": 0.1552143580617374, "grad_norm": 462.24609375, "learning_rate": 1.918898129603938e-06, "loss": 48.9531, "step": 16397 }, { "epoch": 0.15522382408345245, "grad_norm": 382.1523742675781, "learning_rate": 1.918886034605968e-06, "loss": 48.1875, "step": 16398 }, { "epoch": 0.1552332901051675, "grad_norm": 579.11572265625, "learning_rate": 1.9188739387443053e-06, "loss": 27.7734, "step": 16399 }, { "epoch": 0.15524275612688254, "grad_norm": 272.75494384765625, "learning_rate": 1.9188618420189607e-06, "loss": 15.3125, "step": 16400 }, { "epoch": 0.1552522221485976, "grad_norm": 220.265869140625, "learning_rate": 1.9188497444299456e-06, "loss": 14.9102, "step": 16401 }, { "epoch": 0.15526168817031266, "grad_norm": 877.8680419921875, "learning_rate": 1.9188376459772713e-06, "loss": 30.4062, "step": 16402 }, { "epoch": 0.15527115419202772, "grad_norm": 469.42327880859375, "learning_rate": 1.91882554666095e-06, "loss": 46.9062, "step": 16403 }, { "epoch": 0.15528062021374278, "grad_norm": 442.28564453125, "learning_rate": 1.9188134464809923e-06, "loss": 28.7109, "step": 16404 }, { "epoch": 0.1552900862354578, "grad_norm": 183.0595245361328, "learning_rate": 1.9188013454374094e-06, "loss": 24.9531, "step": 16405 }, { "epoch": 0.15529955225717287, "grad_norm": 381.1760559082031, "learning_rate": 1.918789243530213e-06, "loss": 31.8828, "step": 16406 }, { "epoch": 0.15530901827888793, "grad_norm": 298.19024658203125, "learning_rate": 1.918777140759415e-06, "loss": 21.0859, "step": 16407 }, { "epoch": 0.155318484300603, "grad_norm": 851.986328125, "learning_rate": 1.9187650371250257e-06, "loss": 23.6641, "step": 16408 }, { "epoch": 0.15532795032231803, "grad_norm": 474.7716979980469, "learning_rate": 1.9187529326270573e-06, "loss": 44.0625, "step": 16409 }, { "epoch": 0.15533741634403309, "grad_norm": 239.886474609375, "learning_rate": 1.9187408272655206e-06, "loss": 24.1016, "step": 16410 }, { "epoch": 0.15534688236574815, "grad_norm": 314.10858154296875, "learning_rate": 1.918728721040428e-06, "loss": 52.625, "step": 16411 }, { "epoch": 0.1553563483874632, "grad_norm": 363.9648132324219, "learning_rate": 1.9187166139517893e-06, "loss": 34.125, "step": 16412 }, { "epoch": 0.15536581440917827, "grad_norm": 537.5307006835938, "learning_rate": 1.918704505999617e-06, "loss": 43.9844, "step": 16413 }, { "epoch": 0.1553752804308933, "grad_norm": 770.3675537109375, "learning_rate": 1.918692397183923e-06, "loss": 55.9688, "step": 16414 }, { "epoch": 0.15538474645260836, "grad_norm": 228.67697143554688, "learning_rate": 1.918680287504717e-06, "loss": 17.9141, "step": 16415 }, { "epoch": 0.15539421247432342, "grad_norm": 513.72509765625, "learning_rate": 1.918668176962012e-06, "loss": 25.2031, "step": 16416 }, { "epoch": 0.15540367849603848, "grad_norm": 1324.215087890625, "learning_rate": 1.918656065555818e-06, "loss": 48.6406, "step": 16417 }, { "epoch": 0.15541314451775354, "grad_norm": 379.3255615234375, "learning_rate": 1.918643953286147e-06, "loss": 36.5781, "step": 16418 }, { "epoch": 0.15542261053946857, "grad_norm": 281.9704895019531, "learning_rate": 1.918631840153011e-06, "loss": 17.7188, "step": 16419 }, { "epoch": 0.15543207656118363, "grad_norm": 273.12835693359375, "learning_rate": 1.9186197261564206e-06, "loss": 15.2305, "step": 16420 }, { "epoch": 0.1554415425828987, "grad_norm": 376.79083251953125, "learning_rate": 1.9186076112963877e-06, "loss": 46.2656, "step": 16421 }, { "epoch": 0.15545100860461375, "grad_norm": 909.6356811523438, "learning_rate": 1.918595495572923e-06, "loss": 59.3281, "step": 16422 }, { "epoch": 0.15546047462632878, "grad_norm": 841.6072387695312, "learning_rate": 1.9185833789860385e-06, "loss": 46.4375, "step": 16423 }, { "epoch": 0.15546994064804384, "grad_norm": 487.0523986816406, "learning_rate": 1.9185712615357453e-06, "loss": 10.3242, "step": 16424 }, { "epoch": 0.1554794066697589, "grad_norm": 206.18630981445312, "learning_rate": 1.9185591432220546e-06, "loss": 15.7266, "step": 16425 }, { "epoch": 0.15548887269147396, "grad_norm": 260.0870361328125, "learning_rate": 1.9185470240449785e-06, "loss": 28.1719, "step": 16426 }, { "epoch": 0.15549833871318902, "grad_norm": 338.20843505859375, "learning_rate": 1.9185349040045277e-06, "loss": 20.7656, "step": 16427 }, { "epoch": 0.15550780473490405, "grad_norm": 377.23675537109375, "learning_rate": 1.9185227831007144e-06, "loss": 52.0469, "step": 16428 }, { "epoch": 0.1555172707566191, "grad_norm": 477.5021667480469, "learning_rate": 1.918510661333549e-06, "loss": 32.3438, "step": 16429 }, { "epoch": 0.15552673677833417, "grad_norm": 437.10614013671875, "learning_rate": 1.918498538703043e-06, "loss": 41.1094, "step": 16430 }, { "epoch": 0.15553620280004923, "grad_norm": 396.8966369628906, "learning_rate": 1.9184864152092085e-06, "loss": 25.0703, "step": 16431 }, { "epoch": 0.15554566882176427, "grad_norm": 268.1622314453125, "learning_rate": 1.9184742908520563e-06, "loss": 19.7031, "step": 16432 }, { "epoch": 0.15555513484347933, "grad_norm": 3.605226755142212, "learning_rate": 1.9184621656315984e-06, "loss": 0.9263, "step": 16433 }, { "epoch": 0.15556460086519439, "grad_norm": 389.4281921386719, "learning_rate": 1.9184500395478457e-06, "loss": 30.3281, "step": 16434 }, { "epoch": 0.15557406688690945, "grad_norm": 246.17724609375, "learning_rate": 1.9184379126008094e-06, "loss": 21.1094, "step": 16435 }, { "epoch": 0.1555835329086245, "grad_norm": 2.813278913497925, "learning_rate": 1.9184257847905016e-06, "loss": 0.8062, "step": 16436 }, { "epoch": 0.15559299893033954, "grad_norm": 1023.4330444335938, "learning_rate": 1.918413656116933e-06, "loss": 11.3008, "step": 16437 }, { "epoch": 0.1556024649520546, "grad_norm": 358.2649841308594, "learning_rate": 1.9184015265801155e-06, "loss": 24.5938, "step": 16438 }, { "epoch": 0.15561193097376966, "grad_norm": 680.5565795898438, "learning_rate": 1.91838939618006e-06, "loss": 49.1953, "step": 16439 }, { "epoch": 0.15562139699548472, "grad_norm": 355.8365478515625, "learning_rate": 1.9183772649167787e-06, "loss": 18.5234, "step": 16440 }, { "epoch": 0.15563086301719975, "grad_norm": 3.1078226566314697, "learning_rate": 1.9183651327902824e-06, "loss": 0.8799, "step": 16441 }, { "epoch": 0.1556403290389148, "grad_norm": 692.1497802734375, "learning_rate": 1.9183529998005825e-06, "loss": 53.9062, "step": 16442 }, { "epoch": 0.15564979506062987, "grad_norm": 452.36187744140625, "learning_rate": 1.9183408659476903e-06, "loss": 26.4453, "step": 16443 }, { "epoch": 0.15565926108234493, "grad_norm": 627.5320434570312, "learning_rate": 1.918328731231618e-06, "loss": 20.0625, "step": 16444 }, { "epoch": 0.15566872710406, "grad_norm": 178.78173828125, "learning_rate": 1.9183165956523755e-06, "loss": 19.2969, "step": 16445 }, { "epoch": 0.15567819312577502, "grad_norm": 640.6980590820312, "learning_rate": 1.9183044592099758e-06, "loss": 45.5625, "step": 16446 }, { "epoch": 0.15568765914749008, "grad_norm": 620.7662353515625, "learning_rate": 1.91829232190443e-06, "loss": 59.3125, "step": 16447 }, { "epoch": 0.15569712516920514, "grad_norm": 405.415283203125, "learning_rate": 1.9182801837357486e-06, "loss": 62.7656, "step": 16448 }, { "epoch": 0.1557065911909202, "grad_norm": 413.3269348144531, "learning_rate": 1.9182680447039433e-06, "loss": 16.3672, "step": 16449 }, { "epoch": 0.15571605721263523, "grad_norm": 425.98724365234375, "learning_rate": 1.9182559048090266e-06, "loss": 20.5469, "step": 16450 }, { "epoch": 0.1557255232343503, "grad_norm": 289.1714782714844, "learning_rate": 1.9182437640510084e-06, "loss": 21.8906, "step": 16451 }, { "epoch": 0.15573498925606535, "grad_norm": 304.91912841796875, "learning_rate": 1.9182316224299012e-06, "loss": 26.0703, "step": 16452 }, { "epoch": 0.1557444552777804, "grad_norm": 3.5759549140930176, "learning_rate": 1.918219479945716e-06, "loss": 1.0854, "step": 16453 }, { "epoch": 0.15575392129949547, "grad_norm": 349.3283996582031, "learning_rate": 1.918207336598464e-06, "loss": 47.0, "step": 16454 }, { "epoch": 0.1557633873212105, "grad_norm": 397.14849853515625, "learning_rate": 1.9181951923881567e-06, "loss": 23.3672, "step": 16455 }, { "epoch": 0.15577285334292557, "grad_norm": 264.03912353515625, "learning_rate": 1.918183047314806e-06, "loss": 15.5547, "step": 16456 }, { "epoch": 0.15578231936464063, "grad_norm": 670.0082397460938, "learning_rate": 1.918170901378423e-06, "loss": 31.1641, "step": 16457 }, { "epoch": 0.15579178538635569, "grad_norm": 367.660888671875, "learning_rate": 1.918158754579019e-06, "loss": 24.4688, "step": 16458 }, { "epoch": 0.15580125140807072, "grad_norm": 333.1833190917969, "learning_rate": 1.918146606916605e-06, "loss": 25.5469, "step": 16459 }, { "epoch": 0.15581071742978578, "grad_norm": 922.9368896484375, "learning_rate": 1.9181344583911934e-06, "loss": 36.9688, "step": 16460 }, { "epoch": 0.15582018345150084, "grad_norm": 527.8322143554688, "learning_rate": 1.918122309002795e-06, "loss": 45.125, "step": 16461 }, { "epoch": 0.1558296494732159, "grad_norm": 920.4498291015625, "learning_rate": 1.9181101587514217e-06, "loss": 63.5625, "step": 16462 }, { "epoch": 0.15583911549493096, "grad_norm": 154.07032775878906, "learning_rate": 1.9180980076370844e-06, "loss": 14.6172, "step": 16463 }, { "epoch": 0.155848581516646, "grad_norm": 746.3416137695312, "learning_rate": 1.9180858556597946e-06, "loss": 28.4531, "step": 16464 }, { "epoch": 0.15585804753836105, "grad_norm": 275.1768798828125, "learning_rate": 1.9180737028195637e-06, "loss": 24.8359, "step": 16465 }, { "epoch": 0.1558675135600761, "grad_norm": 970.7776489257812, "learning_rate": 1.9180615491164036e-06, "loss": 38.4219, "step": 16466 }, { "epoch": 0.15587697958179117, "grad_norm": 209.15170288085938, "learning_rate": 1.918049394550325e-06, "loss": 24.0859, "step": 16467 }, { "epoch": 0.1558864456035062, "grad_norm": 847.5222778320312, "learning_rate": 1.9180372391213398e-06, "loss": 56.4844, "step": 16468 }, { "epoch": 0.15589591162522126, "grad_norm": 3.550759792327881, "learning_rate": 1.9180250828294594e-06, "loss": 0.9102, "step": 16469 }, { "epoch": 0.15590537764693632, "grad_norm": 548.6339721679688, "learning_rate": 1.918012925674695e-06, "loss": 20.2188, "step": 16470 }, { "epoch": 0.15591484366865138, "grad_norm": 283.82269287109375, "learning_rate": 1.9180007676570583e-06, "loss": 18.2891, "step": 16471 }, { "epoch": 0.15592430969036644, "grad_norm": 857.8822631835938, "learning_rate": 1.9179886087765605e-06, "loss": 37.0078, "step": 16472 }, { "epoch": 0.15593377571208147, "grad_norm": 3.346806764602661, "learning_rate": 1.917976449033213e-06, "loss": 0.9316, "step": 16473 }, { "epoch": 0.15594324173379653, "grad_norm": 316.6571350097656, "learning_rate": 1.917964288427028e-06, "loss": 30.0781, "step": 16474 }, { "epoch": 0.1559527077555116, "grad_norm": 369.745361328125, "learning_rate": 1.9179521269580157e-06, "loss": 26.8125, "step": 16475 }, { "epoch": 0.15596217377722665, "grad_norm": 496.2659606933594, "learning_rate": 1.917939964626188e-06, "loss": 31.5859, "step": 16476 }, { "epoch": 0.15597163979894169, "grad_norm": 3283.56298828125, "learning_rate": 1.9179278014315568e-06, "loss": 33.5625, "step": 16477 }, { "epoch": 0.15598110582065675, "grad_norm": 438.6822509765625, "learning_rate": 1.9179156373741334e-06, "loss": 39.4609, "step": 16478 }, { "epoch": 0.1559905718423718, "grad_norm": 522.319091796875, "learning_rate": 1.9179034724539285e-06, "loss": 45.7969, "step": 16479 }, { "epoch": 0.15600003786408687, "grad_norm": 357.8663024902344, "learning_rate": 1.917891306670954e-06, "loss": 22.2422, "step": 16480 }, { "epoch": 0.15600950388580193, "grad_norm": 206.53590393066406, "learning_rate": 1.9178791400252217e-06, "loss": 22.207, "step": 16481 }, { "epoch": 0.15601896990751696, "grad_norm": 496.60321044921875, "learning_rate": 1.917866972516743e-06, "loss": 32.2188, "step": 16482 }, { "epoch": 0.15602843592923202, "grad_norm": 321.3220520019531, "learning_rate": 1.9178548041455286e-06, "loss": 16.5234, "step": 16483 }, { "epoch": 0.15603790195094708, "grad_norm": 940.5941162109375, "learning_rate": 1.9178426349115908e-06, "loss": 48.5156, "step": 16484 }, { "epoch": 0.15604736797266214, "grad_norm": 2.6129918098449707, "learning_rate": 1.91783046481494e-06, "loss": 0.8887, "step": 16485 }, { "epoch": 0.15605683399437717, "grad_norm": 298.32244873046875, "learning_rate": 1.9178182938555887e-06, "loss": 26.7188, "step": 16486 }, { "epoch": 0.15606630001609223, "grad_norm": 751.0360107421875, "learning_rate": 1.9178061220335476e-06, "loss": 11.7812, "step": 16487 }, { "epoch": 0.1560757660378073, "grad_norm": 286.6654052734375, "learning_rate": 1.917793949348829e-06, "loss": 21.4141, "step": 16488 }, { "epoch": 0.15608523205952235, "grad_norm": 476.4429016113281, "learning_rate": 1.9177817758014432e-06, "loss": 36.832, "step": 16489 }, { "epoch": 0.1560946980812374, "grad_norm": 227.92446899414062, "learning_rate": 1.9177696013914027e-06, "loss": 16.8438, "step": 16490 }, { "epoch": 0.15610416410295244, "grad_norm": 1441.44091796875, "learning_rate": 1.9177574261187185e-06, "loss": 30.7031, "step": 16491 }, { "epoch": 0.1561136301246675, "grad_norm": 472.5524597167969, "learning_rate": 1.9177452499834014e-06, "loss": 46.7188, "step": 16492 }, { "epoch": 0.15612309614638256, "grad_norm": 363.026611328125, "learning_rate": 1.917733072985464e-06, "loss": 37.8906, "step": 16493 }, { "epoch": 0.15613256216809762, "grad_norm": 333.2865905761719, "learning_rate": 1.917720895124917e-06, "loss": 34.4844, "step": 16494 }, { "epoch": 0.15614202818981265, "grad_norm": 3.355358839035034, "learning_rate": 1.9177087164017724e-06, "loss": 0.8564, "step": 16495 }, { "epoch": 0.1561514942115277, "grad_norm": 586.096435546875, "learning_rate": 1.917696536816041e-06, "loss": 40.875, "step": 16496 }, { "epoch": 0.15616096023324277, "grad_norm": 196.24667358398438, "learning_rate": 1.9176843563677346e-06, "loss": 20.7734, "step": 16497 }, { "epoch": 0.15617042625495783, "grad_norm": 343.7352294921875, "learning_rate": 1.9176721750568645e-06, "loss": 23.7891, "step": 16498 }, { "epoch": 0.1561798922766729, "grad_norm": 718.52685546875, "learning_rate": 1.9176599928834427e-06, "loss": 59.8867, "step": 16499 }, { "epoch": 0.15618935829838793, "grad_norm": 302.9837646484375, "learning_rate": 1.91764780984748e-06, "loss": 24.4297, "step": 16500 }, { "epoch": 0.15619882432010299, "grad_norm": 787.551025390625, "learning_rate": 1.9176356259489874e-06, "loss": 66.2031, "step": 16501 }, { "epoch": 0.15620829034181805, "grad_norm": 650.3306274414062, "learning_rate": 1.9176234411879777e-06, "loss": 36.293, "step": 16502 }, { "epoch": 0.1562177563635331, "grad_norm": 549.24072265625, "learning_rate": 1.9176112555644614e-06, "loss": 49.0781, "step": 16503 }, { "epoch": 0.15622722238524817, "grad_norm": 372.9659423828125, "learning_rate": 1.9175990690784505e-06, "loss": 61.0469, "step": 16504 }, { "epoch": 0.1562366884069632, "grad_norm": 430.0223693847656, "learning_rate": 1.917586881729956e-06, "loss": 39.2188, "step": 16505 }, { "epoch": 0.15624615442867826, "grad_norm": 719.133544921875, "learning_rate": 1.9175746935189894e-06, "loss": 40.5547, "step": 16506 }, { "epoch": 0.15625562045039332, "grad_norm": 232.16043090820312, "learning_rate": 1.917562504445562e-06, "loss": 21.2812, "step": 16507 }, { "epoch": 0.15626508647210838, "grad_norm": 382.67681884765625, "learning_rate": 1.9175503145096864e-06, "loss": 28.5938, "step": 16508 }, { "epoch": 0.1562745524938234, "grad_norm": 219.78182983398438, "learning_rate": 1.9175381237113728e-06, "loss": 15.9961, "step": 16509 }, { "epoch": 0.15628401851553847, "grad_norm": 325.71856689453125, "learning_rate": 1.917525932050633e-06, "loss": 36.4531, "step": 16510 }, { "epoch": 0.15629348453725353, "grad_norm": 331.7279357910156, "learning_rate": 1.917513739527478e-06, "loss": 21.3359, "step": 16511 }, { "epoch": 0.1563029505589686, "grad_norm": 495.0187072753906, "learning_rate": 1.9175015461419205e-06, "loss": 22.2422, "step": 16512 }, { "epoch": 0.15631241658068365, "grad_norm": 277.2162170410156, "learning_rate": 1.917489351893971e-06, "loss": 16.3594, "step": 16513 }, { "epoch": 0.15632188260239868, "grad_norm": 375.4522399902344, "learning_rate": 1.9174771567836414e-06, "loss": 32.4961, "step": 16514 }, { "epoch": 0.15633134862411374, "grad_norm": 270.26275634765625, "learning_rate": 1.9174649608109424e-06, "loss": 18.6328, "step": 16515 }, { "epoch": 0.1563408146458288, "grad_norm": 1607.1103515625, "learning_rate": 1.9174527639758867e-06, "loss": 30.7969, "step": 16516 }, { "epoch": 0.15635028066754386, "grad_norm": 289.5047912597656, "learning_rate": 1.9174405662784846e-06, "loss": 26.2266, "step": 16517 }, { "epoch": 0.1563597466892589, "grad_norm": 413.7156982421875, "learning_rate": 1.9174283677187483e-06, "loss": 45.4219, "step": 16518 }, { "epoch": 0.15636921271097395, "grad_norm": 542.1063232421875, "learning_rate": 1.917416168296689e-06, "loss": 24.2852, "step": 16519 }, { "epoch": 0.156378678732689, "grad_norm": 280.4878234863281, "learning_rate": 1.917403968012318e-06, "loss": 18.0859, "step": 16520 }, { "epoch": 0.15638814475440407, "grad_norm": 382.4866638183594, "learning_rate": 1.9173917668656474e-06, "loss": 24.0312, "step": 16521 }, { "epoch": 0.15639761077611913, "grad_norm": 515.8401489257812, "learning_rate": 1.9173795648566875e-06, "loss": 35.2812, "step": 16522 }, { "epoch": 0.15640707679783417, "grad_norm": 497.1317443847656, "learning_rate": 1.917367361985451e-06, "loss": 22.3359, "step": 16523 }, { "epoch": 0.15641654281954923, "grad_norm": 2.8423643112182617, "learning_rate": 1.917355158251949e-06, "loss": 0.9443, "step": 16524 }, { "epoch": 0.15642600884126429, "grad_norm": 390.5072021484375, "learning_rate": 1.9173429536561923e-06, "loss": 38.9219, "step": 16525 }, { "epoch": 0.15643547486297935, "grad_norm": 214.38699340820312, "learning_rate": 1.917330748198193e-06, "loss": 12.9062, "step": 16526 }, { "epoch": 0.15644494088469438, "grad_norm": 549.4683837890625, "learning_rate": 1.917318541877963e-06, "loss": 33.0781, "step": 16527 }, { "epoch": 0.15645440690640944, "grad_norm": 359.5817565917969, "learning_rate": 1.9173063346955125e-06, "loss": 16.1367, "step": 16528 }, { "epoch": 0.1564638729281245, "grad_norm": 325.8199462890625, "learning_rate": 1.917294126650854e-06, "loss": 18.6562, "step": 16529 }, { "epoch": 0.15647333894983956, "grad_norm": 514.154541015625, "learning_rate": 1.917281917743999e-06, "loss": 22.2188, "step": 16530 }, { "epoch": 0.15648280497155462, "grad_norm": 290.4158630371094, "learning_rate": 1.9172697079749584e-06, "loss": 25.0625, "step": 16531 }, { "epoch": 0.15649227099326965, "grad_norm": 233.24105834960938, "learning_rate": 1.917257497343744e-06, "loss": 22.1719, "step": 16532 }, { "epoch": 0.1565017370149847, "grad_norm": 561.7127685546875, "learning_rate": 1.917245285850367e-06, "loss": 28.0547, "step": 16533 }, { "epoch": 0.15651120303669977, "grad_norm": 249.157958984375, "learning_rate": 1.9172330734948397e-06, "loss": 21.6016, "step": 16534 }, { "epoch": 0.15652066905841483, "grad_norm": 176.96229553222656, "learning_rate": 1.9172208602771724e-06, "loss": 18.7188, "step": 16535 }, { "epoch": 0.15653013508012986, "grad_norm": 347.1831970214844, "learning_rate": 1.9172086461973776e-06, "loss": 23.9297, "step": 16536 }, { "epoch": 0.15653960110184492, "grad_norm": 3.0802738666534424, "learning_rate": 1.917196431255466e-06, "loss": 1.002, "step": 16537 }, { "epoch": 0.15654906712355998, "grad_norm": 540.3087158203125, "learning_rate": 1.9171842154514496e-06, "loss": 40.2344, "step": 16538 }, { "epoch": 0.15655853314527504, "grad_norm": 698.4066162109375, "learning_rate": 1.9171719987853396e-06, "loss": 42.3516, "step": 16539 }, { "epoch": 0.1565679991669901, "grad_norm": 402.79595947265625, "learning_rate": 1.9171597812571475e-06, "loss": 36.3594, "step": 16540 }, { "epoch": 0.15657746518870513, "grad_norm": 226.0202178955078, "learning_rate": 1.917147562866885e-06, "loss": 17.6992, "step": 16541 }, { "epoch": 0.1565869312104202, "grad_norm": 291.8571472167969, "learning_rate": 1.9171353436145637e-06, "loss": 14.9727, "step": 16542 }, { "epoch": 0.15659639723213525, "grad_norm": 475.0023498535156, "learning_rate": 1.9171231235001945e-06, "loss": 39.2969, "step": 16543 }, { "epoch": 0.1566058632538503, "grad_norm": 580.7921142578125, "learning_rate": 1.9171109025237893e-06, "loss": 30.5, "step": 16544 }, { "epoch": 0.15661532927556535, "grad_norm": 434.2157897949219, "learning_rate": 1.91709868068536e-06, "loss": 38.9062, "step": 16545 }, { "epoch": 0.1566247952972804, "grad_norm": 548.2258911132812, "learning_rate": 1.917086457984917e-06, "loss": 44.25, "step": 16546 }, { "epoch": 0.15663426131899547, "grad_norm": 249.34210205078125, "learning_rate": 1.9170742344224722e-06, "loss": 17.9844, "step": 16547 }, { "epoch": 0.15664372734071053, "grad_norm": 375.05352783203125, "learning_rate": 1.9170620099980377e-06, "loss": 45.5, "step": 16548 }, { "epoch": 0.15665319336242559, "grad_norm": 310.9997253417969, "learning_rate": 1.9170497847116243e-06, "loss": 32.9375, "step": 16549 }, { "epoch": 0.15666265938414062, "grad_norm": 471.3978576660156, "learning_rate": 1.917037558563244e-06, "loss": 57.0, "step": 16550 }, { "epoch": 0.15667212540585568, "grad_norm": 501.3362731933594, "learning_rate": 1.917025331552908e-06, "loss": 43.5312, "step": 16551 }, { "epoch": 0.15668159142757074, "grad_norm": 335.9559020996094, "learning_rate": 1.917013103680628e-06, "loss": 7.3125, "step": 16552 }, { "epoch": 0.1566910574492858, "grad_norm": 225.4190673828125, "learning_rate": 1.917000874946415e-06, "loss": 15.4453, "step": 16553 }, { "epoch": 0.15670052347100083, "grad_norm": 186.1197052001953, "learning_rate": 1.916988645350281e-06, "loss": 21.3906, "step": 16554 }, { "epoch": 0.1567099894927159, "grad_norm": 3.2301716804504395, "learning_rate": 1.916976414892237e-06, "loss": 0.9128, "step": 16555 }, { "epoch": 0.15671945551443095, "grad_norm": 719.0043334960938, "learning_rate": 1.9169641835722955e-06, "loss": 34.8516, "step": 16556 }, { "epoch": 0.156728921536146, "grad_norm": 283.2489929199219, "learning_rate": 1.9169519513904673e-06, "loss": 15.6328, "step": 16557 }, { "epoch": 0.15673838755786107, "grad_norm": 557.70654296875, "learning_rate": 1.9169397183467635e-06, "loss": 31.5156, "step": 16558 }, { "epoch": 0.1567478535795761, "grad_norm": 793.2528686523438, "learning_rate": 1.916927484441196e-06, "loss": 42.9453, "step": 16559 }, { "epoch": 0.15675731960129116, "grad_norm": 3.5094308853149414, "learning_rate": 1.9169152496737766e-06, "loss": 0.9731, "step": 16560 }, { "epoch": 0.15676678562300622, "grad_norm": 948.0128173828125, "learning_rate": 1.916903014044516e-06, "loss": 20.0781, "step": 16561 }, { "epoch": 0.15677625164472128, "grad_norm": 529.05322265625, "learning_rate": 1.916890777553427e-06, "loss": 36.2734, "step": 16562 }, { "epoch": 0.15678571766643631, "grad_norm": 308.5867614746094, "learning_rate": 1.9168785402005197e-06, "loss": 18.3594, "step": 16563 }, { "epoch": 0.15679518368815137, "grad_norm": 289.1612854003906, "learning_rate": 1.9168663019858064e-06, "loss": 28.6641, "step": 16564 }, { "epoch": 0.15680464970986643, "grad_norm": 592.051025390625, "learning_rate": 1.9168540629092986e-06, "loss": 20.8242, "step": 16565 }, { "epoch": 0.1568141157315815, "grad_norm": 476.79241943359375, "learning_rate": 1.916841822971007e-06, "loss": 27.3438, "step": 16566 }, { "epoch": 0.15682358175329655, "grad_norm": 2.976701498031616, "learning_rate": 1.9168295821709443e-06, "loss": 0.8879, "step": 16567 }, { "epoch": 0.15683304777501159, "grad_norm": 351.5396728515625, "learning_rate": 1.9168173405091212e-06, "loss": 49.3359, "step": 16568 }, { "epoch": 0.15684251379672665, "grad_norm": 537.328125, "learning_rate": 1.9168050979855497e-06, "loss": 58.3906, "step": 16569 }, { "epoch": 0.1568519798184417, "grad_norm": 335.12884521484375, "learning_rate": 1.9167928546002407e-06, "loss": 30.9844, "step": 16570 }, { "epoch": 0.15686144584015677, "grad_norm": 2165.3369140625, "learning_rate": 1.9167806103532066e-06, "loss": 22.2461, "step": 16571 }, { "epoch": 0.1568709118618718, "grad_norm": 288.483154296875, "learning_rate": 1.916768365244458e-06, "loss": 17.7695, "step": 16572 }, { "epoch": 0.15688037788358686, "grad_norm": 343.6012268066406, "learning_rate": 1.916756119274007e-06, "loss": 19.4766, "step": 16573 }, { "epoch": 0.15688984390530192, "grad_norm": 4.689981460571289, "learning_rate": 1.916743872441864e-06, "loss": 0.981, "step": 16574 }, { "epoch": 0.15689930992701698, "grad_norm": 535.2788696289062, "learning_rate": 1.9167316247480425e-06, "loss": 46.9141, "step": 16575 }, { "epoch": 0.15690877594873204, "grad_norm": 302.0567932128906, "learning_rate": 1.916719376192553e-06, "loss": 8.5781, "step": 16576 }, { "epoch": 0.15691824197044707, "grad_norm": 296.0642395019531, "learning_rate": 1.9167071267754057e-06, "loss": 7.9336, "step": 16577 }, { "epoch": 0.15692770799216213, "grad_norm": 210.5123748779297, "learning_rate": 1.9166948764966144e-06, "loss": 18.9453, "step": 16578 }, { "epoch": 0.1569371740138772, "grad_norm": 753.578125, "learning_rate": 1.916682625356189e-06, "loss": 31.6211, "step": 16579 }, { "epoch": 0.15694664003559225, "grad_norm": 480.4267272949219, "learning_rate": 1.9166703733541417e-06, "loss": 33.8125, "step": 16580 }, { "epoch": 0.15695610605730728, "grad_norm": 363.88690185546875, "learning_rate": 1.916658120490484e-06, "loss": 24.5625, "step": 16581 }, { "epoch": 0.15696557207902234, "grad_norm": 359.9840087890625, "learning_rate": 1.9166458667652275e-06, "loss": 54.3906, "step": 16582 }, { "epoch": 0.1569750381007374, "grad_norm": 637.7329711914062, "learning_rate": 1.916633612178383e-06, "loss": 49.1562, "step": 16583 }, { "epoch": 0.15698450412245246, "grad_norm": 238.236328125, "learning_rate": 1.916621356729963e-06, "loss": 26.5078, "step": 16584 }, { "epoch": 0.15699397014416752, "grad_norm": 525.8947143554688, "learning_rate": 1.9166091004199784e-06, "loss": 58.5938, "step": 16585 }, { "epoch": 0.15700343616588255, "grad_norm": 3.1564717292785645, "learning_rate": 1.9165968432484408e-06, "loss": 1.0493, "step": 16586 }, { "epoch": 0.1570129021875976, "grad_norm": 287.0762939453125, "learning_rate": 1.916584585215362e-06, "loss": 17.875, "step": 16587 }, { "epoch": 0.15702236820931267, "grad_norm": 277.54901123046875, "learning_rate": 1.916572326320753e-06, "loss": 9.0898, "step": 16588 }, { "epoch": 0.15703183423102773, "grad_norm": 895.5343627929688, "learning_rate": 1.9165600665646256e-06, "loss": 6.7734, "step": 16589 }, { "epoch": 0.15704130025274277, "grad_norm": 182.8898162841797, "learning_rate": 1.9165478059469916e-06, "loss": 21.6016, "step": 16590 }, { "epoch": 0.15705076627445783, "grad_norm": 3.121965169906616, "learning_rate": 1.9165355444678624e-06, "loss": 0.8115, "step": 16591 }, { "epoch": 0.15706023229617289, "grad_norm": 1630.626708984375, "learning_rate": 1.916523282127249e-06, "loss": 43.4648, "step": 16592 }, { "epoch": 0.15706969831788795, "grad_norm": 427.5636291503906, "learning_rate": 1.916511018925164e-06, "loss": 27.1797, "step": 16593 }, { "epoch": 0.157079164339603, "grad_norm": 614.0263061523438, "learning_rate": 1.9164987548616177e-06, "loss": 52.0703, "step": 16594 }, { "epoch": 0.15708863036131804, "grad_norm": 532.046630859375, "learning_rate": 1.916486489936622e-06, "loss": 37.875, "step": 16595 }, { "epoch": 0.1570980963830331, "grad_norm": 400.97015380859375, "learning_rate": 1.9164742241501894e-06, "loss": 49.0781, "step": 16596 }, { "epoch": 0.15710756240474816, "grad_norm": 209.12342834472656, "learning_rate": 1.91646195750233e-06, "loss": 8.6914, "step": 16597 }, { "epoch": 0.15711702842646322, "grad_norm": 306.1680908203125, "learning_rate": 1.9164496899930565e-06, "loss": 40.9531, "step": 16598 }, { "epoch": 0.15712649444817828, "grad_norm": 3.032686233520508, "learning_rate": 1.916437421622379e-06, "loss": 0.8828, "step": 16599 }, { "epoch": 0.1571359604698933, "grad_norm": 468.6991271972656, "learning_rate": 1.916425152390311e-06, "loss": 24.4375, "step": 16600 }, { "epoch": 0.15714542649160837, "grad_norm": 502.57818603515625, "learning_rate": 1.9164128822968625e-06, "loss": 54.4844, "step": 16601 }, { "epoch": 0.15715489251332343, "grad_norm": 218.59664916992188, "learning_rate": 1.9164006113420456e-06, "loss": 19.1406, "step": 16602 }, { "epoch": 0.1571643585350385, "grad_norm": 886.2828979492188, "learning_rate": 1.9163883395258717e-06, "loss": 45.0625, "step": 16603 }, { "epoch": 0.15717382455675352, "grad_norm": 242.91552734375, "learning_rate": 1.916376066848352e-06, "loss": 18.8047, "step": 16604 }, { "epoch": 0.15718329057846858, "grad_norm": 229.9532470703125, "learning_rate": 1.916363793309499e-06, "loss": 23.3984, "step": 16605 }, { "epoch": 0.15719275660018364, "grad_norm": 154.60174560546875, "learning_rate": 1.9163515189093234e-06, "loss": 18.3984, "step": 16606 }, { "epoch": 0.1572022226218987, "grad_norm": 1134.1595458984375, "learning_rate": 1.916339243647837e-06, "loss": 23.6562, "step": 16607 }, { "epoch": 0.15721168864361376, "grad_norm": 321.1361083984375, "learning_rate": 1.9163269675250514e-06, "loss": 19.2656, "step": 16608 }, { "epoch": 0.1572211546653288, "grad_norm": 329.7748718261719, "learning_rate": 1.916314690540978e-06, "loss": 31.5781, "step": 16609 }, { "epoch": 0.15723062068704385, "grad_norm": 677.8848266601562, "learning_rate": 1.9163024126956284e-06, "loss": 33.5312, "step": 16610 }, { "epoch": 0.1572400867087589, "grad_norm": 350.4939270019531, "learning_rate": 1.916290133989014e-06, "loss": 36.875, "step": 16611 }, { "epoch": 0.15724955273047397, "grad_norm": 375.0602111816406, "learning_rate": 1.9162778544211466e-06, "loss": 35.0078, "step": 16612 }, { "epoch": 0.157259018752189, "grad_norm": 685.34130859375, "learning_rate": 1.9162655739920375e-06, "loss": 31.9062, "step": 16613 }, { "epoch": 0.15726848477390407, "grad_norm": 198.237548828125, "learning_rate": 1.9162532927016983e-06, "loss": 20.1719, "step": 16614 }, { "epoch": 0.15727795079561913, "grad_norm": 539.9365234375, "learning_rate": 1.9162410105501414e-06, "loss": 21.5547, "step": 16615 }, { "epoch": 0.15728741681733419, "grad_norm": 609.3953247070312, "learning_rate": 1.916228727537377e-06, "loss": 46.6875, "step": 16616 }, { "epoch": 0.15729688283904925, "grad_norm": 228.87779235839844, "learning_rate": 1.916216443663417e-06, "loss": 19.6758, "step": 16617 }, { "epoch": 0.15730634886076428, "grad_norm": 392.754150390625, "learning_rate": 1.916204158928273e-06, "loss": 46.3047, "step": 16618 }, { "epoch": 0.15731581488247934, "grad_norm": 206.27879333496094, "learning_rate": 1.9161918733319573e-06, "loss": 16.6875, "step": 16619 }, { "epoch": 0.1573252809041944, "grad_norm": 483.047607421875, "learning_rate": 1.9161795868744804e-06, "loss": 18.0469, "step": 16620 }, { "epoch": 0.15733474692590946, "grad_norm": 3.042423963546753, "learning_rate": 1.9161672995558544e-06, "loss": 0.9089, "step": 16621 }, { "epoch": 0.1573442129476245, "grad_norm": 686.5133666992188, "learning_rate": 1.916155011376091e-06, "loss": 53.8438, "step": 16622 }, { "epoch": 0.15735367896933955, "grad_norm": 403.2264099121094, "learning_rate": 1.916142722335201e-06, "loss": 39.0234, "step": 16623 }, { "epoch": 0.1573631449910546, "grad_norm": 173.27500915527344, "learning_rate": 1.9161304324331972e-06, "loss": 20.2031, "step": 16624 }, { "epoch": 0.15737261101276967, "grad_norm": 637.0682983398438, "learning_rate": 1.9161181416700897e-06, "loss": 50.3438, "step": 16625 }, { "epoch": 0.15738207703448473, "grad_norm": 436.74090576171875, "learning_rate": 1.916105850045891e-06, "loss": 16.7031, "step": 16626 }, { "epoch": 0.15739154305619976, "grad_norm": 3.2130258083343506, "learning_rate": 1.916093557560612e-06, "loss": 0.843, "step": 16627 }, { "epoch": 0.15740100907791482, "grad_norm": 457.4765930175781, "learning_rate": 1.916081264214265e-06, "loss": 35.4922, "step": 16628 }, { "epoch": 0.15741047509962988, "grad_norm": 733.1608276367188, "learning_rate": 1.9160689700068614e-06, "loss": 56.3125, "step": 16629 }, { "epoch": 0.15741994112134494, "grad_norm": 838.9122314453125, "learning_rate": 1.9160566749384125e-06, "loss": 50.8008, "step": 16630 }, { "epoch": 0.15742940714305997, "grad_norm": 507.49365234375, "learning_rate": 1.9160443790089295e-06, "loss": 19.2578, "step": 16631 }, { "epoch": 0.15743887316477503, "grad_norm": 495.65997314453125, "learning_rate": 1.916032082218425e-06, "loss": 31.2031, "step": 16632 }, { "epoch": 0.1574483391864901, "grad_norm": 560.3295288085938, "learning_rate": 1.9160197845669096e-06, "loss": 56.5, "step": 16633 }, { "epoch": 0.15745780520820515, "grad_norm": 436.8662109375, "learning_rate": 1.916007486054395e-06, "loss": 51.6875, "step": 16634 }, { "epoch": 0.1574672712299202, "grad_norm": 303.48095703125, "learning_rate": 1.915995186680893e-06, "loss": 32.5391, "step": 16635 }, { "epoch": 0.15747673725163525, "grad_norm": 335.89422607421875, "learning_rate": 1.9159828864464155e-06, "loss": 22.5156, "step": 16636 }, { "epoch": 0.1574862032733503, "grad_norm": 473.04534912109375, "learning_rate": 1.915970585350973e-06, "loss": 36.5938, "step": 16637 }, { "epoch": 0.15749566929506537, "grad_norm": 273.86309814453125, "learning_rate": 1.9159582833945784e-06, "loss": 19.8203, "step": 16638 }, { "epoch": 0.15750513531678043, "grad_norm": 263.9110107421875, "learning_rate": 1.9159459805772424e-06, "loss": 19.4141, "step": 16639 }, { "epoch": 0.15751460133849546, "grad_norm": 350.29425048828125, "learning_rate": 1.9159336768989767e-06, "loss": 17.0469, "step": 16640 }, { "epoch": 0.15752406736021052, "grad_norm": 735.8990478515625, "learning_rate": 1.915921372359793e-06, "loss": 70.7695, "step": 16641 }, { "epoch": 0.15753353338192558, "grad_norm": 427.37255859375, "learning_rate": 1.9159090669597025e-06, "loss": 33.0156, "step": 16642 }, { "epoch": 0.15754299940364064, "grad_norm": 2.9002742767333984, "learning_rate": 1.9158967606987177e-06, "loss": 0.9109, "step": 16643 }, { "epoch": 0.1575524654253557, "grad_norm": 189.94271850585938, "learning_rate": 1.915884453576849e-06, "loss": 19.0312, "step": 16644 }, { "epoch": 0.15756193144707073, "grad_norm": 378.9605407714844, "learning_rate": 1.9158721455941083e-06, "loss": 22.5234, "step": 16645 }, { "epoch": 0.1575713974687858, "grad_norm": 311.0993347167969, "learning_rate": 1.915859836750508e-06, "loss": 16.3867, "step": 16646 }, { "epoch": 0.15758086349050085, "grad_norm": 535.3472290039062, "learning_rate": 1.9158475270460588e-06, "loss": 17.6484, "step": 16647 }, { "epoch": 0.1575903295122159, "grad_norm": 236.158447265625, "learning_rate": 1.9158352164807723e-06, "loss": 21.2188, "step": 16648 }, { "epoch": 0.15759979553393094, "grad_norm": 565.2423095703125, "learning_rate": 1.9158229050546602e-06, "loss": 17.0312, "step": 16649 }, { "epoch": 0.157609261555646, "grad_norm": 367.9898376464844, "learning_rate": 1.915810592767735e-06, "loss": 35.4844, "step": 16650 }, { "epoch": 0.15761872757736106, "grad_norm": 1287.79443359375, "learning_rate": 1.9157982796200067e-06, "loss": 72.8203, "step": 16651 }, { "epoch": 0.15762819359907612, "grad_norm": 313.0833740234375, "learning_rate": 1.9157859656114872e-06, "loss": 19.7266, "step": 16652 }, { "epoch": 0.15763765962079118, "grad_norm": 398.5746154785156, "learning_rate": 1.915773650742189e-06, "loss": 32.9375, "step": 16653 }, { "epoch": 0.15764712564250621, "grad_norm": 240.6494140625, "learning_rate": 1.915761335012123e-06, "loss": 23.3359, "step": 16654 }, { "epoch": 0.15765659166422127, "grad_norm": 206.2313690185547, "learning_rate": 1.9157490184213013e-06, "loss": 24.1406, "step": 16655 }, { "epoch": 0.15766605768593633, "grad_norm": 563.893798828125, "learning_rate": 1.915736700969735e-06, "loss": 14.9766, "step": 16656 }, { "epoch": 0.1576755237076514, "grad_norm": 315.50897216796875, "learning_rate": 1.9157243826574355e-06, "loss": 25.2031, "step": 16657 }, { "epoch": 0.15768498972936643, "grad_norm": 738.5973510742188, "learning_rate": 1.9157120634844147e-06, "loss": 45.75, "step": 16658 }, { "epoch": 0.15769445575108149, "grad_norm": 343.2291259765625, "learning_rate": 1.915699743450684e-06, "loss": 23.0703, "step": 16659 }, { "epoch": 0.15770392177279655, "grad_norm": 319.7482604980469, "learning_rate": 1.9156874225562555e-06, "loss": 17.3945, "step": 16660 }, { "epoch": 0.1577133877945116, "grad_norm": 171.5980987548828, "learning_rate": 1.91567510080114e-06, "loss": 14.9258, "step": 16661 }, { "epoch": 0.15772285381622667, "grad_norm": 198.6282501220703, "learning_rate": 1.9156627781853495e-06, "loss": 17.4922, "step": 16662 }, { "epoch": 0.1577323198379417, "grad_norm": 214.5541534423828, "learning_rate": 1.915650454708896e-06, "loss": 14.7812, "step": 16663 }, { "epoch": 0.15774178585965676, "grad_norm": 276.412353515625, "learning_rate": 1.9156381303717902e-06, "loss": 18.5156, "step": 16664 }, { "epoch": 0.15775125188137182, "grad_norm": 212.73748779296875, "learning_rate": 1.9156258051740443e-06, "loss": 15.1875, "step": 16665 }, { "epoch": 0.15776071790308688, "grad_norm": 153.9167938232422, "learning_rate": 1.9156134791156698e-06, "loss": 17.4688, "step": 16666 }, { "epoch": 0.1577701839248019, "grad_norm": 582.4440307617188, "learning_rate": 1.915601152196678e-06, "loss": 27.8125, "step": 16667 }, { "epoch": 0.15777964994651697, "grad_norm": 318.6811828613281, "learning_rate": 1.9155888244170808e-06, "loss": 19.4062, "step": 16668 }, { "epoch": 0.15778911596823203, "grad_norm": 484.5511474609375, "learning_rate": 1.915576495776889e-06, "loss": 72.9062, "step": 16669 }, { "epoch": 0.1577985819899471, "grad_norm": 795.3704223632812, "learning_rate": 1.915564166276116e-06, "loss": 45.2344, "step": 16670 }, { "epoch": 0.15780804801166215, "grad_norm": 503.2070617675781, "learning_rate": 1.9155518359147717e-06, "loss": 22.1719, "step": 16671 }, { "epoch": 0.15781751403337718, "grad_norm": 371.6351623535156, "learning_rate": 1.915539504692868e-06, "loss": 53.9219, "step": 16672 }, { "epoch": 0.15782698005509224, "grad_norm": 325.371337890625, "learning_rate": 1.9155271726104173e-06, "loss": 23.25, "step": 16673 }, { "epoch": 0.1578364460768073, "grad_norm": 409.01165771484375, "learning_rate": 1.91551483966743e-06, "loss": 37.0156, "step": 16674 }, { "epoch": 0.15784591209852236, "grad_norm": 3.349351167678833, "learning_rate": 1.9155025058639188e-06, "loss": 0.9541, "step": 16675 }, { "epoch": 0.1578553781202374, "grad_norm": 269.1447448730469, "learning_rate": 1.9154901711998944e-06, "loss": 27.1562, "step": 16676 }, { "epoch": 0.15786484414195245, "grad_norm": 450.0188293457031, "learning_rate": 1.9154778356753693e-06, "loss": 28.1445, "step": 16677 }, { "epoch": 0.15787431016366751, "grad_norm": 585.004150390625, "learning_rate": 1.915465499290354e-06, "loss": 63.75, "step": 16678 }, { "epoch": 0.15788377618538257, "grad_norm": 409.2629699707031, "learning_rate": 1.9154531620448612e-06, "loss": 16.8828, "step": 16679 }, { "epoch": 0.15789324220709763, "grad_norm": 261.5265808105469, "learning_rate": 1.9154408239389016e-06, "loss": 18.8438, "step": 16680 }, { "epoch": 0.15790270822881267, "grad_norm": 386.06103515625, "learning_rate": 1.9154284849724873e-06, "loss": 27.5469, "step": 16681 }, { "epoch": 0.15791217425052773, "grad_norm": 3.2375829219818115, "learning_rate": 1.91541614514563e-06, "loss": 0.8945, "step": 16682 }, { "epoch": 0.15792164027224279, "grad_norm": 545.2178344726562, "learning_rate": 1.9154038044583407e-06, "loss": 21.2422, "step": 16683 }, { "epoch": 0.15793110629395785, "grad_norm": 385.0981140136719, "learning_rate": 1.9153914629106316e-06, "loss": 28.2031, "step": 16684 }, { "epoch": 0.1579405723156729, "grad_norm": 407.74755859375, "learning_rate": 1.915379120502514e-06, "loss": 31.4766, "step": 16685 }, { "epoch": 0.15795003833738794, "grad_norm": 2.93766713142395, "learning_rate": 1.9153667772339996e-06, "loss": 0.9463, "step": 16686 }, { "epoch": 0.157959504359103, "grad_norm": 259.8419189453125, "learning_rate": 1.9153544331051e-06, "loss": 23.8281, "step": 16687 }, { "epoch": 0.15796897038081806, "grad_norm": 314.89801025390625, "learning_rate": 1.915342088115827e-06, "loss": 41.0938, "step": 16688 }, { "epoch": 0.15797843640253312, "grad_norm": 608.0072021484375, "learning_rate": 1.9153297422661918e-06, "loss": 37.7109, "step": 16689 }, { "epoch": 0.15798790242424815, "grad_norm": 418.8529357910156, "learning_rate": 1.9153173955562057e-06, "loss": 28.6797, "step": 16690 }, { "epoch": 0.1579973684459632, "grad_norm": 520.9064331054688, "learning_rate": 1.9153050479858814e-06, "loss": 37.5312, "step": 16691 }, { "epoch": 0.15800683446767827, "grad_norm": 461.8236389160156, "learning_rate": 1.9152926995552297e-06, "loss": 36.0312, "step": 16692 }, { "epoch": 0.15801630048939333, "grad_norm": 364.763671875, "learning_rate": 1.9152803502642626e-06, "loss": 9.332, "step": 16693 }, { "epoch": 0.1580257665111084, "grad_norm": 297.52447509765625, "learning_rate": 1.9152680001129913e-06, "loss": 17.543, "step": 16694 }, { "epoch": 0.15803523253282342, "grad_norm": 277.56390380859375, "learning_rate": 1.9152556491014276e-06, "loss": 26.4531, "step": 16695 }, { "epoch": 0.15804469855453848, "grad_norm": 360.039306640625, "learning_rate": 1.9152432972295833e-06, "loss": 29.0547, "step": 16696 }, { "epoch": 0.15805416457625354, "grad_norm": 622.3440551757812, "learning_rate": 1.9152309444974696e-06, "loss": 65.625, "step": 16697 }, { "epoch": 0.1580636305979686, "grad_norm": 261.2226867675781, "learning_rate": 1.9152185909050985e-06, "loss": 14.7852, "step": 16698 }, { "epoch": 0.15807309661968363, "grad_norm": 968.8908081054688, "learning_rate": 1.9152062364524817e-06, "loss": 44.2461, "step": 16699 }, { "epoch": 0.1580825626413987, "grad_norm": 878.2884521484375, "learning_rate": 1.91519388113963e-06, "loss": 26.0156, "step": 16700 }, { "epoch": 0.15809202866311375, "grad_norm": 228.66180419921875, "learning_rate": 1.9151815249665563e-06, "loss": 17.4414, "step": 16701 }, { "epoch": 0.1581014946848288, "grad_norm": 829.703125, "learning_rate": 1.915169167933271e-06, "loss": 56.3047, "step": 16702 }, { "epoch": 0.15811096070654387, "grad_norm": 557.052490234375, "learning_rate": 1.9151568100397865e-06, "loss": 20.5312, "step": 16703 }, { "epoch": 0.1581204267282589, "grad_norm": 3.4059550762176514, "learning_rate": 1.915144451286114e-06, "loss": 0.9795, "step": 16704 }, { "epoch": 0.15812989274997397, "grad_norm": 1066.88037109375, "learning_rate": 1.915132091672265e-06, "loss": 48.3984, "step": 16705 }, { "epoch": 0.15813935877168903, "grad_norm": 272.6195983886719, "learning_rate": 1.9151197311982517e-06, "loss": 15.5195, "step": 16706 }, { "epoch": 0.15814882479340409, "grad_norm": 515.0051879882812, "learning_rate": 1.9151073698640855e-06, "loss": 28.2812, "step": 16707 }, { "epoch": 0.15815829081511912, "grad_norm": 396.6145324707031, "learning_rate": 1.9150950076697776e-06, "loss": 8.7812, "step": 16708 }, { "epoch": 0.15816775683683418, "grad_norm": 2.904782772064209, "learning_rate": 1.91508264461534e-06, "loss": 0.8579, "step": 16709 }, { "epoch": 0.15817722285854924, "grad_norm": 1289.9766845703125, "learning_rate": 1.915070280700784e-06, "loss": 40.8359, "step": 16710 }, { "epoch": 0.1581866888802643, "grad_norm": 587.4703979492188, "learning_rate": 1.915057915926122e-06, "loss": 40.875, "step": 16711 }, { "epoch": 0.15819615490197936, "grad_norm": 197.47503662109375, "learning_rate": 1.9150455502913647e-06, "loss": 20.5859, "step": 16712 }, { "epoch": 0.1582056209236944, "grad_norm": 325.01605224609375, "learning_rate": 1.9150331837965244e-06, "loss": 24.6719, "step": 16713 }, { "epoch": 0.15821508694540945, "grad_norm": 358.17620849609375, "learning_rate": 1.915020816441612e-06, "loss": 26.7578, "step": 16714 }, { "epoch": 0.1582245529671245, "grad_norm": 266.7516174316406, "learning_rate": 1.9150084482266398e-06, "loss": 33.9531, "step": 16715 }, { "epoch": 0.15823401898883957, "grad_norm": 199.58592224121094, "learning_rate": 1.9149960791516195e-06, "loss": 17.6797, "step": 16716 }, { "epoch": 0.1582434850105546, "grad_norm": 245.19932556152344, "learning_rate": 1.9149837092165617e-06, "loss": 25.2031, "step": 16717 }, { "epoch": 0.15825295103226966, "grad_norm": 450.3280029296875, "learning_rate": 1.9149713384214792e-06, "loss": 64.0352, "step": 16718 }, { "epoch": 0.15826241705398472, "grad_norm": 3.350313901901245, "learning_rate": 1.9149589667663834e-06, "loss": 0.8521, "step": 16719 }, { "epoch": 0.15827188307569978, "grad_norm": 438.7167663574219, "learning_rate": 1.9149465942512854e-06, "loss": 46.9062, "step": 16720 }, { "epoch": 0.15828134909741484, "grad_norm": 371.8592224121094, "learning_rate": 1.914934220876197e-06, "loss": 26.7109, "step": 16721 }, { "epoch": 0.15829081511912987, "grad_norm": 381.557861328125, "learning_rate": 1.91492184664113e-06, "loss": 9.2461, "step": 16722 }, { "epoch": 0.15830028114084493, "grad_norm": 456.6440124511719, "learning_rate": 1.914909471546096e-06, "loss": 30.2031, "step": 16723 }, { "epoch": 0.15830974716256, "grad_norm": 267.07965087890625, "learning_rate": 1.914897095591107e-06, "loss": 10.6055, "step": 16724 }, { "epoch": 0.15831921318427505, "grad_norm": 526.7637939453125, "learning_rate": 1.9148847187761735e-06, "loss": 9.9688, "step": 16725 }, { "epoch": 0.15832867920599009, "grad_norm": 234.38644409179688, "learning_rate": 1.9148723411013085e-06, "loss": 27.9922, "step": 16726 }, { "epoch": 0.15833814522770515, "grad_norm": 292.95458984375, "learning_rate": 1.9148599625665226e-06, "loss": 22.5469, "step": 16727 }, { "epoch": 0.1583476112494202, "grad_norm": 824.0736694335938, "learning_rate": 1.9148475831718285e-06, "loss": 38.1641, "step": 16728 }, { "epoch": 0.15835707727113527, "grad_norm": 327.6979064941406, "learning_rate": 1.9148352029172366e-06, "loss": 20.1484, "step": 16729 }, { "epoch": 0.15836654329285033, "grad_norm": 145.2967529296875, "learning_rate": 1.914822821802759e-06, "loss": 15.0547, "step": 16730 }, { "epoch": 0.15837600931456536, "grad_norm": 539.2099609375, "learning_rate": 1.914810439828408e-06, "loss": 57.2109, "step": 16731 }, { "epoch": 0.15838547533628042, "grad_norm": 3.2426750659942627, "learning_rate": 1.9147980569941943e-06, "loss": 0.9316, "step": 16732 }, { "epoch": 0.15839494135799548, "grad_norm": 3.0679337978363037, "learning_rate": 1.9147856733001303e-06, "loss": 0.792, "step": 16733 }, { "epoch": 0.15840440737971054, "grad_norm": 355.65570068359375, "learning_rate": 1.914773288746227e-06, "loss": 20.5156, "step": 16734 }, { "epoch": 0.15841387340142557, "grad_norm": 439.00390625, "learning_rate": 1.9147609033324965e-06, "loss": 48.875, "step": 16735 }, { "epoch": 0.15842333942314063, "grad_norm": 203.95919799804688, "learning_rate": 1.9147485170589503e-06, "loss": 17.9297, "step": 16736 }, { "epoch": 0.1584328054448557, "grad_norm": 150.6885223388672, "learning_rate": 1.9147361299255996e-06, "loss": 17.5625, "step": 16737 }, { "epoch": 0.15844227146657075, "grad_norm": 832.0249633789062, "learning_rate": 1.9147237419324567e-06, "loss": 57.6562, "step": 16738 }, { "epoch": 0.1584517374882858, "grad_norm": 294.4139709472656, "learning_rate": 1.914711353079533e-06, "loss": 28.7031, "step": 16739 }, { "epoch": 0.15846120351000084, "grad_norm": 292.7087097167969, "learning_rate": 1.91469896336684e-06, "loss": 20.5312, "step": 16740 }, { "epoch": 0.1584706695317159, "grad_norm": 3.4786717891693115, "learning_rate": 1.91468657279439e-06, "loss": 0.9482, "step": 16741 }, { "epoch": 0.15848013555343096, "grad_norm": 288.1067199707031, "learning_rate": 1.9146741813621933e-06, "loss": 38.9531, "step": 16742 }, { "epoch": 0.15848960157514602, "grad_norm": 275.5558166503906, "learning_rate": 1.914661789070263e-06, "loss": 44.5781, "step": 16743 }, { "epoch": 0.15849906759686105, "grad_norm": 871.09716796875, "learning_rate": 1.9146493959186098e-06, "loss": 45.5234, "step": 16744 }, { "epoch": 0.15850853361857611, "grad_norm": 217.4517059326172, "learning_rate": 1.9146370019072457e-06, "loss": 18.2188, "step": 16745 }, { "epoch": 0.15851799964029117, "grad_norm": 583.5205688476562, "learning_rate": 1.9146246070361826e-06, "loss": 48.6016, "step": 16746 }, { "epoch": 0.15852746566200623, "grad_norm": 320.3880615234375, "learning_rate": 1.9146122113054314e-06, "loss": 26.8359, "step": 16747 }, { "epoch": 0.1585369316837213, "grad_norm": 306.0249938964844, "learning_rate": 1.9145998147150045e-06, "loss": 29.2734, "step": 16748 }, { "epoch": 0.15854639770543633, "grad_norm": 349.4996337890625, "learning_rate": 1.914587417264913e-06, "loss": 16.4766, "step": 16749 }, { "epoch": 0.15855586372715139, "grad_norm": 327.65399169921875, "learning_rate": 1.9145750189551695e-06, "loss": 41.8594, "step": 16750 }, { "epoch": 0.15856532974886645, "grad_norm": 269.5568542480469, "learning_rate": 1.9145626197857843e-06, "loss": 26.3125, "step": 16751 }, { "epoch": 0.1585747957705815, "grad_norm": 530.5887451171875, "learning_rate": 1.9145502197567704e-06, "loss": 26.8281, "step": 16752 }, { "epoch": 0.15858426179229654, "grad_norm": 587.6936645507812, "learning_rate": 1.9145378188681383e-06, "loss": 15.4102, "step": 16753 }, { "epoch": 0.1585937278140116, "grad_norm": 126.08260345458984, "learning_rate": 1.9145254171199003e-06, "loss": 21.4375, "step": 16754 }, { "epoch": 0.15860319383572666, "grad_norm": 1099.384521484375, "learning_rate": 1.9145130145120675e-06, "loss": 21.7305, "step": 16755 }, { "epoch": 0.15861265985744172, "grad_norm": 449.0958557128906, "learning_rate": 1.9145006110446524e-06, "loss": 9.0586, "step": 16756 }, { "epoch": 0.15862212587915678, "grad_norm": 493.82916259765625, "learning_rate": 1.914488206717666e-06, "loss": 36.2422, "step": 16757 }, { "epoch": 0.1586315919008718, "grad_norm": 308.0479736328125, "learning_rate": 1.9144758015311204e-06, "loss": 27.4922, "step": 16758 }, { "epoch": 0.15864105792258687, "grad_norm": 314.78985595703125, "learning_rate": 1.914463395485027e-06, "loss": 37.8125, "step": 16759 }, { "epoch": 0.15865052394430193, "grad_norm": 687.5770874023438, "learning_rate": 1.9144509885793974e-06, "loss": 45.25, "step": 16760 }, { "epoch": 0.158659989966017, "grad_norm": 302.0760803222656, "learning_rate": 1.9144385808142435e-06, "loss": 41.6641, "step": 16761 }, { "epoch": 0.15866945598773202, "grad_norm": 277.14703369140625, "learning_rate": 1.9144261721895765e-06, "loss": 30.8984, "step": 16762 }, { "epoch": 0.15867892200944708, "grad_norm": 214.63186645507812, "learning_rate": 1.9144137627054086e-06, "loss": 21.2266, "step": 16763 }, { "epoch": 0.15868838803116214, "grad_norm": 162.1470947265625, "learning_rate": 1.9144013523617513e-06, "loss": 16.1641, "step": 16764 }, { "epoch": 0.1586978540528772, "grad_norm": 192.6674346923828, "learning_rate": 1.914388941158616e-06, "loss": 14.1953, "step": 16765 }, { "epoch": 0.15870732007459226, "grad_norm": 206.794921875, "learning_rate": 1.9143765290960147e-06, "loss": 18.0625, "step": 16766 }, { "epoch": 0.1587167860963073, "grad_norm": 332.32342529296875, "learning_rate": 1.914364116173959e-06, "loss": 25.7578, "step": 16767 }, { "epoch": 0.15872625211802235, "grad_norm": 615.2371215820312, "learning_rate": 1.9143517023924606e-06, "loss": 20.5156, "step": 16768 }, { "epoch": 0.15873571813973741, "grad_norm": 323.914794921875, "learning_rate": 1.914339287751531e-06, "loss": 38.7031, "step": 16769 }, { "epoch": 0.15874518416145247, "grad_norm": 247.9381561279297, "learning_rate": 1.914326872251182e-06, "loss": 24.25, "step": 16770 }, { "epoch": 0.15875465018316753, "grad_norm": 259.513427734375, "learning_rate": 1.914314455891425e-06, "loss": 34.1562, "step": 16771 }, { "epoch": 0.15876411620488257, "grad_norm": 1694.21240234375, "learning_rate": 1.9143020386722723e-06, "loss": 22.4766, "step": 16772 }, { "epoch": 0.15877358222659763, "grad_norm": 263.5105895996094, "learning_rate": 1.9142896205937346e-06, "loss": 31.8281, "step": 16773 }, { "epoch": 0.15878304824831269, "grad_norm": 427.7369079589844, "learning_rate": 1.914277201655825e-06, "loss": 36.5625, "step": 16774 }, { "epoch": 0.15879251427002775, "grad_norm": 278.5095520019531, "learning_rate": 1.9142647818585535e-06, "loss": 18.3281, "step": 16775 }, { "epoch": 0.15880198029174278, "grad_norm": 119.75418853759766, "learning_rate": 1.9142523612019326e-06, "loss": 11.7812, "step": 16776 }, { "epoch": 0.15881144631345784, "grad_norm": 752.1878662109375, "learning_rate": 1.9142399396859742e-06, "loss": 45.0312, "step": 16777 }, { "epoch": 0.1588209123351729, "grad_norm": 403.5720520019531, "learning_rate": 1.9142275173106896e-06, "loss": 19.2656, "step": 16778 }, { "epoch": 0.15883037835688796, "grad_norm": 370.3413391113281, "learning_rate": 1.914215094076091e-06, "loss": 17.6797, "step": 16779 }, { "epoch": 0.15883984437860302, "grad_norm": 248.7978515625, "learning_rate": 1.914202669982189e-06, "loss": 19.4141, "step": 16780 }, { "epoch": 0.15884931040031805, "grad_norm": 3.473383903503418, "learning_rate": 1.914190245028997e-06, "loss": 1.0474, "step": 16781 }, { "epoch": 0.1588587764220331, "grad_norm": 460.45263671875, "learning_rate": 1.914177819216525e-06, "loss": 23.4375, "step": 16782 }, { "epoch": 0.15886824244374817, "grad_norm": 497.06591796875, "learning_rate": 1.914165392544785e-06, "loss": 36.125, "step": 16783 }, { "epoch": 0.15887770846546323, "grad_norm": 466.79791259765625, "learning_rate": 1.9141529650137895e-06, "loss": 49.7031, "step": 16784 }, { "epoch": 0.15888717448717826, "grad_norm": 490.60211181640625, "learning_rate": 1.9141405366235493e-06, "loss": 11.5312, "step": 16785 }, { "epoch": 0.15889664050889332, "grad_norm": 274.3878479003906, "learning_rate": 1.914128107374077e-06, "loss": 10.9766, "step": 16786 }, { "epoch": 0.15890610653060838, "grad_norm": 3.437119483947754, "learning_rate": 1.914115677265383e-06, "loss": 0.8721, "step": 16787 }, { "epoch": 0.15891557255232344, "grad_norm": 416.5845642089844, "learning_rate": 1.9141032462974802e-06, "loss": 27.3203, "step": 16788 }, { "epoch": 0.1589250385740385, "grad_norm": 433.0208435058594, "learning_rate": 1.91409081447038e-06, "loss": 14.5234, "step": 16789 }, { "epoch": 0.15893450459575353, "grad_norm": 438.4230651855469, "learning_rate": 1.9140783817840936e-06, "loss": 61.4375, "step": 16790 }, { "epoch": 0.1589439706174686, "grad_norm": 458.2090759277344, "learning_rate": 1.914065948238633e-06, "loss": 19.2148, "step": 16791 }, { "epoch": 0.15895343663918365, "grad_norm": 501.81927490234375, "learning_rate": 1.91405351383401e-06, "loss": 40.9688, "step": 16792 }, { "epoch": 0.15896290266089871, "grad_norm": 282.976806640625, "learning_rate": 1.914041078570236e-06, "loss": 36.5938, "step": 16793 }, { "epoch": 0.15897236868261375, "grad_norm": 1047.6583251953125, "learning_rate": 1.914028642447323e-06, "loss": 42.4688, "step": 16794 }, { "epoch": 0.1589818347043288, "grad_norm": 2.8649582862854004, "learning_rate": 1.9140162054652824e-06, "loss": 0.9424, "step": 16795 }, { "epoch": 0.15899130072604387, "grad_norm": 560.88134765625, "learning_rate": 1.9140037676241265e-06, "loss": 38.5312, "step": 16796 }, { "epoch": 0.15900076674775893, "grad_norm": 3.7380740642547607, "learning_rate": 1.913991328923866e-06, "loss": 1.0503, "step": 16797 }, { "epoch": 0.15901023276947399, "grad_norm": 438.25543212890625, "learning_rate": 1.913978889364513e-06, "loss": 41.1719, "step": 16798 }, { "epoch": 0.15901969879118902, "grad_norm": 405.7065734863281, "learning_rate": 1.9139664489460795e-06, "loss": 40.3125, "step": 16799 }, { "epoch": 0.15902916481290408, "grad_norm": 297.6296081542969, "learning_rate": 1.913954007668577e-06, "loss": 10.5469, "step": 16800 }, { "epoch": 0.15903863083461914, "grad_norm": 829.7300415039062, "learning_rate": 1.9139415655320175e-06, "loss": 8.4609, "step": 16801 }, { "epoch": 0.1590480968563342, "grad_norm": 467.1604919433594, "learning_rate": 1.913929122536412e-06, "loss": 41.8906, "step": 16802 }, { "epoch": 0.15905756287804923, "grad_norm": 421.3735046386719, "learning_rate": 1.9139166786817727e-06, "loss": 38.1172, "step": 16803 }, { "epoch": 0.1590670288997643, "grad_norm": 557.0076293945312, "learning_rate": 1.9139042339681112e-06, "loss": 37.1055, "step": 16804 }, { "epoch": 0.15907649492147935, "grad_norm": 166.70140075683594, "learning_rate": 1.913891788395439e-06, "loss": 18.543, "step": 16805 }, { "epoch": 0.1590859609431944, "grad_norm": 414.6969299316406, "learning_rate": 1.9138793419637684e-06, "loss": 44.6406, "step": 16806 }, { "epoch": 0.15909542696490947, "grad_norm": 3.4006152153015137, "learning_rate": 1.9138668946731104e-06, "loss": 0.856, "step": 16807 }, { "epoch": 0.1591048929866245, "grad_norm": 340.63897705078125, "learning_rate": 1.913854446523477e-06, "loss": 26.3945, "step": 16808 }, { "epoch": 0.15911435900833956, "grad_norm": 226.16517639160156, "learning_rate": 1.9138419975148795e-06, "loss": 18.6211, "step": 16809 }, { "epoch": 0.15912382503005462, "grad_norm": 174.06768798828125, "learning_rate": 1.9138295476473305e-06, "loss": 25.1562, "step": 16810 }, { "epoch": 0.15913329105176968, "grad_norm": 3.0575830936431885, "learning_rate": 1.913817096920841e-06, "loss": 0.9136, "step": 16811 }, { "epoch": 0.15914275707348471, "grad_norm": 392.5357360839844, "learning_rate": 1.9138046453354225e-06, "loss": 41.1875, "step": 16812 }, { "epoch": 0.15915222309519977, "grad_norm": 517.9003295898438, "learning_rate": 1.913792192891088e-06, "loss": 46.2188, "step": 16813 }, { "epoch": 0.15916168911691483, "grad_norm": 783.1094970703125, "learning_rate": 1.9137797395878473e-06, "loss": 57.8438, "step": 16814 }, { "epoch": 0.1591711551386299, "grad_norm": 398.61834716796875, "learning_rate": 1.9137672854257137e-06, "loss": 56.9375, "step": 16815 }, { "epoch": 0.15918062116034495, "grad_norm": 300.68438720703125, "learning_rate": 1.913754830404698e-06, "loss": 31.0547, "step": 16816 }, { "epoch": 0.15919008718205999, "grad_norm": 2904.936279296875, "learning_rate": 1.913742374524812e-06, "loss": 8.8164, "step": 16817 }, { "epoch": 0.15919955320377505, "grad_norm": 785.5325317382812, "learning_rate": 1.913729917786068e-06, "loss": 9.6719, "step": 16818 }, { "epoch": 0.1592090192254901, "grad_norm": 682.7310791015625, "learning_rate": 1.9137174601884778e-06, "loss": 22.0078, "step": 16819 }, { "epoch": 0.15921848524720517, "grad_norm": 1154.6016845703125, "learning_rate": 1.9137050017320514e-06, "loss": 25.5352, "step": 16820 }, { "epoch": 0.1592279512689202, "grad_norm": 223.41993713378906, "learning_rate": 1.9136925424168027e-06, "loss": 24.5625, "step": 16821 }, { "epoch": 0.15923741729063526, "grad_norm": 714.4197387695312, "learning_rate": 1.9136800822427422e-06, "loss": 33.7891, "step": 16822 }, { "epoch": 0.15924688331235032, "grad_norm": 243.70887756347656, "learning_rate": 1.913667621209882e-06, "loss": 25.2578, "step": 16823 }, { "epoch": 0.15925634933406538, "grad_norm": 373.1144714355469, "learning_rate": 1.913655159318233e-06, "loss": 26.875, "step": 16824 }, { "epoch": 0.15926581535578044, "grad_norm": 160.59503173828125, "learning_rate": 1.913642696567808e-06, "loss": 20.5938, "step": 16825 }, { "epoch": 0.15927528137749547, "grad_norm": 470.7149353027344, "learning_rate": 1.9136302329586187e-06, "loss": 42.9453, "step": 16826 }, { "epoch": 0.15928474739921053, "grad_norm": 813.3823852539062, "learning_rate": 1.913617768490676e-06, "loss": 51.25, "step": 16827 }, { "epoch": 0.1592942134209256, "grad_norm": 775.14794921875, "learning_rate": 1.913605303163992e-06, "loss": 50.0781, "step": 16828 }, { "epoch": 0.15930367944264065, "grad_norm": 377.03057861328125, "learning_rate": 1.913592836978579e-06, "loss": 51.5312, "step": 16829 }, { "epoch": 0.15931314546435568, "grad_norm": 319.7200927734375, "learning_rate": 1.9135803699344476e-06, "loss": 14.3906, "step": 16830 }, { "epoch": 0.15932261148607074, "grad_norm": 202.57498168945312, "learning_rate": 1.9135679020316104e-06, "loss": 27.3125, "step": 16831 }, { "epoch": 0.1593320775077858, "grad_norm": 250.65184020996094, "learning_rate": 1.9135554332700784e-06, "loss": 14.5625, "step": 16832 }, { "epoch": 0.15934154352950086, "grad_norm": 239.19822692871094, "learning_rate": 1.913542963649864e-06, "loss": 15.8125, "step": 16833 }, { "epoch": 0.15935100955121592, "grad_norm": 368.4674987792969, "learning_rate": 1.913530493170978e-06, "loss": 18.6289, "step": 16834 }, { "epoch": 0.15936047557293095, "grad_norm": 267.605712890625, "learning_rate": 1.913518021833434e-06, "loss": 16.8203, "step": 16835 }, { "epoch": 0.15936994159464601, "grad_norm": 550.9542846679688, "learning_rate": 1.9135055496372416e-06, "loss": 49.7109, "step": 16836 }, { "epoch": 0.15937940761636107, "grad_norm": 235.47161865234375, "learning_rate": 1.9134930765824138e-06, "loss": 18.0703, "step": 16837 }, { "epoch": 0.15938887363807613, "grad_norm": 250.7138671875, "learning_rate": 1.9134806026689618e-06, "loss": 20.6094, "step": 16838 }, { "epoch": 0.15939833965979117, "grad_norm": 2.096381664276123, "learning_rate": 1.9134681278968974e-06, "loss": 0.6552, "step": 16839 }, { "epoch": 0.15940780568150623, "grad_norm": 360.9614562988281, "learning_rate": 1.9134556522662322e-06, "loss": 22.6797, "step": 16840 }, { "epoch": 0.15941727170322129, "grad_norm": 1087.54638671875, "learning_rate": 1.913443175776979e-06, "loss": 42.3203, "step": 16841 }, { "epoch": 0.15942673772493635, "grad_norm": 292.4931945800781, "learning_rate": 1.9134306984291475e-06, "loss": 19.1797, "step": 16842 }, { "epoch": 0.1594362037466514, "grad_norm": 373.0649108886719, "learning_rate": 1.9134182202227512e-06, "loss": 42.6719, "step": 16843 }, { "epoch": 0.15944566976836644, "grad_norm": 1444.93505859375, "learning_rate": 1.9134057411578007e-06, "loss": 14.5234, "step": 16844 }, { "epoch": 0.1594551357900815, "grad_norm": 413.7614440917969, "learning_rate": 1.913393261234309e-06, "loss": 37.9688, "step": 16845 }, { "epoch": 0.15946460181179656, "grad_norm": 404.218017578125, "learning_rate": 1.9133807804522863e-06, "loss": 19.4844, "step": 16846 }, { "epoch": 0.15947406783351162, "grad_norm": 1287.6866455078125, "learning_rate": 1.9133682988117457e-06, "loss": 58.3594, "step": 16847 }, { "epoch": 0.15948353385522665, "grad_norm": 482.8599548339844, "learning_rate": 1.913355816312698e-06, "loss": 32.2344, "step": 16848 }, { "epoch": 0.1594929998769417, "grad_norm": 352.47015380859375, "learning_rate": 1.913343332955155e-06, "loss": 20.1094, "step": 16849 }, { "epoch": 0.15950246589865677, "grad_norm": 3.843177080154419, "learning_rate": 1.913330848739129e-06, "loss": 1.0, "step": 16850 }, { "epoch": 0.15951193192037183, "grad_norm": 461.7691955566406, "learning_rate": 1.9133183636646314e-06, "loss": 37.4297, "step": 16851 }, { "epoch": 0.1595213979420869, "grad_norm": 571.4805908203125, "learning_rate": 1.9133058777316743e-06, "loss": 46.0938, "step": 16852 }, { "epoch": 0.15953086396380192, "grad_norm": 645.2913818359375, "learning_rate": 1.9132933909402683e-06, "loss": 66.125, "step": 16853 }, { "epoch": 0.15954032998551698, "grad_norm": 654.1763916015625, "learning_rate": 1.9132809032904267e-06, "loss": 38.4531, "step": 16854 }, { "epoch": 0.15954979600723204, "grad_norm": 445.5452880859375, "learning_rate": 1.91326841478216e-06, "loss": 36.7969, "step": 16855 }, { "epoch": 0.1595592620289471, "grad_norm": 402.40789794921875, "learning_rate": 1.9132559254154807e-06, "loss": 16.1328, "step": 16856 }, { "epoch": 0.15956872805066216, "grad_norm": 494.7954406738281, "learning_rate": 1.9132434351903997e-06, "loss": 21.1641, "step": 16857 }, { "epoch": 0.1595781940723772, "grad_norm": 230.38267517089844, "learning_rate": 1.91323094410693e-06, "loss": 24.4453, "step": 16858 }, { "epoch": 0.15958766009409225, "grad_norm": 276.19317626953125, "learning_rate": 1.913218452165082e-06, "loss": 20.1953, "step": 16859 }, { "epoch": 0.15959712611580731, "grad_norm": 250.1018524169922, "learning_rate": 1.913205959364868e-06, "loss": 25.6172, "step": 16860 }, { "epoch": 0.15960659213752237, "grad_norm": 976.7036743164062, "learning_rate": 1.9131934657063006e-06, "loss": 32.7422, "step": 16861 }, { "epoch": 0.1596160581592374, "grad_norm": 516.8192749023438, "learning_rate": 1.91318097118939e-06, "loss": 37.0, "step": 16862 }, { "epoch": 0.15962552418095247, "grad_norm": 789.1393432617188, "learning_rate": 1.9131684758141493e-06, "loss": 19.2109, "step": 16863 }, { "epoch": 0.15963499020266753, "grad_norm": 196.55267333984375, "learning_rate": 1.9131559795805892e-06, "loss": 17.4375, "step": 16864 }, { "epoch": 0.15964445622438259, "grad_norm": 426.2040710449219, "learning_rate": 1.913143482488722e-06, "loss": 16.2344, "step": 16865 }, { "epoch": 0.15965392224609765, "grad_norm": 384.6396789550781, "learning_rate": 1.9131309845385593e-06, "loss": 25.4688, "step": 16866 }, { "epoch": 0.15966338826781268, "grad_norm": 247.65512084960938, "learning_rate": 1.9131184857301127e-06, "loss": 12.7461, "step": 16867 }, { "epoch": 0.15967285428952774, "grad_norm": 244.25123596191406, "learning_rate": 1.9131059860633943e-06, "loss": 25.4297, "step": 16868 }, { "epoch": 0.1596823203112428, "grad_norm": 257.3616943359375, "learning_rate": 1.913093485538416e-06, "loss": 22.2734, "step": 16869 }, { "epoch": 0.15969178633295786, "grad_norm": 2.965759038925171, "learning_rate": 1.9130809841551887e-06, "loss": 0.9893, "step": 16870 }, { "epoch": 0.1597012523546729, "grad_norm": 341.5444641113281, "learning_rate": 1.9130684819137248e-06, "loss": 24.0938, "step": 16871 }, { "epoch": 0.15971071837638795, "grad_norm": 534.2518920898438, "learning_rate": 1.9130559788140365e-06, "loss": 55.2188, "step": 16872 }, { "epoch": 0.159720184398103, "grad_norm": 333.8830261230469, "learning_rate": 1.9130434748561344e-06, "loss": 36.5, "step": 16873 }, { "epoch": 0.15972965041981807, "grad_norm": 2.5867865085601807, "learning_rate": 1.913030970040031e-06, "loss": 0.8126, "step": 16874 }, { "epoch": 0.15973911644153313, "grad_norm": 787.484619140625, "learning_rate": 1.9130184643657376e-06, "loss": 30.3906, "step": 16875 }, { "epoch": 0.15974858246324816, "grad_norm": 477.4033508300781, "learning_rate": 1.9130059578332666e-06, "loss": 42.6562, "step": 16876 }, { "epoch": 0.15975804848496322, "grad_norm": 207.07969665527344, "learning_rate": 1.9129934504426292e-06, "loss": 20.9609, "step": 16877 }, { "epoch": 0.15976751450667828, "grad_norm": 400.5614013671875, "learning_rate": 1.9129809421938374e-06, "loss": 23.1406, "step": 16878 }, { "epoch": 0.15977698052839334, "grad_norm": 252.4215087890625, "learning_rate": 1.912968433086903e-06, "loss": 25.9219, "step": 16879 }, { "epoch": 0.15978644655010837, "grad_norm": 481.4036865234375, "learning_rate": 1.9129559231218373e-06, "loss": 20.9336, "step": 16880 }, { "epoch": 0.15979591257182343, "grad_norm": 593.870361328125, "learning_rate": 1.9129434122986527e-06, "loss": 49.5312, "step": 16881 }, { "epoch": 0.1598053785935385, "grad_norm": 336.33331298828125, "learning_rate": 1.912930900617361e-06, "loss": 40.3594, "step": 16882 }, { "epoch": 0.15981484461525355, "grad_norm": 3.0921714305877686, "learning_rate": 1.912918388077973e-06, "loss": 0.8838, "step": 16883 }, { "epoch": 0.15982431063696861, "grad_norm": 553.4419555664062, "learning_rate": 1.9129058746805015e-06, "loss": 48.6172, "step": 16884 }, { "epoch": 0.15983377665868365, "grad_norm": 285.0718078613281, "learning_rate": 1.9128933604249577e-06, "loss": 29.3125, "step": 16885 }, { "epoch": 0.1598432426803987, "grad_norm": 501.3463439941406, "learning_rate": 1.9128808453113534e-06, "loss": 38.1094, "step": 16886 }, { "epoch": 0.15985270870211377, "grad_norm": 2.746562957763672, "learning_rate": 1.912868329339701e-06, "loss": 0.7812, "step": 16887 }, { "epoch": 0.15986217472382883, "grad_norm": 3.340176582336426, "learning_rate": 1.9128558125100114e-06, "loss": 0.8965, "step": 16888 }, { "epoch": 0.15987164074554386, "grad_norm": 239.31446838378906, "learning_rate": 1.912843294822297e-06, "loss": 25.9453, "step": 16889 }, { "epoch": 0.15988110676725892, "grad_norm": 187.79762268066406, "learning_rate": 1.9128307762765694e-06, "loss": 20.8125, "step": 16890 }, { "epoch": 0.15989057278897398, "grad_norm": 403.58453369140625, "learning_rate": 1.9128182568728394e-06, "loss": 19.4297, "step": 16891 }, { "epoch": 0.15990003881068904, "grad_norm": 283.1717224121094, "learning_rate": 1.9128057366111205e-06, "loss": 21.8203, "step": 16892 }, { "epoch": 0.1599095048324041, "grad_norm": 581.7300415039062, "learning_rate": 1.912793215491423e-06, "loss": 37.1719, "step": 16893 }, { "epoch": 0.15991897085411913, "grad_norm": 3.4857993125915527, "learning_rate": 1.9127806935137597e-06, "loss": 0.853, "step": 16894 }, { "epoch": 0.1599284368758342, "grad_norm": 628.8172607421875, "learning_rate": 1.9127681706781414e-06, "loss": 55.5, "step": 16895 }, { "epoch": 0.15993790289754925, "grad_norm": 395.4135437011719, "learning_rate": 1.9127556469845813e-06, "loss": 18.4453, "step": 16896 }, { "epoch": 0.1599473689192643, "grad_norm": 3.3371591567993164, "learning_rate": 1.9127431224330895e-06, "loss": 0.9829, "step": 16897 }, { "epoch": 0.15995683494097934, "grad_norm": 297.6080322265625, "learning_rate": 1.9127305970236786e-06, "loss": 18.7266, "step": 16898 }, { "epoch": 0.1599663009626944, "grad_norm": 314.972412109375, "learning_rate": 1.9127180707563606e-06, "loss": 18.0234, "step": 16899 }, { "epoch": 0.15997576698440946, "grad_norm": 462.6767883300781, "learning_rate": 1.912705543631147e-06, "loss": 20.0938, "step": 16900 }, { "epoch": 0.15998523300612452, "grad_norm": 342.5396728515625, "learning_rate": 1.9126930156480493e-06, "loss": 25.2734, "step": 16901 }, { "epoch": 0.15999469902783958, "grad_norm": 449.50421142578125, "learning_rate": 1.9126804868070797e-06, "loss": 44.0, "step": 16902 }, { "epoch": 0.16000416504955461, "grad_norm": 849.6061401367188, "learning_rate": 1.91266795710825e-06, "loss": 20.8242, "step": 16903 }, { "epoch": 0.16001363107126967, "grad_norm": 704.3296508789062, "learning_rate": 1.912655426551571e-06, "loss": 73.2812, "step": 16904 }, { "epoch": 0.16002309709298473, "grad_norm": 1293.2872314453125, "learning_rate": 1.912642895137056e-06, "loss": 40.4375, "step": 16905 }, { "epoch": 0.1600325631146998, "grad_norm": 448.6490173339844, "learning_rate": 1.912630362864716e-06, "loss": 18.8906, "step": 16906 }, { "epoch": 0.16004202913641483, "grad_norm": 470.2567138671875, "learning_rate": 1.912617829734563e-06, "loss": 34.0469, "step": 16907 }, { "epoch": 0.1600514951581299, "grad_norm": 206.54473876953125, "learning_rate": 1.9126052957466082e-06, "loss": 13.2383, "step": 16908 }, { "epoch": 0.16006096117984495, "grad_norm": 496.5941162109375, "learning_rate": 1.9125927609008637e-06, "loss": 33.6719, "step": 16909 }, { "epoch": 0.16007042720156, "grad_norm": 273.2015075683594, "learning_rate": 1.9125802251973416e-06, "loss": 16.0039, "step": 16910 }, { "epoch": 0.16007989322327507, "grad_norm": 360.2948303222656, "learning_rate": 1.9125676886360534e-06, "loss": 29.1406, "step": 16911 }, { "epoch": 0.1600893592449901, "grad_norm": 2.364194393157959, "learning_rate": 1.912555151217011e-06, "loss": 0.7612, "step": 16912 }, { "epoch": 0.16009882526670516, "grad_norm": 370.0992431640625, "learning_rate": 1.9125426129402264e-06, "loss": 18.7617, "step": 16913 }, { "epoch": 0.16010829128842022, "grad_norm": 435.2680358886719, "learning_rate": 1.9125300738057105e-06, "loss": 25.5938, "step": 16914 }, { "epoch": 0.16011775731013528, "grad_norm": 261.8534240722656, "learning_rate": 1.9125175338134763e-06, "loss": 21.2734, "step": 16915 }, { "epoch": 0.1601272233318503, "grad_norm": 433.4310302734375, "learning_rate": 1.9125049929635345e-06, "loss": 36.3594, "step": 16916 }, { "epoch": 0.16013668935356537, "grad_norm": 342.3426208496094, "learning_rate": 1.9124924512558977e-06, "loss": 21.7734, "step": 16917 }, { "epoch": 0.16014615537528043, "grad_norm": 266.1431579589844, "learning_rate": 1.9124799086905774e-06, "loss": 22.5312, "step": 16918 }, { "epoch": 0.1601556213969955, "grad_norm": 230.7234344482422, "learning_rate": 1.912467365267585e-06, "loss": 8.1719, "step": 16919 }, { "epoch": 0.16016508741871055, "grad_norm": 272.4124450683594, "learning_rate": 1.9124548209869326e-06, "loss": 17.7656, "step": 16920 }, { "epoch": 0.16017455344042558, "grad_norm": 604.1654052734375, "learning_rate": 1.9124422758486324e-06, "loss": 31.4609, "step": 16921 }, { "epoch": 0.16018401946214064, "grad_norm": 478.78961181640625, "learning_rate": 1.9124297298526956e-06, "loss": 24.7109, "step": 16922 }, { "epoch": 0.1601934854838557, "grad_norm": 365.4930725097656, "learning_rate": 1.9124171829991346e-06, "loss": 27.8945, "step": 16923 }, { "epoch": 0.16020295150557076, "grad_norm": 524.9852905273438, "learning_rate": 1.91240463528796e-06, "loss": 43.375, "step": 16924 }, { "epoch": 0.1602124175272858, "grad_norm": 334.66845703125, "learning_rate": 1.9123920867191847e-06, "loss": 20.3906, "step": 16925 }, { "epoch": 0.16022188354900085, "grad_norm": 248.23683166503906, "learning_rate": 1.9123795372928204e-06, "loss": 21.0391, "step": 16926 }, { "epoch": 0.16023134957071591, "grad_norm": 598.56005859375, "learning_rate": 1.912366987008879e-06, "loss": 28.0, "step": 16927 }, { "epoch": 0.16024081559243097, "grad_norm": 504.17669677734375, "learning_rate": 1.9123544358673716e-06, "loss": 18.6367, "step": 16928 }, { "epoch": 0.16025028161414603, "grad_norm": 338.59442138671875, "learning_rate": 1.9123418838683107e-06, "loss": 29.6094, "step": 16929 }, { "epoch": 0.16025974763586107, "grad_norm": 205.1815185546875, "learning_rate": 1.912329331011707e-06, "loss": 19.7109, "step": 16930 }, { "epoch": 0.16026921365757613, "grad_norm": 349.1948547363281, "learning_rate": 1.9123167772975738e-06, "loss": 33.5938, "step": 16931 }, { "epoch": 0.16027867967929119, "grad_norm": 260.69061279296875, "learning_rate": 1.912304222725922e-06, "loss": 28.2578, "step": 16932 }, { "epoch": 0.16028814570100625, "grad_norm": 218.4563751220703, "learning_rate": 1.9122916672967633e-06, "loss": 19.3516, "step": 16933 }, { "epoch": 0.16029761172272128, "grad_norm": 515.3067016601562, "learning_rate": 1.91227911101011e-06, "loss": 30.2344, "step": 16934 }, { "epoch": 0.16030707774443634, "grad_norm": 349.8551330566406, "learning_rate": 1.912266553865974e-06, "loss": 30.2812, "step": 16935 }, { "epoch": 0.1603165437661514, "grad_norm": 518.0768432617188, "learning_rate": 1.9122539958643666e-06, "loss": 35.5859, "step": 16936 }, { "epoch": 0.16032600978786646, "grad_norm": 384.69476318359375, "learning_rate": 1.9122414370052994e-06, "loss": 21.6797, "step": 16937 }, { "epoch": 0.16033547580958152, "grad_norm": 4059.861083984375, "learning_rate": 1.912228877288785e-06, "loss": 21.1016, "step": 16938 }, { "epoch": 0.16034494183129655, "grad_norm": 390.7470397949219, "learning_rate": 1.912216316714835e-06, "loss": 39.7422, "step": 16939 }, { "epoch": 0.1603544078530116, "grad_norm": 477.29754638671875, "learning_rate": 1.9122037552834603e-06, "loss": 37.7031, "step": 16940 }, { "epoch": 0.16036387387472667, "grad_norm": 565.3533325195312, "learning_rate": 1.912191192994674e-06, "loss": 21.6172, "step": 16941 }, { "epoch": 0.16037333989644173, "grad_norm": 2.887160301208496, "learning_rate": 1.912178629848487e-06, "loss": 0.9512, "step": 16942 }, { "epoch": 0.1603828059181568, "grad_norm": 3.325629949569702, "learning_rate": 1.9121660658449113e-06, "loss": 0.8584, "step": 16943 }, { "epoch": 0.16039227193987182, "grad_norm": 409.7071228027344, "learning_rate": 1.912153500983959e-06, "loss": 17.9688, "step": 16944 }, { "epoch": 0.16040173796158688, "grad_norm": 252.21726989746094, "learning_rate": 1.912140935265642e-06, "loss": 23.0, "step": 16945 }, { "epoch": 0.16041120398330194, "grad_norm": 398.6767883300781, "learning_rate": 1.9121283686899717e-06, "loss": 19.9043, "step": 16946 }, { "epoch": 0.160420670005017, "grad_norm": 375.1625061035156, "learning_rate": 1.91211580125696e-06, "loss": 34.0625, "step": 16947 }, { "epoch": 0.16043013602673203, "grad_norm": 291.8757629394531, "learning_rate": 1.912103232966619e-06, "loss": 34.9141, "step": 16948 }, { "epoch": 0.1604396020484471, "grad_norm": 3.3349342346191406, "learning_rate": 1.91209066381896e-06, "loss": 0.9844, "step": 16949 }, { "epoch": 0.16044906807016215, "grad_norm": 410.27099609375, "learning_rate": 1.912078093813995e-06, "loss": 23.0156, "step": 16950 }, { "epoch": 0.16045853409187721, "grad_norm": 332.38470458984375, "learning_rate": 1.9120655229517368e-06, "loss": 22.7266, "step": 16951 }, { "epoch": 0.16046800011359227, "grad_norm": 220.82229614257812, "learning_rate": 1.9120529512321954e-06, "loss": 23.3125, "step": 16952 }, { "epoch": 0.1604774661353073, "grad_norm": 235.512939453125, "learning_rate": 1.912040378655384e-06, "loss": 16.2031, "step": 16953 }, { "epoch": 0.16048693215702237, "grad_norm": 295.41046142578125, "learning_rate": 1.9120278052213136e-06, "loss": 23.9141, "step": 16954 }, { "epoch": 0.16049639817873743, "grad_norm": 635.16552734375, "learning_rate": 1.912015230929997e-06, "loss": 35.7969, "step": 16955 }, { "epoch": 0.16050586420045249, "grad_norm": 694.1016235351562, "learning_rate": 1.9120026557814447e-06, "loss": 45.2969, "step": 16956 }, { "epoch": 0.16051533022216752, "grad_norm": 210.99777221679688, "learning_rate": 1.9119900797756698e-06, "loss": 21.6562, "step": 16957 }, { "epoch": 0.16052479624388258, "grad_norm": 1657.641357421875, "learning_rate": 1.9119775029126835e-06, "loss": 42.0938, "step": 16958 }, { "epoch": 0.16053426226559764, "grad_norm": 178.64962768554688, "learning_rate": 1.9119649251924973e-06, "loss": 20.0938, "step": 16959 }, { "epoch": 0.1605437282873127, "grad_norm": 300.8656311035156, "learning_rate": 1.9119523466151236e-06, "loss": 14.3203, "step": 16960 }, { "epoch": 0.16055319430902776, "grad_norm": 463.6260986328125, "learning_rate": 1.9119397671805745e-06, "loss": 27.0547, "step": 16961 }, { "epoch": 0.1605626603307428, "grad_norm": 407.12335205078125, "learning_rate": 1.9119271868888607e-06, "loss": 42.9375, "step": 16962 }, { "epoch": 0.16057212635245785, "grad_norm": 372.8177185058594, "learning_rate": 1.911914605739995e-06, "loss": 26.4766, "step": 16963 }, { "epoch": 0.1605815923741729, "grad_norm": 185.95860290527344, "learning_rate": 1.9119020237339885e-06, "loss": 15.7188, "step": 16964 }, { "epoch": 0.16059105839588797, "grad_norm": 273.1447448730469, "learning_rate": 1.9118894408708537e-06, "loss": 17.0234, "step": 16965 }, { "epoch": 0.160600524417603, "grad_norm": 337.231201171875, "learning_rate": 1.911876857150602e-06, "loss": 22.4531, "step": 16966 }, { "epoch": 0.16060999043931806, "grad_norm": 505.4077453613281, "learning_rate": 1.9118642725732455e-06, "loss": 30.7969, "step": 16967 }, { "epoch": 0.16061945646103312, "grad_norm": 199.98495483398438, "learning_rate": 1.911851687138796e-06, "loss": 19.5156, "step": 16968 }, { "epoch": 0.16062892248274818, "grad_norm": 273.60955810546875, "learning_rate": 1.911839100847265e-06, "loss": 21.6016, "step": 16969 }, { "epoch": 0.16063838850446324, "grad_norm": 358.0626220703125, "learning_rate": 1.911826513698665e-06, "loss": 50.457, "step": 16970 }, { "epoch": 0.16064785452617827, "grad_norm": 193.36729431152344, "learning_rate": 1.911813925693007e-06, "loss": 19.8633, "step": 16971 }, { "epoch": 0.16065732054789333, "grad_norm": 200.96035766601562, "learning_rate": 1.911801336830303e-06, "loss": 15.8516, "step": 16972 }, { "epoch": 0.1606667865696084, "grad_norm": 408.78057861328125, "learning_rate": 1.9117887471105658e-06, "loss": 20.3047, "step": 16973 }, { "epoch": 0.16067625259132345, "grad_norm": 186.53636169433594, "learning_rate": 1.911776156533806e-06, "loss": 21.3594, "step": 16974 }, { "epoch": 0.1606857186130385, "grad_norm": 261.13433837890625, "learning_rate": 1.911763565100036e-06, "loss": 16.9453, "step": 16975 }, { "epoch": 0.16069518463475355, "grad_norm": 266.4335021972656, "learning_rate": 1.9117509728092676e-06, "loss": 16.4688, "step": 16976 }, { "epoch": 0.1607046506564686, "grad_norm": 396.63690185546875, "learning_rate": 1.9117383796615127e-06, "loss": 42.2188, "step": 16977 }, { "epoch": 0.16071411667818367, "grad_norm": 2.821974277496338, "learning_rate": 1.9117257856567827e-06, "loss": 1.0352, "step": 16978 }, { "epoch": 0.16072358269989873, "grad_norm": 560.1165771484375, "learning_rate": 1.91171319079509e-06, "loss": 59.7969, "step": 16979 }, { "epoch": 0.16073304872161376, "grad_norm": 609.7498779296875, "learning_rate": 1.9117005950764464e-06, "loss": 50.6094, "step": 16980 }, { "epoch": 0.16074251474332882, "grad_norm": 656.9005126953125, "learning_rate": 1.911687998500863e-06, "loss": 29.3125, "step": 16981 }, { "epoch": 0.16075198076504388, "grad_norm": 324.0623779296875, "learning_rate": 1.911675401068353e-06, "loss": 16.6875, "step": 16982 }, { "epoch": 0.16076144678675894, "grad_norm": 327.7328796386719, "learning_rate": 1.9116628027789266e-06, "loss": 20.8242, "step": 16983 }, { "epoch": 0.16077091280847397, "grad_norm": 999.0296630859375, "learning_rate": 1.911650203632597e-06, "loss": 67.7734, "step": 16984 }, { "epoch": 0.16078037883018903, "grad_norm": 402.6531066894531, "learning_rate": 1.911637603629375e-06, "loss": 50.0156, "step": 16985 }, { "epoch": 0.1607898448519041, "grad_norm": 2.9536476135253906, "learning_rate": 1.9116250027692735e-06, "loss": 0.8442, "step": 16986 }, { "epoch": 0.16079931087361915, "grad_norm": 367.35345458984375, "learning_rate": 1.9116124010523035e-06, "loss": 16.125, "step": 16987 }, { "epoch": 0.1608087768953342, "grad_norm": 182.04039001464844, "learning_rate": 1.9115997984784774e-06, "loss": 16.4414, "step": 16988 }, { "epoch": 0.16081824291704924, "grad_norm": 372.857421875, "learning_rate": 1.911587195047807e-06, "loss": 16.6016, "step": 16989 }, { "epoch": 0.1608277089387643, "grad_norm": 766.78271484375, "learning_rate": 1.911574590760303e-06, "loss": 49.2188, "step": 16990 }, { "epoch": 0.16083717496047936, "grad_norm": 849.5000610351562, "learning_rate": 1.911561985615979e-06, "loss": 31.0625, "step": 16991 }, { "epoch": 0.16084664098219442, "grad_norm": 268.73284912109375, "learning_rate": 1.9115493796148455e-06, "loss": 19.4297, "step": 16992 }, { "epoch": 0.16085610700390945, "grad_norm": 419.7916564941406, "learning_rate": 1.9115367727569156e-06, "loss": 20.1289, "step": 16993 }, { "epoch": 0.16086557302562451, "grad_norm": 701.0419921875, "learning_rate": 1.9115241650421997e-06, "loss": 29.875, "step": 16994 }, { "epoch": 0.16087503904733957, "grad_norm": 851.1591796875, "learning_rate": 1.911511556470711e-06, "loss": 38.4062, "step": 16995 }, { "epoch": 0.16088450506905463, "grad_norm": 319.7021789550781, "learning_rate": 1.9114989470424604e-06, "loss": 33.1562, "step": 16996 }, { "epoch": 0.1608939710907697, "grad_norm": 653.3712768554688, "learning_rate": 1.91148633675746e-06, "loss": 39.0312, "step": 16997 }, { "epoch": 0.16090343711248473, "grad_norm": 411.3580322265625, "learning_rate": 1.911473725615722e-06, "loss": 43.3906, "step": 16998 }, { "epoch": 0.1609129031341998, "grad_norm": 1791.1370849609375, "learning_rate": 1.911461113617258e-06, "loss": 34.25, "step": 16999 }, { "epoch": 0.16092236915591485, "grad_norm": 565.5704956054688, "learning_rate": 1.9114485007620796e-06, "loss": 46.7422, "step": 17000 }, { "epoch": 0.1609318351776299, "grad_norm": 453.46075439453125, "learning_rate": 1.911435887050199e-06, "loss": 18.6758, "step": 17001 }, { "epoch": 0.16094130119934494, "grad_norm": 284.1117248535156, "learning_rate": 1.9114232724816278e-06, "loss": 27.4219, "step": 17002 }, { "epoch": 0.16095076722106, "grad_norm": 887.9302368164062, "learning_rate": 1.9114106570563786e-06, "loss": 46.3203, "step": 17003 }, { "epoch": 0.16096023324277506, "grad_norm": 470.27362060546875, "learning_rate": 1.9113980407744622e-06, "loss": 36.7969, "step": 17004 }, { "epoch": 0.16096969926449012, "grad_norm": 293.1057434082031, "learning_rate": 1.9113854236358907e-06, "loss": 24.1172, "step": 17005 }, { "epoch": 0.16097916528620518, "grad_norm": 425.1702880859375, "learning_rate": 1.9113728056406766e-06, "loss": 37.9453, "step": 17006 }, { "epoch": 0.1609886313079202, "grad_norm": 924.8650512695312, "learning_rate": 1.911360186788831e-06, "loss": 22.0312, "step": 17007 }, { "epoch": 0.16099809732963527, "grad_norm": 1134.1319580078125, "learning_rate": 1.9113475670803666e-06, "loss": 37.6367, "step": 17008 }, { "epoch": 0.16100756335135033, "grad_norm": 557.943603515625, "learning_rate": 1.9113349465152948e-06, "loss": 27.0703, "step": 17009 }, { "epoch": 0.1610170293730654, "grad_norm": 460.73162841796875, "learning_rate": 1.9113223250936266e-06, "loss": 35.6875, "step": 17010 }, { "epoch": 0.16102649539478042, "grad_norm": 715.5545654296875, "learning_rate": 1.9113097028153756e-06, "loss": 71.7734, "step": 17011 }, { "epoch": 0.16103596141649548, "grad_norm": 735.2754516601562, "learning_rate": 1.9112970796805523e-06, "loss": 34.3438, "step": 17012 }, { "epoch": 0.16104542743821054, "grad_norm": 438.83642578125, "learning_rate": 1.911284455689169e-06, "loss": 21.6133, "step": 17013 }, { "epoch": 0.1610548934599256, "grad_norm": 244.44195556640625, "learning_rate": 1.9112718308412383e-06, "loss": 17.8906, "step": 17014 }, { "epoch": 0.16106435948164066, "grad_norm": 297.4529113769531, "learning_rate": 1.9112592051367704e-06, "loss": 25.6016, "step": 17015 }, { "epoch": 0.1610738255033557, "grad_norm": 456.4322814941406, "learning_rate": 1.9112465785757785e-06, "loss": 32.5312, "step": 17016 }, { "epoch": 0.16108329152507075, "grad_norm": 407.0337829589844, "learning_rate": 1.911233951158274e-06, "loss": 22.6875, "step": 17017 }, { "epoch": 0.16109275754678581, "grad_norm": 218.86134338378906, "learning_rate": 1.9112213228842694e-06, "loss": 21.5938, "step": 17018 }, { "epoch": 0.16110222356850087, "grad_norm": 395.30316162109375, "learning_rate": 1.9112086937537756e-06, "loss": 21.6406, "step": 17019 }, { "epoch": 0.1611116895902159, "grad_norm": 353.6055908203125, "learning_rate": 1.911196063766805e-06, "loss": 27.7734, "step": 17020 }, { "epoch": 0.16112115561193097, "grad_norm": 379.74713134765625, "learning_rate": 1.9111834329233693e-06, "loss": 25.7578, "step": 17021 }, { "epoch": 0.16113062163364603, "grad_norm": 3.2509233951568604, "learning_rate": 1.91117080122348e-06, "loss": 0.9849, "step": 17022 }, { "epoch": 0.1611400876553611, "grad_norm": 999.8805541992188, "learning_rate": 1.91115816866715e-06, "loss": 67.8281, "step": 17023 }, { "epoch": 0.16114955367707615, "grad_norm": 282.7792053222656, "learning_rate": 1.911145535254391e-06, "loss": 16.5625, "step": 17024 }, { "epoch": 0.16115901969879118, "grad_norm": 3.4331343173980713, "learning_rate": 1.911132900985214e-06, "loss": 0.9604, "step": 17025 }, { "epoch": 0.16116848572050624, "grad_norm": 254.82723999023438, "learning_rate": 1.9111202658596312e-06, "loss": 22.2656, "step": 17026 }, { "epoch": 0.1611779517422213, "grad_norm": 346.1422119140625, "learning_rate": 1.9111076298776543e-06, "loss": 25.6172, "step": 17027 }, { "epoch": 0.16118741776393636, "grad_norm": 163.77120971679688, "learning_rate": 1.911094993039296e-06, "loss": 15.6172, "step": 17028 }, { "epoch": 0.16119688378565142, "grad_norm": 3.4059479236602783, "learning_rate": 1.9110823553445678e-06, "loss": 0.9683, "step": 17029 }, { "epoch": 0.16120634980736645, "grad_norm": 668.4746704101562, "learning_rate": 1.9110697167934815e-06, "loss": 28.6016, "step": 17030 }, { "epoch": 0.1612158158290815, "grad_norm": 3.196014642715454, "learning_rate": 1.9110570773860482e-06, "loss": 0.853, "step": 17031 }, { "epoch": 0.16122528185079657, "grad_norm": 410.2226867675781, "learning_rate": 1.9110444371222812e-06, "loss": 23.3906, "step": 17032 }, { "epoch": 0.16123474787251163, "grad_norm": 191.06529235839844, "learning_rate": 1.9110317960021914e-06, "loss": 19.6875, "step": 17033 }, { "epoch": 0.16124421389422666, "grad_norm": 378.4808654785156, "learning_rate": 1.911019154025791e-06, "loss": 27.2344, "step": 17034 }, { "epoch": 0.16125367991594172, "grad_norm": 589.58837890625, "learning_rate": 1.911006511193092e-06, "loss": 19.125, "step": 17035 }, { "epoch": 0.16126314593765678, "grad_norm": 322.2188720703125, "learning_rate": 1.910993867504106e-06, "loss": 36.9531, "step": 17036 }, { "epoch": 0.16127261195937184, "grad_norm": 387.8877258300781, "learning_rate": 1.9109812229588454e-06, "loss": 23.7422, "step": 17037 }, { "epoch": 0.1612820779810869, "grad_norm": 3.699342727661133, "learning_rate": 1.9109685775573213e-06, "loss": 0.9492, "step": 17038 }, { "epoch": 0.16129154400280193, "grad_norm": 512.0419921875, "learning_rate": 1.9109559312995463e-06, "loss": 44.0156, "step": 17039 }, { "epoch": 0.161301010024517, "grad_norm": 410.6933288574219, "learning_rate": 1.9109432841855316e-06, "loss": 8.3359, "step": 17040 }, { "epoch": 0.16131047604623205, "grad_norm": 211.17330932617188, "learning_rate": 1.91093063621529e-06, "loss": 20.5, "step": 17041 }, { "epoch": 0.16131994206794711, "grad_norm": 206.46800231933594, "learning_rate": 1.9109179873888325e-06, "loss": 12.7773, "step": 17042 }, { "epoch": 0.16132940808966215, "grad_norm": 316.2215576171875, "learning_rate": 1.9109053377061713e-06, "loss": 26.2578, "step": 17043 }, { "epoch": 0.1613388741113772, "grad_norm": 589.4833374023438, "learning_rate": 1.9108926871673187e-06, "loss": 54.6797, "step": 17044 }, { "epoch": 0.16134834013309227, "grad_norm": 311.3746643066406, "learning_rate": 1.9108800357722856e-06, "loss": 14.4609, "step": 17045 }, { "epoch": 0.16135780615480733, "grad_norm": 214.340576171875, "learning_rate": 1.910867383521085e-06, "loss": 20.2148, "step": 17046 }, { "epoch": 0.16136727217652239, "grad_norm": 375.3583984375, "learning_rate": 1.9108547304137283e-06, "loss": 23.2109, "step": 17047 }, { "epoch": 0.16137673819823742, "grad_norm": 124.23546600341797, "learning_rate": 1.9108420764502274e-06, "loss": 19.25, "step": 17048 }, { "epoch": 0.16138620421995248, "grad_norm": 349.0888366699219, "learning_rate": 1.910829421630594e-06, "loss": 30.8438, "step": 17049 }, { "epoch": 0.16139567024166754, "grad_norm": 224.28025817871094, "learning_rate": 1.9108167659548405e-06, "loss": 20.1484, "step": 17050 }, { "epoch": 0.1614051362633826, "grad_norm": 274.1299133300781, "learning_rate": 1.910804109422978e-06, "loss": 10.8281, "step": 17051 }, { "epoch": 0.16141460228509763, "grad_norm": 382.4142761230469, "learning_rate": 1.910791452035019e-06, "loss": 15.1406, "step": 17052 }, { "epoch": 0.1614240683068127, "grad_norm": 495.7969055175781, "learning_rate": 1.910778793790976e-06, "loss": 49.9062, "step": 17053 }, { "epoch": 0.16143353432852775, "grad_norm": 356.5565490722656, "learning_rate": 1.9107661346908593e-06, "loss": 34.8672, "step": 17054 }, { "epoch": 0.1614430003502428, "grad_norm": 659.2477416992188, "learning_rate": 1.9107534747346825e-06, "loss": 25.2891, "step": 17055 }, { "epoch": 0.16145246637195787, "grad_norm": 320.6579895019531, "learning_rate": 1.910740813922456e-06, "loss": 19.9141, "step": 17056 }, { "epoch": 0.1614619323936729, "grad_norm": 377.04083251953125, "learning_rate": 1.9107281522541927e-06, "loss": 40.1562, "step": 17057 }, { "epoch": 0.16147139841538796, "grad_norm": 190.27761840820312, "learning_rate": 1.910715489729904e-06, "loss": 15.5312, "step": 17058 }, { "epoch": 0.16148086443710302, "grad_norm": 348.1722412109375, "learning_rate": 1.9107028263496023e-06, "loss": 22.9219, "step": 17059 }, { "epoch": 0.16149033045881808, "grad_norm": 231.05259704589844, "learning_rate": 1.910690162113299e-06, "loss": 20.1445, "step": 17060 }, { "epoch": 0.16149979648053311, "grad_norm": 385.2059631347656, "learning_rate": 1.910677497021006e-06, "loss": 39.5312, "step": 17061 }, { "epoch": 0.16150926250224817, "grad_norm": 549.7839965820312, "learning_rate": 1.9106648310727358e-06, "loss": 19.6016, "step": 17062 }, { "epoch": 0.16151872852396323, "grad_norm": 1272.0810546875, "learning_rate": 1.9106521642684996e-06, "loss": 20.4688, "step": 17063 }, { "epoch": 0.1615281945456783, "grad_norm": 366.4230041503906, "learning_rate": 1.9106394966083095e-06, "loss": 56.5938, "step": 17064 }, { "epoch": 0.16153766056739335, "grad_norm": 610.529541015625, "learning_rate": 1.9106268280921774e-06, "loss": 41.9062, "step": 17065 }, { "epoch": 0.1615471265891084, "grad_norm": 524.8357543945312, "learning_rate": 1.910614158720116e-06, "loss": 41.5703, "step": 17066 }, { "epoch": 0.16155659261082345, "grad_norm": 379.4273986816406, "learning_rate": 1.910601488492136e-06, "loss": 44.125, "step": 17067 }, { "epoch": 0.1615660586325385, "grad_norm": 3.61087703704834, "learning_rate": 1.91058881740825e-06, "loss": 1.1064, "step": 17068 }, { "epoch": 0.16157552465425357, "grad_norm": 458.9352722167969, "learning_rate": 1.91057614546847e-06, "loss": 41.8594, "step": 17069 }, { "epoch": 0.1615849906759686, "grad_norm": 501.1684265136719, "learning_rate": 1.910563472672807e-06, "loss": 33.3906, "step": 17070 }, { "epoch": 0.16159445669768366, "grad_norm": 3.0331943035125732, "learning_rate": 1.910550799021274e-06, "loss": 0.8342, "step": 17071 }, { "epoch": 0.16160392271939872, "grad_norm": 393.4090270996094, "learning_rate": 1.9105381245138826e-06, "loss": 49.0938, "step": 17072 }, { "epoch": 0.16161338874111378, "grad_norm": 292.204833984375, "learning_rate": 1.9105254491506444e-06, "loss": 31.2969, "step": 17073 }, { "epoch": 0.16162285476282884, "grad_norm": 3.132530927658081, "learning_rate": 1.910512772931572e-06, "loss": 0.9712, "step": 17074 }, { "epoch": 0.16163232078454387, "grad_norm": 378.35345458984375, "learning_rate": 1.9105000958566758e-06, "loss": 35.7422, "step": 17075 }, { "epoch": 0.16164178680625893, "grad_norm": 336.9638977050781, "learning_rate": 1.9104874179259694e-06, "loss": 25.6641, "step": 17076 }, { "epoch": 0.161651252827974, "grad_norm": 373.9454040527344, "learning_rate": 1.910474739139464e-06, "loss": 47.8828, "step": 17077 }, { "epoch": 0.16166071884968905, "grad_norm": 208.96713256835938, "learning_rate": 1.9104620594971718e-06, "loss": 18.4766, "step": 17078 }, { "epoch": 0.16167018487140408, "grad_norm": 2.5472519397735596, "learning_rate": 1.910449378999104e-06, "loss": 0.853, "step": 17079 }, { "epoch": 0.16167965089311914, "grad_norm": 2.880476474761963, "learning_rate": 1.910436697645273e-06, "loss": 0.9434, "step": 17080 }, { "epoch": 0.1616891169148342, "grad_norm": 200.97308349609375, "learning_rate": 1.9104240154356912e-06, "loss": 20.8398, "step": 17081 }, { "epoch": 0.16169858293654926, "grad_norm": 3.1474947929382324, "learning_rate": 1.9104113323703702e-06, "loss": 0.9336, "step": 17082 }, { "epoch": 0.16170804895826432, "grad_norm": 545.2364501953125, "learning_rate": 1.9103986484493215e-06, "loss": 49.1406, "step": 17083 }, { "epoch": 0.16171751497997935, "grad_norm": 546.783447265625, "learning_rate": 1.910385963672557e-06, "loss": 44.0312, "step": 17084 }, { "epoch": 0.16172698100169441, "grad_norm": 3.403522491455078, "learning_rate": 1.910373278040089e-06, "loss": 0.9551, "step": 17085 }, { "epoch": 0.16173644702340947, "grad_norm": 3.383004665374756, "learning_rate": 1.9103605915519295e-06, "loss": 0.8813, "step": 17086 }, { "epoch": 0.16174591304512453, "grad_norm": 353.47833251953125, "learning_rate": 1.9103479042080904e-06, "loss": 20.0, "step": 17087 }, { "epoch": 0.16175537906683957, "grad_norm": 285.5270080566406, "learning_rate": 1.9103352160085834e-06, "loss": 29.4375, "step": 17088 }, { "epoch": 0.16176484508855463, "grad_norm": 478.9138488769531, "learning_rate": 1.91032252695342e-06, "loss": 39.9375, "step": 17089 }, { "epoch": 0.1617743111102697, "grad_norm": 551.6159057617188, "learning_rate": 1.9103098370426135e-06, "loss": 26.2344, "step": 17090 }, { "epoch": 0.16178377713198475, "grad_norm": 810.7564697265625, "learning_rate": 1.9102971462761744e-06, "loss": 36.8047, "step": 17091 }, { "epoch": 0.1617932431536998, "grad_norm": 329.4029541015625, "learning_rate": 1.9102844546541156e-06, "loss": 22.7969, "step": 17092 }, { "epoch": 0.16180270917541484, "grad_norm": 710.7526245117188, "learning_rate": 1.9102717621764484e-06, "loss": 41.8633, "step": 17093 }, { "epoch": 0.1618121751971299, "grad_norm": 411.8090515136719, "learning_rate": 1.9102590688431848e-06, "loss": 33.3438, "step": 17094 }, { "epoch": 0.16182164121884496, "grad_norm": 319.9405822753906, "learning_rate": 1.910246374654337e-06, "loss": 22.2578, "step": 17095 }, { "epoch": 0.16183110724056002, "grad_norm": 258.8850402832031, "learning_rate": 1.910233679609917e-06, "loss": 6.3574, "step": 17096 }, { "epoch": 0.16184057326227505, "grad_norm": 419.0711975097656, "learning_rate": 1.9102209837099364e-06, "loss": 38.6719, "step": 17097 }, { "epoch": 0.1618500392839901, "grad_norm": 330.5131530761719, "learning_rate": 1.9102082869544072e-06, "loss": 39.4219, "step": 17098 }, { "epoch": 0.16185950530570517, "grad_norm": 418.7228088378906, "learning_rate": 1.9101955893433418e-06, "loss": 19.0078, "step": 17099 }, { "epoch": 0.16186897132742023, "grad_norm": 332.8175964355469, "learning_rate": 1.9101828908767514e-06, "loss": 9.0625, "step": 17100 }, { "epoch": 0.1618784373491353, "grad_norm": 587.3089599609375, "learning_rate": 1.9101701915546485e-06, "loss": 19.3672, "step": 17101 }, { "epoch": 0.16188790337085032, "grad_norm": 210.52789306640625, "learning_rate": 1.9101574913770444e-06, "loss": 8.5, "step": 17102 }, { "epoch": 0.16189736939256538, "grad_norm": 234.1007843017578, "learning_rate": 1.9101447903439523e-06, "loss": 17.2109, "step": 17103 }, { "epoch": 0.16190683541428044, "grad_norm": 239.49139404296875, "learning_rate": 1.9101320884553824e-06, "loss": 21.0938, "step": 17104 }, { "epoch": 0.1619163014359955, "grad_norm": 542.4743041992188, "learning_rate": 1.910119385711348e-06, "loss": 46.6719, "step": 17105 }, { "epoch": 0.16192576745771053, "grad_norm": 558.11572265625, "learning_rate": 1.9101066821118605e-06, "loss": 49.0469, "step": 17106 }, { "epoch": 0.1619352334794256, "grad_norm": 464.9468078613281, "learning_rate": 1.910093977656932e-06, "loss": 47.0156, "step": 17107 }, { "epoch": 0.16194469950114065, "grad_norm": 304.21148681640625, "learning_rate": 1.9100812723465745e-06, "loss": 17.2031, "step": 17108 }, { "epoch": 0.16195416552285571, "grad_norm": 727.4903564453125, "learning_rate": 1.9100685661807994e-06, "loss": 32.5352, "step": 17109 }, { "epoch": 0.16196363154457077, "grad_norm": 581.537841796875, "learning_rate": 1.9100558591596198e-06, "loss": 26.0703, "step": 17110 }, { "epoch": 0.1619730975662858, "grad_norm": 380.3512268066406, "learning_rate": 1.910043151283046e-06, "loss": 28.457, "step": 17111 }, { "epoch": 0.16198256358800087, "grad_norm": 359.89471435546875, "learning_rate": 1.9100304425510915e-06, "loss": 22.5703, "step": 17112 }, { "epoch": 0.16199202960971593, "grad_norm": 300.28375244140625, "learning_rate": 1.910017732963767e-06, "loss": 27.3906, "step": 17113 }, { "epoch": 0.162001495631431, "grad_norm": 706.1578369140625, "learning_rate": 1.9100050225210855e-06, "loss": 30.4219, "step": 17114 }, { "epoch": 0.16201096165314605, "grad_norm": 971.186279296875, "learning_rate": 1.909992311223058e-06, "loss": 27.9219, "step": 17115 }, { "epoch": 0.16202042767486108, "grad_norm": 251.74732971191406, "learning_rate": 1.909979599069698e-06, "loss": 18.0859, "step": 17116 }, { "epoch": 0.16202989369657614, "grad_norm": 385.5003356933594, "learning_rate": 1.909966886061015e-06, "loss": 38.8906, "step": 17117 }, { "epoch": 0.1620393597182912, "grad_norm": 413.82977294921875, "learning_rate": 1.909954172197023e-06, "loss": 49.9375, "step": 17118 }, { "epoch": 0.16204882574000626, "grad_norm": 273.7763366699219, "learning_rate": 1.9099414574777334e-06, "loss": 17.4062, "step": 17119 }, { "epoch": 0.1620582917617213, "grad_norm": 3.4670865535736084, "learning_rate": 1.909928741903158e-06, "loss": 0.9517, "step": 17120 }, { "epoch": 0.16206775778343635, "grad_norm": 357.6307373046875, "learning_rate": 1.9099160254733086e-06, "loss": 32.1562, "step": 17121 }, { "epoch": 0.1620772238051514, "grad_norm": 415.7175598144531, "learning_rate": 1.909903308188197e-06, "loss": 29.5156, "step": 17122 }, { "epoch": 0.16208668982686647, "grad_norm": 406.2748107910156, "learning_rate": 1.9098905900478363e-06, "loss": 32.8125, "step": 17123 }, { "epoch": 0.16209615584858153, "grad_norm": 406.7070617675781, "learning_rate": 1.909877871052237e-06, "loss": 36.2188, "step": 17124 }, { "epoch": 0.16210562187029656, "grad_norm": 560.647216796875, "learning_rate": 1.9098651512014118e-06, "loss": 40.3633, "step": 17125 }, { "epoch": 0.16211508789201162, "grad_norm": 456.6175537109375, "learning_rate": 1.9098524304953725e-06, "loss": 66.1875, "step": 17126 }, { "epoch": 0.16212455391372668, "grad_norm": 365.978271484375, "learning_rate": 1.909839708934131e-06, "loss": 11.2305, "step": 17127 }, { "epoch": 0.16213401993544174, "grad_norm": 690.6747436523438, "learning_rate": 1.9098269865177e-06, "loss": 42.918, "step": 17128 }, { "epoch": 0.16214348595715677, "grad_norm": 200.08790588378906, "learning_rate": 1.9098142632460903e-06, "loss": 14.375, "step": 17129 }, { "epoch": 0.16215295197887183, "grad_norm": 3.420147657394409, "learning_rate": 1.9098015391193144e-06, "loss": 0.9441, "step": 17130 }, { "epoch": 0.1621624180005869, "grad_norm": 457.46514892578125, "learning_rate": 1.9097888141373843e-06, "loss": 43.7812, "step": 17131 }, { "epoch": 0.16217188402230195, "grad_norm": 174.50579833984375, "learning_rate": 1.909776088300312e-06, "loss": 20.0234, "step": 17132 }, { "epoch": 0.16218135004401701, "grad_norm": 236.48043823242188, "learning_rate": 1.9097633616081094e-06, "loss": 19.0, "step": 17133 }, { "epoch": 0.16219081606573205, "grad_norm": 301.0186462402344, "learning_rate": 1.909750634060788e-06, "loss": 17.6484, "step": 17134 }, { "epoch": 0.1622002820874471, "grad_norm": 325.7938232421875, "learning_rate": 1.909737905658361e-06, "loss": 30.5938, "step": 17135 }, { "epoch": 0.16220974810916217, "grad_norm": 584.9515380859375, "learning_rate": 1.909725176400839e-06, "loss": 41.5391, "step": 17136 }, { "epoch": 0.16221921413087723, "grad_norm": 362.670654296875, "learning_rate": 1.909712446288234e-06, "loss": 38.8281, "step": 17137 }, { "epoch": 0.16222868015259226, "grad_norm": 193.35032653808594, "learning_rate": 1.9096997153205592e-06, "loss": 14.6484, "step": 17138 }, { "epoch": 0.16223814617430732, "grad_norm": 347.5096435546875, "learning_rate": 1.909686983497826e-06, "loss": 27.2891, "step": 17139 }, { "epoch": 0.16224761219602238, "grad_norm": 312.19268798828125, "learning_rate": 1.9096742508200457e-06, "loss": 36.0312, "step": 17140 }, { "epoch": 0.16225707821773744, "grad_norm": 794.9446411132812, "learning_rate": 1.909661517287231e-06, "loss": 47.4453, "step": 17141 }, { "epoch": 0.1622665442394525, "grad_norm": 194.83502197265625, "learning_rate": 1.9096487828993936e-06, "loss": 18.3672, "step": 17142 }, { "epoch": 0.16227601026116753, "grad_norm": 364.40667724609375, "learning_rate": 1.9096360476565457e-06, "loss": 18.5859, "step": 17143 }, { "epoch": 0.1622854762828826, "grad_norm": 259.6159973144531, "learning_rate": 1.909623311558699e-06, "loss": 19.2031, "step": 17144 }, { "epoch": 0.16229494230459765, "grad_norm": 277.787841796875, "learning_rate": 1.9096105746058652e-06, "loss": 10.9961, "step": 17145 }, { "epoch": 0.1623044083263127, "grad_norm": 252.2308807373047, "learning_rate": 1.909597836798057e-06, "loss": 18.8047, "step": 17146 }, { "epoch": 0.16231387434802774, "grad_norm": 227.10679626464844, "learning_rate": 1.9095850981352862e-06, "loss": 13.8008, "step": 17147 }, { "epoch": 0.1623233403697428, "grad_norm": 302.960693359375, "learning_rate": 1.9095723586175643e-06, "loss": 13.9688, "step": 17148 }, { "epoch": 0.16233280639145786, "grad_norm": 272.6726379394531, "learning_rate": 1.9095596182449032e-06, "loss": 21.3203, "step": 17149 }, { "epoch": 0.16234227241317292, "grad_norm": 611.236083984375, "learning_rate": 1.909546877017316e-06, "loss": 40.25, "step": 17150 }, { "epoch": 0.16235173843488798, "grad_norm": 513.290283203125, "learning_rate": 1.9095341349348136e-06, "loss": 28.9688, "step": 17151 }, { "epoch": 0.16236120445660301, "grad_norm": 3.0989832878112793, "learning_rate": 1.909521391997408e-06, "loss": 1.0117, "step": 17152 }, { "epoch": 0.16237067047831807, "grad_norm": 468.90093994140625, "learning_rate": 1.909508648205112e-06, "loss": 26.7031, "step": 17153 }, { "epoch": 0.16238013650003313, "grad_norm": 606.1409912109375, "learning_rate": 1.9094959035579365e-06, "loss": 16.1016, "step": 17154 }, { "epoch": 0.1623896025217482, "grad_norm": 329.4105224609375, "learning_rate": 1.9094831580558942e-06, "loss": 21.1797, "step": 17155 }, { "epoch": 0.16239906854346323, "grad_norm": 3.0354511737823486, "learning_rate": 1.909470411698997e-06, "loss": 0.7454, "step": 17156 }, { "epoch": 0.1624085345651783, "grad_norm": 380.9478454589844, "learning_rate": 1.909457664487257e-06, "loss": 19.1406, "step": 17157 }, { "epoch": 0.16241800058689335, "grad_norm": 145.47605895996094, "learning_rate": 1.909444916420685e-06, "loss": 12.0547, "step": 17158 }, { "epoch": 0.1624274666086084, "grad_norm": 391.83172607421875, "learning_rate": 1.909432167499295e-06, "loss": 13.9141, "step": 17159 }, { "epoch": 0.16243693263032347, "grad_norm": 740.0308837890625, "learning_rate": 1.9094194177230975e-06, "loss": 19.3789, "step": 17160 }, { "epoch": 0.1624463986520385, "grad_norm": 523.7193603515625, "learning_rate": 1.9094066670921047e-06, "loss": 34.125, "step": 17161 }, { "epoch": 0.16245586467375356, "grad_norm": 244.2097930908203, "learning_rate": 1.909393915606329e-06, "loss": 15.0312, "step": 17162 }, { "epoch": 0.16246533069546862, "grad_norm": 218.5679168701172, "learning_rate": 1.9093811632657823e-06, "loss": 18.418, "step": 17163 }, { "epoch": 0.16247479671718368, "grad_norm": 226.27503967285156, "learning_rate": 1.9093684100704764e-06, "loss": 24.9688, "step": 17164 }, { "epoch": 0.1624842627388987, "grad_norm": 838.9443969726562, "learning_rate": 1.909355656020423e-06, "loss": 48.0156, "step": 17165 }, { "epoch": 0.16249372876061377, "grad_norm": 438.9240417480469, "learning_rate": 1.909342901115635e-06, "loss": 23.0781, "step": 17166 }, { "epoch": 0.16250319478232883, "grad_norm": 351.3302307128906, "learning_rate": 1.9093301453561236e-06, "loss": 19.8906, "step": 17167 }, { "epoch": 0.1625126608040439, "grad_norm": 199.6851806640625, "learning_rate": 1.9093173887419014e-06, "loss": 15.8516, "step": 17168 }, { "epoch": 0.16252212682575895, "grad_norm": 2.6602160930633545, "learning_rate": 1.9093046312729793e-06, "loss": 0.7588, "step": 17169 }, { "epoch": 0.16253159284747398, "grad_norm": 819.4470825195312, "learning_rate": 1.9092918729493704e-06, "loss": 28.4609, "step": 17170 }, { "epoch": 0.16254105886918904, "grad_norm": 236.28158569335938, "learning_rate": 1.9092791137710863e-06, "loss": 11.3594, "step": 17171 }, { "epoch": 0.1625505248909041, "grad_norm": 322.6043701171875, "learning_rate": 1.9092663537381387e-06, "loss": 24.6641, "step": 17172 }, { "epoch": 0.16255999091261916, "grad_norm": 577.3320922851562, "learning_rate": 1.9092535928505404e-06, "loss": 11.1816, "step": 17173 }, { "epoch": 0.1625694569343342, "grad_norm": 671.29638671875, "learning_rate": 1.9092408311083023e-06, "loss": 41.6562, "step": 17174 }, { "epoch": 0.16257892295604925, "grad_norm": 242.12611389160156, "learning_rate": 1.9092280685114373e-06, "loss": 19.625, "step": 17175 }, { "epoch": 0.16258838897776431, "grad_norm": 1076.1988525390625, "learning_rate": 1.909215305059957e-06, "loss": 64.9688, "step": 17176 }, { "epoch": 0.16259785499947937, "grad_norm": 197.93309020996094, "learning_rate": 1.9092025407538735e-06, "loss": 14.1484, "step": 17177 }, { "epoch": 0.16260732102119443, "grad_norm": 441.7080078125, "learning_rate": 1.909189775593199e-06, "loss": 44.5156, "step": 17178 }, { "epoch": 0.16261678704290947, "grad_norm": 458.0302734375, "learning_rate": 1.9091770095779448e-06, "loss": 35.9219, "step": 17179 }, { "epoch": 0.16262625306462453, "grad_norm": 3.109907627105713, "learning_rate": 1.9091642427081233e-06, "loss": 0.7944, "step": 17180 }, { "epoch": 0.1626357190863396, "grad_norm": 606.73828125, "learning_rate": 1.909151474983747e-06, "loss": 18.5, "step": 17181 }, { "epoch": 0.16264518510805465, "grad_norm": 1083.8465576171875, "learning_rate": 1.909138706404827e-06, "loss": 63.2031, "step": 17182 }, { "epoch": 0.16265465112976968, "grad_norm": 293.37738037109375, "learning_rate": 1.9091259369713762e-06, "loss": 25.9375, "step": 17183 }, { "epoch": 0.16266411715148474, "grad_norm": 213.6715087890625, "learning_rate": 1.909113166683406e-06, "loss": 17.4219, "step": 17184 }, { "epoch": 0.1626735831731998, "grad_norm": 178.16970825195312, "learning_rate": 1.9091003955409283e-06, "loss": 20.375, "step": 17185 }, { "epoch": 0.16268304919491486, "grad_norm": 467.533935546875, "learning_rate": 1.909087623543956e-06, "loss": 18.2266, "step": 17186 }, { "epoch": 0.16269251521662992, "grad_norm": 580.991943359375, "learning_rate": 1.9090748506925e-06, "loss": 38.4531, "step": 17187 }, { "epoch": 0.16270198123834495, "grad_norm": 470.53033447265625, "learning_rate": 1.9090620769865725e-06, "loss": 48.125, "step": 17188 }, { "epoch": 0.16271144726006, "grad_norm": 3.287400960922241, "learning_rate": 1.909049302426186e-06, "loss": 0.9988, "step": 17189 }, { "epoch": 0.16272091328177507, "grad_norm": 806.9395751953125, "learning_rate": 1.9090365270113527e-06, "loss": 31.1328, "step": 17190 }, { "epoch": 0.16273037930349013, "grad_norm": 2.913935899734497, "learning_rate": 1.9090237507420837e-06, "loss": 0.9429, "step": 17191 }, { "epoch": 0.16273984532520516, "grad_norm": 335.96502685546875, "learning_rate": 1.909010973618392e-06, "loss": 16.707, "step": 17192 }, { "epoch": 0.16274931134692022, "grad_norm": 1045.2623291015625, "learning_rate": 1.908998195640289e-06, "loss": 21.4844, "step": 17193 }, { "epoch": 0.16275877736863528, "grad_norm": 301.88983154296875, "learning_rate": 1.9089854168077864e-06, "loss": 14.5, "step": 17194 }, { "epoch": 0.16276824339035034, "grad_norm": 453.4959411621094, "learning_rate": 1.908972637120897e-06, "loss": 37.8906, "step": 17195 }, { "epoch": 0.1627777094120654, "grad_norm": 572.2178344726562, "learning_rate": 1.908959856579632e-06, "loss": 66.0312, "step": 17196 }, { "epoch": 0.16278717543378043, "grad_norm": 702.9304809570312, "learning_rate": 1.9089470751840046e-06, "loss": 52.1797, "step": 17197 }, { "epoch": 0.1627966414554955, "grad_norm": 577.6237182617188, "learning_rate": 1.9089342929340257e-06, "loss": 25.3281, "step": 17198 }, { "epoch": 0.16280610747721055, "grad_norm": 374.94049072265625, "learning_rate": 1.9089215098297078e-06, "loss": 20.3672, "step": 17199 }, { "epoch": 0.16281557349892561, "grad_norm": 219.3008575439453, "learning_rate": 1.9089087258710627e-06, "loss": 20.125, "step": 17200 }, { "epoch": 0.16282503952064067, "grad_norm": 498.6373596191406, "learning_rate": 1.9088959410581027e-06, "loss": 26.0, "step": 17201 }, { "epoch": 0.1628345055423557, "grad_norm": 158.0800323486328, "learning_rate": 1.9088831553908397e-06, "loss": 8.9609, "step": 17202 }, { "epoch": 0.16284397156407077, "grad_norm": 756.7716674804688, "learning_rate": 1.908870368869285e-06, "loss": 10.0547, "step": 17203 }, { "epoch": 0.16285343758578583, "grad_norm": 367.3434753417969, "learning_rate": 1.908857581493452e-06, "loss": 9.5039, "step": 17204 }, { "epoch": 0.1628629036075009, "grad_norm": 1055.233642578125, "learning_rate": 1.9088447932633514e-06, "loss": 26.3867, "step": 17205 }, { "epoch": 0.16287236962921592, "grad_norm": 681.2559204101562, "learning_rate": 1.9088320041789966e-06, "loss": 51.0625, "step": 17206 }, { "epoch": 0.16288183565093098, "grad_norm": 3.624427080154419, "learning_rate": 1.9088192142403984e-06, "loss": 0.9932, "step": 17207 }, { "epoch": 0.16289130167264604, "grad_norm": 295.9122619628906, "learning_rate": 1.908806423447569e-06, "loss": 34.3438, "step": 17208 }, { "epoch": 0.1629007676943611, "grad_norm": 285.0680236816406, "learning_rate": 1.9087936318005212e-06, "loss": 23.7344, "step": 17209 }, { "epoch": 0.16291023371607616, "grad_norm": 765.9578857421875, "learning_rate": 1.908780839299266e-06, "loss": 9.0957, "step": 17210 }, { "epoch": 0.1629196997377912, "grad_norm": 369.7676696777344, "learning_rate": 1.9087680459438165e-06, "loss": 21.1328, "step": 17211 }, { "epoch": 0.16292916575950625, "grad_norm": 547.5623168945312, "learning_rate": 1.9087552517341834e-06, "loss": 47.3281, "step": 17212 }, { "epoch": 0.1629386317812213, "grad_norm": 448.5507507324219, "learning_rate": 1.9087424566703803e-06, "loss": 19.6133, "step": 17213 }, { "epoch": 0.16294809780293637, "grad_norm": 274.9565734863281, "learning_rate": 1.9087296607524177e-06, "loss": 31.5, "step": 17214 }, { "epoch": 0.1629575638246514, "grad_norm": 565.0906372070312, "learning_rate": 1.908716863980309e-06, "loss": 39.6641, "step": 17215 }, { "epoch": 0.16296702984636646, "grad_norm": 310.8688659667969, "learning_rate": 1.908704066354065e-06, "loss": 15.1875, "step": 17216 }, { "epoch": 0.16297649586808152, "grad_norm": 417.39556884765625, "learning_rate": 1.9086912678736986e-06, "loss": 34.4844, "step": 17217 }, { "epoch": 0.16298596188979658, "grad_norm": 516.5235595703125, "learning_rate": 1.9086784685392215e-06, "loss": 30.7266, "step": 17218 }, { "epoch": 0.16299542791151164, "grad_norm": 244.13966369628906, "learning_rate": 1.908665668350646e-06, "loss": 15.5234, "step": 17219 }, { "epoch": 0.16300489393322667, "grad_norm": 425.6676025390625, "learning_rate": 1.908652867307983e-06, "loss": 24.0469, "step": 17220 }, { "epoch": 0.16301435995494173, "grad_norm": 1046.1126708984375, "learning_rate": 1.908640065411246e-06, "loss": 8.8594, "step": 17221 }, { "epoch": 0.1630238259766568, "grad_norm": 648.1852416992188, "learning_rate": 1.9086272626604467e-06, "loss": 26.3359, "step": 17222 }, { "epoch": 0.16303329199837185, "grad_norm": 235.3729705810547, "learning_rate": 1.9086144590555966e-06, "loss": 18.7812, "step": 17223 }, { "epoch": 0.1630427580200869, "grad_norm": 390.0371398925781, "learning_rate": 1.908601654596708e-06, "loss": 35.9688, "step": 17224 }, { "epoch": 0.16305222404180195, "grad_norm": 356.9551696777344, "learning_rate": 1.908588849283793e-06, "loss": 21.2734, "step": 17225 }, { "epoch": 0.163061690063517, "grad_norm": 283.96258544921875, "learning_rate": 1.9085760431168637e-06, "loss": 19.4219, "step": 17226 }, { "epoch": 0.16307115608523207, "grad_norm": 455.4337158203125, "learning_rate": 1.908563236095932e-06, "loss": 34.3984, "step": 17227 }, { "epoch": 0.16308062210694713, "grad_norm": 309.80169677734375, "learning_rate": 1.90855042822101e-06, "loss": 20.0312, "step": 17228 }, { "epoch": 0.16309008812866216, "grad_norm": 555.0149536132812, "learning_rate": 1.9085376194921094e-06, "loss": 54.8906, "step": 17229 }, { "epoch": 0.16309955415037722, "grad_norm": 427.07611083984375, "learning_rate": 1.9085248099092426e-06, "loss": 39.2266, "step": 17230 }, { "epoch": 0.16310902017209228, "grad_norm": 733.1690063476562, "learning_rate": 1.908511999472422e-06, "loss": 41.5, "step": 17231 }, { "epoch": 0.16311848619380734, "grad_norm": 262.6268310546875, "learning_rate": 1.908499188181659e-06, "loss": 21.9375, "step": 17232 }, { "epoch": 0.16312795221552237, "grad_norm": 498.63299560546875, "learning_rate": 1.9084863760369655e-06, "loss": 32.9375, "step": 17233 }, { "epoch": 0.16313741823723743, "grad_norm": 402.1280212402344, "learning_rate": 1.9084735630383544e-06, "loss": 21.1641, "step": 17234 }, { "epoch": 0.1631468842589525, "grad_norm": 467.3144836425781, "learning_rate": 1.9084607491858367e-06, "loss": 49.5312, "step": 17235 }, { "epoch": 0.16315635028066755, "grad_norm": 733.6912841796875, "learning_rate": 1.9084479344794256e-06, "loss": 34.2266, "step": 17236 }, { "epoch": 0.1631658163023826, "grad_norm": 539.5352172851562, "learning_rate": 1.9084351189191323e-06, "loss": 42.5469, "step": 17237 }, { "epoch": 0.16317528232409764, "grad_norm": 493.86199951171875, "learning_rate": 1.908422302504969e-06, "loss": 17.375, "step": 17238 }, { "epoch": 0.1631847483458127, "grad_norm": 306.2831726074219, "learning_rate": 1.908409485236948e-06, "loss": 7.7539, "step": 17239 }, { "epoch": 0.16319421436752776, "grad_norm": 497.1737365722656, "learning_rate": 1.908396667115081e-06, "loss": 54.625, "step": 17240 }, { "epoch": 0.16320368038924282, "grad_norm": 1190.3018798828125, "learning_rate": 1.90838384813938e-06, "loss": 57.3398, "step": 17241 }, { "epoch": 0.16321314641095785, "grad_norm": 2.7576329708099365, "learning_rate": 1.908371028309858e-06, "loss": 0.8093, "step": 17242 }, { "epoch": 0.16322261243267291, "grad_norm": 268.7993469238281, "learning_rate": 1.908358207626526e-06, "loss": 38.4531, "step": 17243 }, { "epoch": 0.16323207845438797, "grad_norm": 1502.006103515625, "learning_rate": 1.908345386089396e-06, "loss": 48.3281, "step": 17244 }, { "epoch": 0.16324154447610303, "grad_norm": 584.24658203125, "learning_rate": 1.908332563698481e-06, "loss": 41.0938, "step": 17245 }, { "epoch": 0.1632510104978181, "grad_norm": 619.9765625, "learning_rate": 1.908319740453792e-06, "loss": 45.3242, "step": 17246 }, { "epoch": 0.16326047651953313, "grad_norm": 220.34048461914062, "learning_rate": 1.908306916355342e-06, "loss": 30.0938, "step": 17247 }, { "epoch": 0.1632699425412482, "grad_norm": 456.5369567871094, "learning_rate": 1.9082940914031422e-06, "loss": 14.125, "step": 17248 }, { "epoch": 0.16327940856296325, "grad_norm": 228.76438903808594, "learning_rate": 1.908281265597205e-06, "loss": 29.4375, "step": 17249 }, { "epoch": 0.1632888745846783, "grad_norm": 634.3157958984375, "learning_rate": 1.9082684389375427e-06, "loss": 45.2031, "step": 17250 }, { "epoch": 0.16329834060639334, "grad_norm": 511.55804443359375, "learning_rate": 1.9082556114241674e-06, "loss": 39.2031, "step": 17251 }, { "epoch": 0.1633078066281084, "grad_norm": 464.3226013183594, "learning_rate": 1.9082427830570905e-06, "loss": 59.1406, "step": 17252 }, { "epoch": 0.16331727264982346, "grad_norm": 1442.189697265625, "learning_rate": 1.9082299538363247e-06, "loss": 46.8359, "step": 17253 }, { "epoch": 0.16332673867153852, "grad_norm": 723.2037353515625, "learning_rate": 1.9082171237618817e-06, "loss": 53.0078, "step": 17254 }, { "epoch": 0.16333620469325358, "grad_norm": 461.0644226074219, "learning_rate": 1.9082042928337736e-06, "loss": 17.7969, "step": 17255 }, { "epoch": 0.1633456707149686, "grad_norm": 472.1877746582031, "learning_rate": 1.908191461052013e-06, "loss": 50.9688, "step": 17256 }, { "epoch": 0.16335513673668367, "grad_norm": 381.3504638671875, "learning_rate": 1.9081786284166112e-06, "loss": 29.2891, "step": 17257 }, { "epoch": 0.16336460275839873, "grad_norm": 308.43524169921875, "learning_rate": 1.9081657949275803e-06, "loss": 21.3984, "step": 17258 }, { "epoch": 0.1633740687801138, "grad_norm": 619.8270874023438, "learning_rate": 1.908152960584933e-06, "loss": 41.5, "step": 17259 }, { "epoch": 0.16338353480182882, "grad_norm": 334.5119323730469, "learning_rate": 1.9081401253886807e-06, "loss": 19.0273, "step": 17260 }, { "epoch": 0.16339300082354388, "grad_norm": 3.2657604217529297, "learning_rate": 1.908127289338836e-06, "loss": 1.0107, "step": 17261 }, { "epoch": 0.16340246684525894, "grad_norm": 343.782470703125, "learning_rate": 1.908114452435411e-06, "loss": 22.0703, "step": 17262 }, { "epoch": 0.163411932866974, "grad_norm": 418.0935974121094, "learning_rate": 1.9081016146784168e-06, "loss": 32.7266, "step": 17263 }, { "epoch": 0.16342139888868906, "grad_norm": 530.8961181640625, "learning_rate": 1.9080887760678668e-06, "loss": 46.9062, "step": 17264 }, { "epoch": 0.1634308649104041, "grad_norm": 930.7993774414062, "learning_rate": 1.908075936603772e-06, "loss": 8.498, "step": 17265 }, { "epoch": 0.16344033093211915, "grad_norm": 460.0788879394531, "learning_rate": 1.908063096286145e-06, "loss": 34.6133, "step": 17266 }, { "epoch": 0.16344979695383421, "grad_norm": 186.2071533203125, "learning_rate": 1.908050255114998e-06, "loss": 20.5664, "step": 17267 }, { "epoch": 0.16345926297554927, "grad_norm": 227.62094116210938, "learning_rate": 1.9080374130903423e-06, "loss": 42.7656, "step": 17268 }, { "epoch": 0.1634687289972643, "grad_norm": 346.6227722167969, "learning_rate": 1.908024570212191e-06, "loss": 9.1992, "step": 17269 }, { "epoch": 0.16347819501897937, "grad_norm": 267.4680480957031, "learning_rate": 1.9080117264805554e-06, "loss": 19.7188, "step": 17270 }, { "epoch": 0.16348766104069443, "grad_norm": 284.62701416015625, "learning_rate": 1.907998881895448e-06, "loss": 27.7734, "step": 17271 }, { "epoch": 0.1634971270624095, "grad_norm": 455.7958984375, "learning_rate": 1.9079860364568806e-06, "loss": 40.2812, "step": 17272 }, { "epoch": 0.16350659308412455, "grad_norm": 274.2398986816406, "learning_rate": 1.9079731901648654e-06, "loss": 17.2148, "step": 17273 }, { "epoch": 0.16351605910583958, "grad_norm": 229.88185119628906, "learning_rate": 1.907960343019415e-06, "loss": 19.168, "step": 17274 }, { "epoch": 0.16352552512755464, "grad_norm": 2.779298782348633, "learning_rate": 1.90794749502054e-06, "loss": 0.833, "step": 17275 }, { "epoch": 0.1635349911492697, "grad_norm": 905.8943481445312, "learning_rate": 1.907934646168254e-06, "loss": 31.9375, "step": 17276 }, { "epoch": 0.16354445717098476, "grad_norm": 303.1787109375, "learning_rate": 1.9079217964625683e-06, "loss": 22.8438, "step": 17277 }, { "epoch": 0.1635539231926998, "grad_norm": 246.9368438720703, "learning_rate": 1.9079089459034954e-06, "loss": 12.8984, "step": 17278 }, { "epoch": 0.16356338921441485, "grad_norm": 355.64166259765625, "learning_rate": 1.907896094491047e-06, "loss": 40.9219, "step": 17279 }, { "epoch": 0.1635728552361299, "grad_norm": 784.2803344726562, "learning_rate": 1.9078832422252353e-06, "loss": 38.4609, "step": 17280 }, { "epoch": 0.16358232125784497, "grad_norm": 257.0386962890625, "learning_rate": 1.9078703891060727e-06, "loss": 9.9375, "step": 17281 }, { "epoch": 0.16359178727956003, "grad_norm": 305.16387939453125, "learning_rate": 1.9078575351335705e-06, "loss": 17.8594, "step": 17282 }, { "epoch": 0.16360125330127506, "grad_norm": 426.4391784667969, "learning_rate": 1.9078446803077415e-06, "loss": 33.7539, "step": 17283 }, { "epoch": 0.16361071932299012, "grad_norm": 426.05194091796875, "learning_rate": 1.9078318246285976e-06, "loss": 44.875, "step": 17284 }, { "epoch": 0.16362018534470518, "grad_norm": 284.321044921875, "learning_rate": 1.907818968096151e-06, "loss": 33.8984, "step": 17285 }, { "epoch": 0.16362965136642024, "grad_norm": 315.1356201171875, "learning_rate": 1.9078061107104134e-06, "loss": 18.7188, "step": 17286 }, { "epoch": 0.16363911738813527, "grad_norm": 204.96136474609375, "learning_rate": 1.9077932524713974e-06, "loss": 17.7734, "step": 17287 }, { "epoch": 0.16364858340985033, "grad_norm": 539.9306030273438, "learning_rate": 1.9077803933791143e-06, "loss": 25.3867, "step": 17288 }, { "epoch": 0.1636580494315654, "grad_norm": 325.587646484375, "learning_rate": 1.907767533433577e-06, "loss": 20.8438, "step": 17289 }, { "epoch": 0.16366751545328045, "grad_norm": 729.2837524414062, "learning_rate": 1.9077546726347975e-06, "loss": 55.5312, "step": 17290 }, { "epoch": 0.16367698147499551, "grad_norm": 150.3682403564453, "learning_rate": 1.907741810982787e-06, "loss": 18.6719, "step": 17291 }, { "epoch": 0.16368644749671055, "grad_norm": 278.2935791015625, "learning_rate": 1.907728948477559e-06, "loss": 19.1094, "step": 17292 }, { "epoch": 0.1636959135184256, "grad_norm": 671.7658081054688, "learning_rate": 1.9077160851191244e-06, "loss": 10.5039, "step": 17293 }, { "epoch": 0.16370537954014067, "grad_norm": 379.17999267578125, "learning_rate": 1.907703220907496e-06, "loss": 47.2031, "step": 17294 }, { "epoch": 0.16371484556185573, "grad_norm": 282.73321533203125, "learning_rate": 1.9076903558426854e-06, "loss": 16.5156, "step": 17295 }, { "epoch": 0.1637243115835708, "grad_norm": 2127.893798828125, "learning_rate": 1.907677489924705e-06, "loss": 42.9297, "step": 17296 }, { "epoch": 0.16373377760528582, "grad_norm": 3.3144431114196777, "learning_rate": 1.9076646231535667e-06, "loss": 1.0825, "step": 17297 }, { "epoch": 0.16374324362700088, "grad_norm": 576.4899291992188, "learning_rate": 1.907651755529283e-06, "loss": 27.4375, "step": 17298 }, { "epoch": 0.16375270964871594, "grad_norm": 161.1691131591797, "learning_rate": 1.907638887051865e-06, "loss": 20.8828, "step": 17299 }, { "epoch": 0.163762175670431, "grad_norm": 2.8430819511413574, "learning_rate": 1.907626017721326e-06, "loss": 0.8096, "step": 17300 }, { "epoch": 0.16377164169214603, "grad_norm": 335.9741516113281, "learning_rate": 1.9076131475376776e-06, "loss": 22.9102, "step": 17301 }, { "epoch": 0.1637811077138611, "grad_norm": 392.5373229980469, "learning_rate": 1.9076002765009322e-06, "loss": 30.9531, "step": 17302 }, { "epoch": 0.16379057373557615, "grad_norm": 522.0945434570312, "learning_rate": 1.907587404611101e-06, "loss": 44.0312, "step": 17303 }, { "epoch": 0.1638000397572912, "grad_norm": 208.47308349609375, "learning_rate": 1.9075745318681967e-06, "loss": 17.5898, "step": 17304 }, { "epoch": 0.16380950577900627, "grad_norm": 454.6498718261719, "learning_rate": 1.907561658272232e-06, "loss": 23.2109, "step": 17305 }, { "epoch": 0.1638189718007213, "grad_norm": 738.67138671875, "learning_rate": 1.9075487838232178e-06, "loss": 42.0781, "step": 17306 }, { "epoch": 0.16382843782243636, "grad_norm": 3.1086196899414062, "learning_rate": 1.907535908521167e-06, "loss": 1.0962, "step": 17307 }, { "epoch": 0.16383790384415142, "grad_norm": 653.8179321289062, "learning_rate": 1.9075230323660915e-06, "loss": 22.9844, "step": 17308 }, { "epoch": 0.16384736986586648, "grad_norm": 724.3300170898438, "learning_rate": 1.907510155358003e-06, "loss": 46.2969, "step": 17309 }, { "epoch": 0.16385683588758151, "grad_norm": 477.46240234375, "learning_rate": 1.9074972774969145e-06, "loss": 33.7031, "step": 17310 }, { "epoch": 0.16386630190929657, "grad_norm": 1505.8099365234375, "learning_rate": 1.9074843987828375e-06, "loss": 43.7188, "step": 17311 }, { "epoch": 0.16387576793101163, "grad_norm": 189.4551544189453, "learning_rate": 1.907471519215784e-06, "loss": 19.4375, "step": 17312 }, { "epoch": 0.1638852339527267, "grad_norm": 3.7274160385131836, "learning_rate": 1.9074586387957663e-06, "loss": 0.9453, "step": 17313 }, { "epoch": 0.16389469997444175, "grad_norm": 207.0487823486328, "learning_rate": 1.907445757522797e-06, "loss": 17.1484, "step": 17314 }, { "epoch": 0.1639041659961568, "grad_norm": 770.6456909179688, "learning_rate": 1.907432875396887e-06, "loss": 43.9062, "step": 17315 }, { "epoch": 0.16391363201787185, "grad_norm": 135.98394775390625, "learning_rate": 1.9074199924180496e-06, "loss": 21.1562, "step": 17316 }, { "epoch": 0.1639230980395869, "grad_norm": 183.80084228515625, "learning_rate": 1.9074071085862964e-06, "loss": 16.6172, "step": 17317 }, { "epoch": 0.16393256406130197, "grad_norm": 213.6418914794922, "learning_rate": 1.9073942239016395e-06, "loss": 21.918, "step": 17318 }, { "epoch": 0.163942030083017, "grad_norm": 653.19287109375, "learning_rate": 1.9073813383640908e-06, "loss": 37.1094, "step": 17319 }, { "epoch": 0.16395149610473206, "grad_norm": 172.4814453125, "learning_rate": 1.907368451973663e-06, "loss": 7.8789, "step": 17320 }, { "epoch": 0.16396096212644712, "grad_norm": 355.8941345214844, "learning_rate": 1.907355564730368e-06, "loss": 17.3906, "step": 17321 }, { "epoch": 0.16397042814816218, "grad_norm": 264.2322082519531, "learning_rate": 1.9073426766342173e-06, "loss": 26.4844, "step": 17322 }, { "epoch": 0.16397989416987724, "grad_norm": 3.1622257232666016, "learning_rate": 1.907329787685224e-06, "loss": 0.9995, "step": 17323 }, { "epoch": 0.16398936019159227, "grad_norm": 217.45404052734375, "learning_rate": 1.9073168978833994e-06, "loss": 11.166, "step": 17324 }, { "epoch": 0.16399882621330733, "grad_norm": 3.4124906063079834, "learning_rate": 1.907304007228756e-06, "loss": 0.9761, "step": 17325 }, { "epoch": 0.1640082922350224, "grad_norm": 240.8367156982422, "learning_rate": 1.9072911157213061e-06, "loss": 16.4297, "step": 17326 }, { "epoch": 0.16401775825673745, "grad_norm": 372.6999206542969, "learning_rate": 1.9072782233610614e-06, "loss": 14.5039, "step": 17327 }, { "epoch": 0.16402722427845248, "grad_norm": 514.5304565429688, "learning_rate": 1.9072653301480342e-06, "loss": 15.5488, "step": 17328 }, { "epoch": 0.16403669030016754, "grad_norm": 300.5985107421875, "learning_rate": 1.9072524360822367e-06, "loss": 18.5156, "step": 17329 }, { "epoch": 0.1640461563218826, "grad_norm": 236.85272216796875, "learning_rate": 1.9072395411636805e-06, "loss": 20.3398, "step": 17330 }, { "epoch": 0.16405562234359766, "grad_norm": 254.76138305664062, "learning_rate": 1.9072266453923786e-06, "loss": 25.3984, "step": 17331 }, { "epoch": 0.16406508836531272, "grad_norm": 563.8883666992188, "learning_rate": 1.9072137487683423e-06, "loss": 33.0781, "step": 17332 }, { "epoch": 0.16407455438702775, "grad_norm": 1606.2283935546875, "learning_rate": 1.9072008512915847e-06, "loss": 36.5703, "step": 17333 }, { "epoch": 0.16408402040874281, "grad_norm": 202.05392456054688, "learning_rate": 1.9071879529621168e-06, "loss": 23.0156, "step": 17334 }, { "epoch": 0.16409348643045787, "grad_norm": 708.4649047851562, "learning_rate": 1.9071750537799515e-06, "loss": 38.7188, "step": 17335 }, { "epoch": 0.16410295245217293, "grad_norm": 191.0648956298828, "learning_rate": 1.9071621537451005e-06, "loss": 22.6797, "step": 17336 }, { "epoch": 0.16411241847388797, "grad_norm": 391.1949157714844, "learning_rate": 1.907149252857576e-06, "loss": 43.7188, "step": 17337 }, { "epoch": 0.16412188449560303, "grad_norm": 569.3390502929688, "learning_rate": 1.9071363511173905e-06, "loss": 24.3359, "step": 17338 }, { "epoch": 0.1641313505173181, "grad_norm": 156.3509979248047, "learning_rate": 1.9071234485245557e-06, "loss": 17.2266, "step": 17339 }, { "epoch": 0.16414081653903315, "grad_norm": 479.2027282714844, "learning_rate": 1.9071105450790838e-06, "loss": 26.9453, "step": 17340 }, { "epoch": 0.1641502825607482, "grad_norm": 615.8892822265625, "learning_rate": 1.907097640780987e-06, "loss": 49.2812, "step": 17341 }, { "epoch": 0.16415974858246324, "grad_norm": 3.2099802494049072, "learning_rate": 1.9070847356302778e-06, "loss": 0.9307, "step": 17342 }, { "epoch": 0.1641692146041783, "grad_norm": 377.12689208984375, "learning_rate": 1.9070718296269678e-06, "loss": 53.3594, "step": 17343 }, { "epoch": 0.16417868062589336, "grad_norm": 205.2611846923828, "learning_rate": 1.907058922771069e-06, "loss": 16.6641, "step": 17344 }, { "epoch": 0.16418814664760842, "grad_norm": 3.501997947692871, "learning_rate": 1.907046015062594e-06, "loss": 0.8491, "step": 17345 }, { "epoch": 0.16419761266932345, "grad_norm": 664.63232421875, "learning_rate": 1.9070331065015548e-06, "loss": 28.5781, "step": 17346 }, { "epoch": 0.1642070786910385, "grad_norm": 622.3645629882812, "learning_rate": 1.9070201970879633e-06, "loss": 26.375, "step": 17347 }, { "epoch": 0.16421654471275357, "grad_norm": 348.4555969238281, "learning_rate": 1.907007286821832e-06, "loss": 32.4844, "step": 17348 }, { "epoch": 0.16422601073446863, "grad_norm": 399.59759521484375, "learning_rate": 1.9069943757031728e-06, "loss": 10.6992, "step": 17349 }, { "epoch": 0.1642354767561837, "grad_norm": 565.074951171875, "learning_rate": 1.906981463731998e-06, "loss": 35.6875, "step": 17350 }, { "epoch": 0.16424494277789872, "grad_norm": 193.03909301757812, "learning_rate": 1.9069685509083193e-06, "loss": 20.3477, "step": 17351 }, { "epoch": 0.16425440879961378, "grad_norm": 417.5644836425781, "learning_rate": 1.9069556372321495e-06, "loss": 21.7891, "step": 17352 }, { "epoch": 0.16426387482132884, "grad_norm": 3.0022530555725098, "learning_rate": 1.9069427227035001e-06, "loss": 0.9678, "step": 17353 }, { "epoch": 0.1642733408430439, "grad_norm": 338.07080078125, "learning_rate": 1.9069298073223838e-06, "loss": 19.6484, "step": 17354 }, { "epoch": 0.16428280686475893, "grad_norm": 414.6466979980469, "learning_rate": 1.9069168910888123e-06, "loss": 8.9141, "step": 17355 }, { "epoch": 0.164292272886474, "grad_norm": 453.0166931152344, "learning_rate": 1.906903974002798e-06, "loss": 41.3438, "step": 17356 }, { "epoch": 0.16430173890818905, "grad_norm": 497.8065490722656, "learning_rate": 1.9068910560643532e-06, "loss": 32.5391, "step": 17357 }, { "epoch": 0.16431120492990411, "grad_norm": 347.42791748046875, "learning_rate": 1.9068781372734897e-06, "loss": 46.8125, "step": 17358 }, { "epoch": 0.16432067095161917, "grad_norm": 194.52601623535156, "learning_rate": 1.9068652176302194e-06, "loss": 20.6484, "step": 17359 }, { "epoch": 0.1643301369733342, "grad_norm": 282.4821472167969, "learning_rate": 1.9068522971345552e-06, "loss": 20.8438, "step": 17360 }, { "epoch": 0.16433960299504927, "grad_norm": 778.1165771484375, "learning_rate": 1.9068393757865087e-06, "loss": 10.9336, "step": 17361 }, { "epoch": 0.16434906901676433, "grad_norm": 780.0211791992188, "learning_rate": 1.906826453586092e-06, "loss": 36.3047, "step": 17362 }, { "epoch": 0.1643585350384794, "grad_norm": 3.098611354827881, "learning_rate": 1.9068135305333176e-06, "loss": 0.9795, "step": 17363 }, { "epoch": 0.16436800106019442, "grad_norm": 539.475341796875, "learning_rate": 1.9068006066281975e-06, "loss": 52.4688, "step": 17364 }, { "epoch": 0.16437746708190948, "grad_norm": 203.83750915527344, "learning_rate": 1.9067876818707437e-06, "loss": 8.0547, "step": 17365 }, { "epoch": 0.16438693310362454, "grad_norm": 406.5920715332031, "learning_rate": 1.9067747562609683e-06, "loss": 28.7266, "step": 17366 }, { "epoch": 0.1643963991253396, "grad_norm": 236.96009826660156, "learning_rate": 1.906761829798884e-06, "loss": 19.4609, "step": 17367 }, { "epoch": 0.16440586514705466, "grad_norm": 192.86683654785156, "learning_rate": 1.9067489024845025e-06, "loss": 20.8477, "step": 17368 }, { "epoch": 0.1644153311687697, "grad_norm": 517.0897216796875, "learning_rate": 1.906735974317836e-06, "loss": 50.3906, "step": 17369 }, { "epoch": 0.16442479719048475, "grad_norm": 2.8282814025878906, "learning_rate": 1.9067230452988965e-06, "loss": 0.9146, "step": 17370 }, { "epoch": 0.1644342632121998, "grad_norm": 260.5337829589844, "learning_rate": 1.9067101154276966e-06, "loss": 30.4766, "step": 17371 }, { "epoch": 0.16444372923391487, "grad_norm": 730.047607421875, "learning_rate": 1.9066971847042478e-06, "loss": 24.8281, "step": 17372 }, { "epoch": 0.1644531952556299, "grad_norm": 288.7608947753906, "learning_rate": 1.906684253128563e-06, "loss": 25.1055, "step": 17373 }, { "epoch": 0.16446266127734496, "grad_norm": 600.3643188476562, "learning_rate": 1.9066713207006538e-06, "loss": 45.1094, "step": 17374 }, { "epoch": 0.16447212729906002, "grad_norm": 475.83868408203125, "learning_rate": 1.9066583874205326e-06, "loss": 58.4062, "step": 17375 }, { "epoch": 0.16448159332077508, "grad_norm": 540.615478515625, "learning_rate": 1.9066454532882115e-06, "loss": 20.4453, "step": 17376 }, { "epoch": 0.16449105934249014, "grad_norm": 559.2560424804688, "learning_rate": 1.9066325183037027e-06, "loss": 56.2891, "step": 17377 }, { "epoch": 0.16450052536420517, "grad_norm": 2.8869452476501465, "learning_rate": 1.9066195824670182e-06, "loss": 0.8149, "step": 17378 }, { "epoch": 0.16450999138592023, "grad_norm": 244.72402954101562, "learning_rate": 1.9066066457781702e-06, "loss": 15.7969, "step": 17379 }, { "epoch": 0.1645194574076353, "grad_norm": 465.4638366699219, "learning_rate": 1.906593708237171e-06, "loss": 29.9609, "step": 17380 }, { "epoch": 0.16452892342935035, "grad_norm": 534.8636474609375, "learning_rate": 1.9065807698440329e-06, "loss": 19.1406, "step": 17381 }, { "epoch": 0.16453838945106541, "grad_norm": 312.7527770996094, "learning_rate": 1.9065678305987677e-06, "loss": 18.6875, "step": 17382 }, { "epoch": 0.16454785547278045, "grad_norm": 713.9063110351562, "learning_rate": 1.9065548905013875e-06, "loss": 28.6406, "step": 17383 }, { "epoch": 0.1645573214944955, "grad_norm": 254.88821411132812, "learning_rate": 1.9065419495519048e-06, "loss": 24.3281, "step": 17384 }, { "epoch": 0.16456678751621057, "grad_norm": 470.61175537109375, "learning_rate": 1.9065290077503318e-06, "loss": 8.3789, "step": 17385 }, { "epoch": 0.16457625353792563, "grad_norm": 374.0289611816406, "learning_rate": 1.9065160650966808e-06, "loss": 21.7266, "step": 17386 }, { "epoch": 0.16458571955964066, "grad_norm": 476.5569152832031, "learning_rate": 1.9065031215909631e-06, "loss": 20.5781, "step": 17387 }, { "epoch": 0.16459518558135572, "grad_norm": 429.81427001953125, "learning_rate": 1.9064901772331917e-06, "loss": 33.25, "step": 17388 }, { "epoch": 0.16460465160307078, "grad_norm": 170.4853973388672, "learning_rate": 1.9064772320233784e-06, "loss": 15.668, "step": 17389 }, { "epoch": 0.16461411762478584, "grad_norm": 3.144179582595825, "learning_rate": 1.9064642859615355e-06, "loss": 0.9189, "step": 17390 }, { "epoch": 0.1646235836465009, "grad_norm": 390.4148254394531, "learning_rate": 1.906451339047675e-06, "loss": 29.3047, "step": 17391 }, { "epoch": 0.16463304966821593, "grad_norm": 277.05999755859375, "learning_rate": 1.9064383912818097e-06, "loss": 18.1016, "step": 17392 }, { "epoch": 0.164642515689931, "grad_norm": 283.0719909667969, "learning_rate": 1.9064254426639508e-06, "loss": 25.3516, "step": 17393 }, { "epoch": 0.16465198171164605, "grad_norm": 536.8934936523438, "learning_rate": 1.906412493194111e-06, "loss": 35.1484, "step": 17394 }, { "epoch": 0.1646614477333611, "grad_norm": 465.54437255859375, "learning_rate": 1.9063995428723026e-06, "loss": 35.25, "step": 17395 }, { "epoch": 0.16467091375507614, "grad_norm": 192.22854614257812, "learning_rate": 1.9063865916985375e-06, "loss": 15.8906, "step": 17396 }, { "epoch": 0.1646803797767912, "grad_norm": 238.4422149658203, "learning_rate": 1.9063736396728278e-06, "loss": 24.4531, "step": 17397 }, { "epoch": 0.16468984579850626, "grad_norm": 245.88458251953125, "learning_rate": 1.9063606867951863e-06, "loss": 19.375, "step": 17398 }, { "epoch": 0.16469931182022132, "grad_norm": 522.15771484375, "learning_rate": 1.9063477330656244e-06, "loss": 22.9414, "step": 17399 }, { "epoch": 0.16470877784193638, "grad_norm": 409.5943603515625, "learning_rate": 1.9063347784841545e-06, "loss": 40.375, "step": 17400 }, { "epoch": 0.16471824386365141, "grad_norm": 187.69285583496094, "learning_rate": 1.9063218230507889e-06, "loss": 17.8828, "step": 17401 }, { "epoch": 0.16472770988536647, "grad_norm": 672.754638671875, "learning_rate": 1.90630886676554e-06, "loss": 37.8672, "step": 17402 }, { "epoch": 0.16473717590708153, "grad_norm": 438.0321960449219, "learning_rate": 1.9062959096284196e-06, "loss": 45.5469, "step": 17403 }, { "epoch": 0.1647466419287966, "grad_norm": 357.0373840332031, "learning_rate": 1.9062829516394402e-06, "loss": 7.1641, "step": 17404 }, { "epoch": 0.16475610795051163, "grad_norm": 794.4812622070312, "learning_rate": 1.9062699927986134e-06, "loss": 42.8203, "step": 17405 }, { "epoch": 0.1647655739722267, "grad_norm": 440.9702453613281, "learning_rate": 1.9062570331059517e-06, "loss": 37.7188, "step": 17406 }, { "epoch": 0.16477503999394175, "grad_norm": 484.17095947265625, "learning_rate": 1.9062440725614677e-06, "loss": 38.3594, "step": 17407 }, { "epoch": 0.1647845060156568, "grad_norm": 386.2882080078125, "learning_rate": 1.906231111165173e-06, "loss": 36.8594, "step": 17408 }, { "epoch": 0.16479397203737187, "grad_norm": 690.023681640625, "learning_rate": 1.9062181489170803e-06, "loss": 36.1562, "step": 17409 }, { "epoch": 0.1648034380590869, "grad_norm": 206.55955505371094, "learning_rate": 1.9062051858172012e-06, "loss": 18.4453, "step": 17410 }, { "epoch": 0.16481290408080196, "grad_norm": 335.5829772949219, "learning_rate": 1.9061922218655484e-06, "loss": 40.5938, "step": 17411 }, { "epoch": 0.16482237010251702, "grad_norm": 459.7207336425781, "learning_rate": 1.9061792570621336e-06, "loss": 37.3594, "step": 17412 }, { "epoch": 0.16483183612423208, "grad_norm": 454.43389892578125, "learning_rate": 1.9061662914069693e-06, "loss": 19.2891, "step": 17413 }, { "epoch": 0.1648413021459471, "grad_norm": 176.02877807617188, "learning_rate": 1.906153324900068e-06, "loss": 20.3516, "step": 17414 }, { "epoch": 0.16485076816766217, "grad_norm": 220.8588409423828, "learning_rate": 1.906140357541441e-06, "loss": 20.0391, "step": 17415 }, { "epoch": 0.16486023418937723, "grad_norm": 892.9717407226562, "learning_rate": 1.9061273893311014e-06, "loss": 32.7656, "step": 17416 }, { "epoch": 0.1648697002110923, "grad_norm": 200.55337524414062, "learning_rate": 1.906114420269061e-06, "loss": 20.5312, "step": 17417 }, { "epoch": 0.16487916623280735, "grad_norm": 527.3665771484375, "learning_rate": 1.9061014503553316e-06, "loss": 32.9922, "step": 17418 }, { "epoch": 0.16488863225452238, "grad_norm": 336.8902893066406, "learning_rate": 1.9060884795899261e-06, "loss": 16.9688, "step": 17419 }, { "epoch": 0.16489809827623744, "grad_norm": 349.04949951171875, "learning_rate": 1.9060755079728563e-06, "loss": 28.6719, "step": 17420 }, { "epoch": 0.1649075642979525, "grad_norm": 301.9931945800781, "learning_rate": 1.9060625355041347e-06, "loss": 31.7109, "step": 17421 }, { "epoch": 0.16491703031966756, "grad_norm": 310.0751037597656, "learning_rate": 1.906049562183773e-06, "loss": 15.2266, "step": 17422 }, { "epoch": 0.1649264963413826, "grad_norm": 229.2127227783203, "learning_rate": 1.9060365880117835e-06, "loss": 14.2266, "step": 17423 }, { "epoch": 0.16493596236309765, "grad_norm": 578.4375, "learning_rate": 1.9060236129881789e-06, "loss": 38.4844, "step": 17424 }, { "epoch": 0.16494542838481271, "grad_norm": 725.2532348632812, "learning_rate": 1.9060106371129707e-06, "loss": 20.9219, "step": 17425 }, { "epoch": 0.16495489440652777, "grad_norm": 635.6505737304688, "learning_rate": 1.9059976603861717e-06, "loss": 22.4102, "step": 17426 }, { "epoch": 0.16496436042824283, "grad_norm": 491.5859069824219, "learning_rate": 1.905984682807794e-06, "loss": 24.0859, "step": 17427 }, { "epoch": 0.16497382644995787, "grad_norm": 261.7463073730469, "learning_rate": 1.9059717043778492e-06, "loss": 23.4453, "step": 17428 }, { "epoch": 0.16498329247167293, "grad_norm": 3.218956708908081, "learning_rate": 1.90595872509635e-06, "loss": 0.9678, "step": 17429 }, { "epoch": 0.164992758493388, "grad_norm": 367.6520080566406, "learning_rate": 1.9059457449633087e-06, "loss": 18.6406, "step": 17430 }, { "epoch": 0.16500222451510305, "grad_norm": 202.3921356201172, "learning_rate": 1.9059327639787374e-06, "loss": 23.7656, "step": 17431 }, { "epoch": 0.16501169053681808, "grad_norm": 294.8594970703125, "learning_rate": 1.9059197821426482e-06, "loss": 18.6055, "step": 17432 }, { "epoch": 0.16502115655853314, "grad_norm": 2.6194286346435547, "learning_rate": 1.9059067994550532e-06, "loss": 0.8018, "step": 17433 }, { "epoch": 0.1650306225802482, "grad_norm": 1078.1204833984375, "learning_rate": 1.9058938159159649e-06, "loss": 45.5156, "step": 17434 }, { "epoch": 0.16504008860196326, "grad_norm": 684.2207641601562, "learning_rate": 1.9058808315253953e-06, "loss": 40.3438, "step": 17435 }, { "epoch": 0.16504955462367832, "grad_norm": 2.8628129959106445, "learning_rate": 1.9058678462833563e-06, "loss": 0.8311, "step": 17436 }, { "epoch": 0.16505902064539335, "grad_norm": 431.0395812988281, "learning_rate": 1.905854860189861e-06, "loss": 24.7812, "step": 17437 }, { "epoch": 0.1650684866671084, "grad_norm": 504.46917724609375, "learning_rate": 1.9058418732449208e-06, "loss": 48.75, "step": 17438 }, { "epoch": 0.16507795268882347, "grad_norm": 928.3423461914062, "learning_rate": 1.9058288854485483e-06, "loss": 66.8281, "step": 17439 }, { "epoch": 0.16508741871053853, "grad_norm": 189.87057495117188, "learning_rate": 1.9058158968007554e-06, "loss": 21.3477, "step": 17440 }, { "epoch": 0.16509688473225356, "grad_norm": 406.949462890625, "learning_rate": 1.9058029073015546e-06, "loss": 20.6016, "step": 17441 }, { "epoch": 0.16510635075396862, "grad_norm": 579.3365478515625, "learning_rate": 1.9057899169509578e-06, "loss": 47.0469, "step": 17442 }, { "epoch": 0.16511581677568368, "grad_norm": 241.85130310058594, "learning_rate": 1.9057769257489777e-06, "loss": 17.2734, "step": 17443 }, { "epoch": 0.16512528279739874, "grad_norm": 300.6641540527344, "learning_rate": 1.905763933695626e-06, "loss": 22.3984, "step": 17444 }, { "epoch": 0.1651347488191138, "grad_norm": 364.77325439453125, "learning_rate": 1.905750940790915e-06, "loss": 9.8848, "step": 17445 }, { "epoch": 0.16514421484082883, "grad_norm": 145.34217834472656, "learning_rate": 1.9057379470348577e-06, "loss": 19.7188, "step": 17446 }, { "epoch": 0.1651536808625439, "grad_norm": 260.9282531738281, "learning_rate": 1.905724952427465e-06, "loss": 21.8555, "step": 17447 }, { "epoch": 0.16516314688425895, "grad_norm": 475.71258544921875, "learning_rate": 1.90571195696875e-06, "loss": 39.3906, "step": 17448 }, { "epoch": 0.16517261290597401, "grad_norm": 364.8771057128906, "learning_rate": 1.9056989606587247e-06, "loss": 50.5938, "step": 17449 }, { "epoch": 0.16518207892768905, "grad_norm": 382.34381103515625, "learning_rate": 1.9056859634974013e-06, "loss": 28.5234, "step": 17450 }, { "epoch": 0.1651915449494041, "grad_norm": 598.556884765625, "learning_rate": 1.9056729654847918e-06, "loss": 44.918, "step": 17451 }, { "epoch": 0.16520101097111917, "grad_norm": 533.8781127929688, "learning_rate": 1.9056599666209087e-06, "loss": 52.2188, "step": 17452 }, { "epoch": 0.16521047699283423, "grad_norm": 206.83224487304688, "learning_rate": 1.9056469669057642e-06, "loss": 9.6406, "step": 17453 }, { "epoch": 0.1652199430145493, "grad_norm": 658.0169677734375, "learning_rate": 1.9056339663393705e-06, "loss": 45.4375, "step": 17454 }, { "epoch": 0.16522940903626432, "grad_norm": 475.63531494140625, "learning_rate": 1.9056209649217398e-06, "loss": 16.6406, "step": 17455 }, { "epoch": 0.16523887505797938, "grad_norm": 361.7275085449219, "learning_rate": 1.9056079626528842e-06, "loss": 31.4688, "step": 17456 }, { "epoch": 0.16524834107969444, "grad_norm": 426.32855224609375, "learning_rate": 1.9055949595328164e-06, "loss": 19.9023, "step": 17457 }, { "epoch": 0.1652578071014095, "grad_norm": 554.1663208007812, "learning_rate": 1.9055819555615478e-06, "loss": 36.2656, "step": 17458 }, { "epoch": 0.16526727312312453, "grad_norm": 352.1636047363281, "learning_rate": 1.9055689507390913e-06, "loss": 24.0391, "step": 17459 }, { "epoch": 0.1652767391448396, "grad_norm": 2457.297119140625, "learning_rate": 1.905555945065459e-06, "loss": 40.0625, "step": 17460 }, { "epoch": 0.16528620516655465, "grad_norm": 172.90989685058594, "learning_rate": 1.9055429385406627e-06, "loss": 17.5742, "step": 17461 }, { "epoch": 0.1652956711882697, "grad_norm": 284.2088623046875, "learning_rate": 1.9055299311647151e-06, "loss": 19.5547, "step": 17462 }, { "epoch": 0.16530513720998477, "grad_norm": 691.6528930664062, "learning_rate": 1.9055169229376284e-06, "loss": 54.3906, "step": 17463 }, { "epoch": 0.1653146032316998, "grad_norm": 281.00018310546875, "learning_rate": 1.9055039138594146e-06, "loss": 17.6562, "step": 17464 }, { "epoch": 0.16532406925341486, "grad_norm": 225.76121520996094, "learning_rate": 1.905490903930086e-06, "loss": 21.6875, "step": 17465 }, { "epoch": 0.16533353527512992, "grad_norm": 437.7471008300781, "learning_rate": 1.9054778931496548e-06, "loss": 19.2891, "step": 17466 }, { "epoch": 0.16534300129684498, "grad_norm": 300.2461242675781, "learning_rate": 1.9054648815181334e-06, "loss": 19.7031, "step": 17467 }, { "epoch": 0.16535246731856004, "grad_norm": 683.804931640625, "learning_rate": 1.9054518690355339e-06, "loss": 41.8047, "step": 17468 }, { "epoch": 0.16536193334027507, "grad_norm": 455.3758544921875, "learning_rate": 1.9054388557018687e-06, "loss": 45.7812, "step": 17469 }, { "epoch": 0.16537139936199013, "grad_norm": 216.4862518310547, "learning_rate": 1.9054258415171494e-06, "loss": 24.9609, "step": 17470 }, { "epoch": 0.1653808653837052, "grad_norm": 261.44964599609375, "learning_rate": 1.905412826481389e-06, "loss": 30.0, "step": 17471 }, { "epoch": 0.16539033140542025, "grad_norm": 379.0142517089844, "learning_rate": 1.9053998105945995e-06, "loss": 23.6172, "step": 17472 }, { "epoch": 0.1653997974271353, "grad_norm": 981.3734130859375, "learning_rate": 1.9053867938567932e-06, "loss": 27.3203, "step": 17473 }, { "epoch": 0.16540926344885035, "grad_norm": 384.907958984375, "learning_rate": 1.9053737762679817e-06, "loss": 19.3047, "step": 17474 }, { "epoch": 0.1654187294705654, "grad_norm": 2.8409032821655273, "learning_rate": 1.9053607578281783e-06, "loss": 0.9224, "step": 17475 }, { "epoch": 0.16542819549228047, "grad_norm": 216.22903442382812, "learning_rate": 1.9053477385373945e-06, "loss": 20.3203, "step": 17476 }, { "epoch": 0.16543766151399553, "grad_norm": 175.42303466796875, "learning_rate": 1.9053347183956427e-06, "loss": 14.1797, "step": 17477 }, { "epoch": 0.16544712753571056, "grad_norm": 249.78160095214844, "learning_rate": 1.905321697402935e-06, "loss": 24.0312, "step": 17478 }, { "epoch": 0.16545659355742562, "grad_norm": 483.0263366699219, "learning_rate": 1.905308675559284e-06, "loss": 37.7266, "step": 17479 }, { "epoch": 0.16546605957914068, "grad_norm": 361.2465515136719, "learning_rate": 1.9052956528647017e-06, "loss": 29.8125, "step": 17480 }, { "epoch": 0.16547552560085574, "grad_norm": 254.82867431640625, "learning_rate": 1.9052826293192003e-06, "loss": 18.4062, "step": 17481 }, { "epoch": 0.16548499162257077, "grad_norm": 308.0985412597656, "learning_rate": 1.9052696049227923e-06, "loss": 26.5938, "step": 17482 }, { "epoch": 0.16549445764428583, "grad_norm": 567.3588256835938, "learning_rate": 1.9052565796754899e-06, "loss": 52.8281, "step": 17483 }, { "epoch": 0.1655039236660009, "grad_norm": 1368.41015625, "learning_rate": 1.9052435535773047e-06, "loss": 54.3711, "step": 17484 }, { "epoch": 0.16551338968771595, "grad_norm": 441.4949645996094, "learning_rate": 1.9052305266282498e-06, "loss": 40.1953, "step": 17485 }, { "epoch": 0.165522855709431, "grad_norm": 166.63502502441406, "learning_rate": 1.905217498828337e-06, "loss": 13.1602, "step": 17486 }, { "epoch": 0.16553232173114604, "grad_norm": 269.75360107421875, "learning_rate": 1.9052044701775784e-06, "loss": 24.8438, "step": 17487 }, { "epoch": 0.1655417877528611, "grad_norm": 1941.5811767578125, "learning_rate": 1.905191440675987e-06, "loss": 51.9297, "step": 17488 }, { "epoch": 0.16555125377457616, "grad_norm": 260.94146728515625, "learning_rate": 1.9051784103235742e-06, "loss": 17.918, "step": 17489 }, { "epoch": 0.16556071979629122, "grad_norm": 422.0663146972656, "learning_rate": 1.9051653791203527e-06, "loss": 29.8203, "step": 17490 }, { "epoch": 0.16557018581800625, "grad_norm": 223.78103637695312, "learning_rate": 1.9051523470663344e-06, "loss": 8.5352, "step": 17491 }, { "epoch": 0.16557965183972131, "grad_norm": 552.6943359375, "learning_rate": 1.905139314161532e-06, "loss": 43.668, "step": 17492 }, { "epoch": 0.16558911786143637, "grad_norm": 332.2611999511719, "learning_rate": 1.9051262804059575e-06, "loss": 30.4219, "step": 17493 }, { "epoch": 0.16559858388315143, "grad_norm": 189.31246948242188, "learning_rate": 1.9051132457996233e-06, "loss": 21.6328, "step": 17494 }, { "epoch": 0.1656080499048665, "grad_norm": 361.1471252441406, "learning_rate": 1.9051002103425413e-06, "loss": 26.7969, "step": 17495 }, { "epoch": 0.16561751592658153, "grad_norm": 967.703125, "learning_rate": 1.9050871740347242e-06, "loss": 46.375, "step": 17496 }, { "epoch": 0.1656269819482966, "grad_norm": 398.5499267578125, "learning_rate": 1.9050741368761838e-06, "loss": 43.5859, "step": 17497 }, { "epoch": 0.16563644797001165, "grad_norm": 139.1147918701172, "learning_rate": 1.9050610988669326e-06, "loss": 13.5742, "step": 17498 }, { "epoch": 0.1656459139917267, "grad_norm": 478.48516845703125, "learning_rate": 1.9050480600069832e-06, "loss": 45.5938, "step": 17499 }, { "epoch": 0.16565538001344174, "grad_norm": 1298.537109375, "learning_rate": 1.905035020296347e-06, "loss": 63.25, "step": 17500 }, { "epoch": 0.1656648460351568, "grad_norm": 408.89422607421875, "learning_rate": 1.9050219797350372e-06, "loss": 43.2969, "step": 17501 }, { "epoch": 0.16567431205687186, "grad_norm": 208.45872497558594, "learning_rate": 1.9050089383230653e-06, "loss": 22.4688, "step": 17502 }, { "epoch": 0.16568377807858692, "grad_norm": 736.5292358398438, "learning_rate": 1.904995896060444e-06, "loss": 38.6094, "step": 17503 }, { "epoch": 0.16569324410030198, "grad_norm": 3.2901976108551025, "learning_rate": 1.9049828529471855e-06, "loss": 0.9219, "step": 17504 }, { "epoch": 0.165702710122017, "grad_norm": 489.8011169433594, "learning_rate": 1.904969808983302e-06, "loss": 18.3672, "step": 17505 }, { "epoch": 0.16571217614373207, "grad_norm": 560.5367431640625, "learning_rate": 1.9049567641688055e-06, "loss": 42.4062, "step": 17506 }, { "epoch": 0.16572164216544713, "grad_norm": 551.5521850585938, "learning_rate": 1.9049437185037086e-06, "loss": 35.9844, "step": 17507 }, { "epoch": 0.1657311081871622, "grad_norm": 430.78753662109375, "learning_rate": 1.9049306719880236e-06, "loss": 17.0938, "step": 17508 }, { "epoch": 0.16574057420887722, "grad_norm": 369.551513671875, "learning_rate": 1.9049176246217626e-06, "loss": 15.9531, "step": 17509 }, { "epoch": 0.16575004023059228, "grad_norm": 430.7459716796875, "learning_rate": 1.9049045764049381e-06, "loss": 18.3789, "step": 17510 }, { "epoch": 0.16575950625230734, "grad_norm": 785.5707397460938, "learning_rate": 1.9048915273375617e-06, "loss": 52.7031, "step": 17511 }, { "epoch": 0.1657689722740224, "grad_norm": 632.776123046875, "learning_rate": 1.9048784774196465e-06, "loss": 37.2031, "step": 17512 }, { "epoch": 0.16577843829573746, "grad_norm": 375.1039123535156, "learning_rate": 1.9048654266512043e-06, "loss": 12.3984, "step": 17513 }, { "epoch": 0.1657879043174525, "grad_norm": 524.6016235351562, "learning_rate": 1.9048523750322473e-06, "loss": 37.5938, "step": 17514 }, { "epoch": 0.16579737033916755, "grad_norm": 260.9999084472656, "learning_rate": 1.9048393225627883e-06, "loss": 15.7695, "step": 17515 }, { "epoch": 0.16580683636088261, "grad_norm": 1023.9266357421875, "learning_rate": 1.904826269242839e-06, "loss": 65.2344, "step": 17516 }, { "epoch": 0.16581630238259767, "grad_norm": 2.9306912422180176, "learning_rate": 1.9048132150724117e-06, "loss": 0.7778, "step": 17517 }, { "epoch": 0.1658257684043127, "grad_norm": 3.321288824081421, "learning_rate": 1.904800160051519e-06, "loss": 0.8086, "step": 17518 }, { "epoch": 0.16583523442602777, "grad_norm": 646.6973266601562, "learning_rate": 1.904787104180173e-06, "loss": 54.8281, "step": 17519 }, { "epoch": 0.16584470044774283, "grad_norm": 669.599609375, "learning_rate": 1.904774047458386e-06, "loss": 31.9062, "step": 17520 }, { "epoch": 0.1658541664694579, "grad_norm": 230.40538024902344, "learning_rate": 1.9047609898861703e-06, "loss": 30.4297, "step": 17521 }, { "epoch": 0.16586363249117295, "grad_norm": 455.5083923339844, "learning_rate": 1.9047479314635382e-06, "loss": 36.2969, "step": 17522 }, { "epoch": 0.16587309851288798, "grad_norm": 301.80804443359375, "learning_rate": 1.9047348721905018e-06, "loss": 22.8906, "step": 17523 }, { "epoch": 0.16588256453460304, "grad_norm": 413.2418212890625, "learning_rate": 1.9047218120670734e-06, "loss": 33.0859, "step": 17524 }, { "epoch": 0.1658920305563181, "grad_norm": 331.57855224609375, "learning_rate": 1.9047087510932651e-06, "loss": 16.7266, "step": 17525 }, { "epoch": 0.16590149657803316, "grad_norm": 459.75909423828125, "learning_rate": 1.90469568926909e-06, "loss": 21.2422, "step": 17526 }, { "epoch": 0.1659109625997482, "grad_norm": 309.9540100097656, "learning_rate": 1.9046826265945596e-06, "loss": 18.2109, "step": 17527 }, { "epoch": 0.16592042862146325, "grad_norm": 210.1006317138672, "learning_rate": 1.904669563069686e-06, "loss": 19.8047, "step": 17528 }, { "epoch": 0.1659298946431783, "grad_norm": 331.5171813964844, "learning_rate": 1.9046564986944822e-06, "loss": 25.9375, "step": 17529 }, { "epoch": 0.16593936066489337, "grad_norm": 693.3828125, "learning_rate": 1.9046434334689603e-06, "loss": 59.9375, "step": 17530 }, { "epoch": 0.16594882668660843, "grad_norm": 331.1889953613281, "learning_rate": 1.9046303673931322e-06, "loss": 18.8359, "step": 17531 }, { "epoch": 0.16595829270832346, "grad_norm": 472.9093322753906, "learning_rate": 1.9046173004670106e-06, "loss": 47.9766, "step": 17532 }, { "epoch": 0.16596775873003852, "grad_norm": 3.3124990463256836, "learning_rate": 1.9046042326906074e-06, "loss": 0.7698, "step": 17533 }, { "epoch": 0.16597722475175358, "grad_norm": 280.6656494140625, "learning_rate": 1.9045911640639352e-06, "loss": 16.7266, "step": 17534 }, { "epoch": 0.16598669077346864, "grad_norm": 348.37982177734375, "learning_rate": 1.904578094587006e-06, "loss": 49.5781, "step": 17535 }, { "epoch": 0.16599615679518367, "grad_norm": 386.5525817871094, "learning_rate": 1.9045650242598321e-06, "loss": 49.2188, "step": 17536 }, { "epoch": 0.16600562281689873, "grad_norm": 402.7718505859375, "learning_rate": 1.9045519530824263e-06, "loss": 43.625, "step": 17537 }, { "epoch": 0.1660150888386138, "grad_norm": 951.482421875, "learning_rate": 1.9045388810548001e-06, "loss": 61.8594, "step": 17538 }, { "epoch": 0.16602455486032885, "grad_norm": 340.7397155761719, "learning_rate": 1.9045258081769665e-06, "loss": 23.7031, "step": 17539 }, { "epoch": 0.16603402088204391, "grad_norm": 873.0079345703125, "learning_rate": 1.9045127344489376e-06, "loss": 71.7344, "step": 17540 }, { "epoch": 0.16604348690375895, "grad_norm": 295.83575439453125, "learning_rate": 1.9044996598707254e-06, "loss": 18.6875, "step": 17541 }, { "epoch": 0.166052952925474, "grad_norm": 204.02024841308594, "learning_rate": 1.9044865844423424e-06, "loss": 8.6797, "step": 17542 }, { "epoch": 0.16606241894718907, "grad_norm": 941.30029296875, "learning_rate": 1.9044735081638007e-06, "loss": 48.3086, "step": 17543 }, { "epoch": 0.16607188496890413, "grad_norm": 1187.2080078125, "learning_rate": 1.904460431035113e-06, "loss": 7.082, "step": 17544 }, { "epoch": 0.16608135099061916, "grad_norm": 315.45037841796875, "learning_rate": 1.904447353056291e-06, "loss": 25.0, "step": 17545 }, { "epoch": 0.16609081701233422, "grad_norm": 221.50184631347656, "learning_rate": 1.9044342742273476e-06, "loss": 25.4375, "step": 17546 }, { "epoch": 0.16610028303404928, "grad_norm": 705.7461547851562, "learning_rate": 1.9044211945482947e-06, "loss": 45.5469, "step": 17547 }, { "epoch": 0.16610974905576434, "grad_norm": 581.6356811523438, "learning_rate": 1.9044081140191448e-06, "loss": 52.0547, "step": 17548 }, { "epoch": 0.1661192150774794, "grad_norm": 232.07843017578125, "learning_rate": 1.9043950326399098e-06, "loss": 19.6406, "step": 17549 }, { "epoch": 0.16612868109919443, "grad_norm": 570.5678100585938, "learning_rate": 1.9043819504106027e-06, "loss": 29.7812, "step": 17550 }, { "epoch": 0.1661381471209095, "grad_norm": 357.05511474609375, "learning_rate": 1.9043688673312352e-06, "loss": 26.5938, "step": 17551 }, { "epoch": 0.16614761314262455, "grad_norm": 272.0744323730469, "learning_rate": 1.90435578340182e-06, "loss": 16.7969, "step": 17552 }, { "epoch": 0.1661570791643396, "grad_norm": 348.88653564453125, "learning_rate": 1.904342698622369e-06, "loss": 16.957, "step": 17553 }, { "epoch": 0.16616654518605467, "grad_norm": 260.9897155761719, "learning_rate": 1.9043296129928947e-06, "loss": 23.0469, "step": 17554 }, { "epoch": 0.1661760112077697, "grad_norm": 567.7647094726562, "learning_rate": 1.9043165265134093e-06, "loss": 32.7656, "step": 17555 }, { "epoch": 0.16618547722948476, "grad_norm": 213.37747192382812, "learning_rate": 1.9043034391839254e-06, "loss": 21.75, "step": 17556 }, { "epoch": 0.16619494325119982, "grad_norm": 4.090524673461914, "learning_rate": 1.904290351004455e-06, "loss": 0.9546, "step": 17557 }, { "epoch": 0.16620440927291488, "grad_norm": 1687.2283935546875, "learning_rate": 1.9042772619750104e-06, "loss": 19.4453, "step": 17558 }, { "epoch": 0.16621387529462991, "grad_norm": 685.069091796875, "learning_rate": 1.9042641720956044e-06, "loss": 30.6016, "step": 17559 }, { "epoch": 0.16622334131634497, "grad_norm": 441.974365234375, "learning_rate": 1.9042510813662484e-06, "loss": 40.2188, "step": 17560 }, { "epoch": 0.16623280733806003, "grad_norm": 353.1178283691406, "learning_rate": 1.9042379897869556e-06, "loss": 19.8594, "step": 17561 }, { "epoch": 0.1662422733597751, "grad_norm": 226.99598693847656, "learning_rate": 1.9042248973577377e-06, "loss": 14.9219, "step": 17562 }, { "epoch": 0.16625173938149015, "grad_norm": 1174.181396484375, "learning_rate": 1.9042118040786074e-06, "loss": 32.4375, "step": 17563 }, { "epoch": 0.1662612054032052, "grad_norm": 204.27928161621094, "learning_rate": 1.9041987099495764e-06, "loss": 13.8984, "step": 17564 }, { "epoch": 0.16627067142492025, "grad_norm": 253.1001434326172, "learning_rate": 1.9041856149706579e-06, "loss": 22.5703, "step": 17565 }, { "epoch": 0.1662801374466353, "grad_norm": 285.1921081542969, "learning_rate": 1.9041725191418637e-06, "loss": 34.3203, "step": 17566 }, { "epoch": 0.16628960346835037, "grad_norm": 432.8686828613281, "learning_rate": 1.904159422463206e-06, "loss": 45.2422, "step": 17567 }, { "epoch": 0.1662990694900654, "grad_norm": 372.8061828613281, "learning_rate": 1.9041463249346973e-06, "loss": 17.9961, "step": 17568 }, { "epoch": 0.16630853551178046, "grad_norm": 960.317138671875, "learning_rate": 1.9041332265563498e-06, "loss": 52.8984, "step": 17569 }, { "epoch": 0.16631800153349552, "grad_norm": 427.927001953125, "learning_rate": 1.9041201273281759e-06, "loss": 53.2812, "step": 17570 }, { "epoch": 0.16632746755521058, "grad_norm": 260.4722900390625, "learning_rate": 1.904107027250188e-06, "loss": 19.8594, "step": 17571 }, { "epoch": 0.16633693357692564, "grad_norm": 274.7586669921875, "learning_rate": 1.9040939263223982e-06, "loss": 17.7031, "step": 17572 }, { "epoch": 0.16634639959864067, "grad_norm": 516.5198974609375, "learning_rate": 1.904080824544819e-06, "loss": 43.2969, "step": 17573 }, { "epoch": 0.16635586562035573, "grad_norm": 817.337890625, "learning_rate": 1.9040677219174625e-06, "loss": 43.1562, "step": 17574 }, { "epoch": 0.1663653316420708, "grad_norm": 213.5312042236328, "learning_rate": 1.9040546184403413e-06, "loss": 16.4922, "step": 17575 }, { "epoch": 0.16637479766378585, "grad_norm": 379.9842529296875, "learning_rate": 1.9040415141134676e-06, "loss": 28.2109, "step": 17576 }, { "epoch": 0.16638426368550088, "grad_norm": 312.6467590332031, "learning_rate": 1.9040284089368536e-06, "loss": 44.4375, "step": 17577 }, { "epoch": 0.16639372970721594, "grad_norm": 368.03009033203125, "learning_rate": 1.9040153029105118e-06, "loss": 22.3828, "step": 17578 }, { "epoch": 0.166403195728931, "grad_norm": 331.3036193847656, "learning_rate": 1.9040021960344546e-06, "loss": 29.3672, "step": 17579 }, { "epoch": 0.16641266175064606, "grad_norm": 868.1847534179688, "learning_rate": 1.9039890883086937e-06, "loss": 45.1602, "step": 17580 }, { "epoch": 0.16642212777236112, "grad_norm": 658.0927734375, "learning_rate": 1.903975979733242e-06, "loss": 49.5156, "step": 17581 }, { "epoch": 0.16643159379407615, "grad_norm": 321.1667785644531, "learning_rate": 1.903962870308112e-06, "loss": 16.7695, "step": 17582 }, { "epoch": 0.16644105981579121, "grad_norm": 458.8502197265625, "learning_rate": 1.9039497600333153e-06, "loss": 25.1016, "step": 17583 }, { "epoch": 0.16645052583750627, "grad_norm": 402.8515625, "learning_rate": 1.903936648908865e-06, "loss": 29.0234, "step": 17584 }, { "epoch": 0.16645999185922133, "grad_norm": 290.2109680175781, "learning_rate": 1.9039235369347728e-06, "loss": 16.375, "step": 17585 }, { "epoch": 0.16646945788093637, "grad_norm": 758.2498168945312, "learning_rate": 1.9039104241110513e-06, "loss": 39.543, "step": 17586 }, { "epoch": 0.16647892390265143, "grad_norm": 232.66412353515625, "learning_rate": 1.9038973104377128e-06, "loss": 17.1797, "step": 17587 }, { "epoch": 0.1664883899243665, "grad_norm": 549.873291015625, "learning_rate": 1.9038841959147698e-06, "loss": 45.375, "step": 17588 }, { "epoch": 0.16649785594608155, "grad_norm": 459.9300231933594, "learning_rate": 1.9038710805422343e-06, "loss": 27.5625, "step": 17589 }, { "epoch": 0.1665073219677966, "grad_norm": 702.8592529296875, "learning_rate": 1.9038579643201186e-06, "loss": 31.0938, "step": 17590 }, { "epoch": 0.16651678798951164, "grad_norm": 242.56007385253906, "learning_rate": 1.9038448472484357e-06, "loss": 27.9414, "step": 17591 }, { "epoch": 0.1665262540112267, "grad_norm": 248.9016571044922, "learning_rate": 1.903831729327197e-06, "loss": 28.2344, "step": 17592 }, { "epoch": 0.16653572003294176, "grad_norm": 874.3299560546875, "learning_rate": 1.9038186105564154e-06, "loss": 69.8906, "step": 17593 }, { "epoch": 0.16654518605465682, "grad_norm": 592.4867553710938, "learning_rate": 1.9038054909361032e-06, "loss": 21.7578, "step": 17594 }, { "epoch": 0.16655465207637185, "grad_norm": 527.7444458007812, "learning_rate": 1.9037923704662725e-06, "loss": 17.2344, "step": 17595 }, { "epoch": 0.1665641180980869, "grad_norm": 863.6167602539062, "learning_rate": 1.903779249146936e-06, "loss": 44.5078, "step": 17596 }, { "epoch": 0.16657358411980197, "grad_norm": 272.0924377441406, "learning_rate": 1.9037661269781056e-06, "loss": 22.7969, "step": 17597 }, { "epoch": 0.16658305014151703, "grad_norm": 446.99853515625, "learning_rate": 1.9037530039597938e-06, "loss": 39.8594, "step": 17598 }, { "epoch": 0.1665925161632321, "grad_norm": 584.3058471679688, "learning_rate": 1.9037398800920133e-06, "loss": 23.9141, "step": 17599 }, { "epoch": 0.16660198218494712, "grad_norm": 481.2335205078125, "learning_rate": 1.9037267553747757e-06, "loss": 39.5781, "step": 17600 }, { "epoch": 0.16661144820666218, "grad_norm": 265.9800720214844, "learning_rate": 1.9037136298080939e-06, "loss": 14.6484, "step": 17601 }, { "epoch": 0.16662091422837724, "grad_norm": 362.1441650390625, "learning_rate": 1.9037005033919803e-06, "loss": 21.6289, "step": 17602 }, { "epoch": 0.1666303802500923, "grad_norm": 337.5423278808594, "learning_rate": 1.9036873761264464e-06, "loss": 40.75, "step": 17603 }, { "epoch": 0.16663984627180733, "grad_norm": 342.0367431640625, "learning_rate": 1.9036742480115055e-06, "loss": 20.8008, "step": 17604 }, { "epoch": 0.1666493122935224, "grad_norm": 320.5480041503906, "learning_rate": 1.90366111904717e-06, "loss": 20.7656, "step": 17605 }, { "epoch": 0.16665877831523745, "grad_norm": 402.43646240234375, "learning_rate": 1.9036479892334514e-06, "loss": 14.6719, "step": 17606 }, { "epoch": 0.16666824433695251, "grad_norm": 325.80535888671875, "learning_rate": 1.9036348585703624e-06, "loss": 30.8906, "step": 17607 }, { "epoch": 0.16667771035866757, "grad_norm": 340.5157470703125, "learning_rate": 1.9036217270579159e-06, "loss": 29.2969, "step": 17608 }, { "epoch": 0.1666871763803826, "grad_norm": 635.1835327148438, "learning_rate": 1.9036085946961231e-06, "loss": 42.9219, "step": 17609 }, { "epoch": 0.16669664240209767, "grad_norm": 388.62225341796875, "learning_rate": 1.9035954614849973e-06, "loss": 15.4453, "step": 17610 }, { "epoch": 0.16670610842381273, "grad_norm": 765.403076171875, "learning_rate": 1.9035823274245505e-06, "loss": 55.375, "step": 17611 }, { "epoch": 0.1667155744455278, "grad_norm": 343.6528015136719, "learning_rate": 1.9035691925147954e-06, "loss": 30.0938, "step": 17612 }, { "epoch": 0.16672504046724282, "grad_norm": 300.2288818359375, "learning_rate": 1.9035560567557439e-06, "loss": 37.5, "step": 17613 }, { "epoch": 0.16673450648895788, "grad_norm": 376.2392883300781, "learning_rate": 1.9035429201474082e-06, "loss": 15.9023, "step": 17614 }, { "epoch": 0.16674397251067294, "grad_norm": 4.302179336547852, "learning_rate": 1.903529782689801e-06, "loss": 1.0547, "step": 17615 }, { "epoch": 0.166753438532388, "grad_norm": 262.98284912109375, "learning_rate": 1.9035166443829352e-06, "loss": 20.6641, "step": 17616 }, { "epoch": 0.16676290455410306, "grad_norm": 166.8349609375, "learning_rate": 1.9035035052268218e-06, "loss": 24.2969, "step": 17617 }, { "epoch": 0.1667723705758181, "grad_norm": 388.0373229980469, "learning_rate": 1.9034903652214743e-06, "loss": 20.3594, "step": 17618 }, { "epoch": 0.16678183659753315, "grad_norm": 320.00628662109375, "learning_rate": 1.9034772243669044e-06, "loss": 22.5312, "step": 17619 }, { "epoch": 0.1667913026192482, "grad_norm": 559.9071044921875, "learning_rate": 1.9034640826631247e-06, "loss": 40.6875, "step": 17620 }, { "epoch": 0.16680076864096327, "grad_norm": 680.3812255859375, "learning_rate": 1.9034509401101477e-06, "loss": 6.1992, "step": 17621 }, { "epoch": 0.1668102346626783, "grad_norm": 303.6968688964844, "learning_rate": 1.9034377967079854e-06, "loss": 28.7188, "step": 17622 }, { "epoch": 0.16681970068439336, "grad_norm": 324.863037109375, "learning_rate": 1.9034246524566506e-06, "loss": 45.1719, "step": 17623 }, { "epoch": 0.16682916670610842, "grad_norm": 341.242431640625, "learning_rate": 1.9034115073561549e-06, "loss": 32.1484, "step": 17624 }, { "epoch": 0.16683863272782348, "grad_norm": 527.30859375, "learning_rate": 1.9033983614065116e-06, "loss": 42.6328, "step": 17625 }, { "epoch": 0.16684809874953854, "grad_norm": 898.049072265625, "learning_rate": 1.9033852146077326e-06, "loss": 65.0, "step": 17626 }, { "epoch": 0.16685756477125357, "grad_norm": 362.9295349121094, "learning_rate": 1.9033720669598302e-06, "loss": 36.3438, "step": 17627 }, { "epoch": 0.16686703079296863, "grad_norm": 683.8591918945312, "learning_rate": 1.9033589184628168e-06, "loss": 28.2266, "step": 17628 }, { "epoch": 0.1668764968146837, "grad_norm": 226.20999145507812, "learning_rate": 1.9033457691167048e-06, "loss": 17.8633, "step": 17629 }, { "epoch": 0.16688596283639875, "grad_norm": 343.6250305175781, "learning_rate": 1.9033326189215064e-06, "loss": 17.4453, "step": 17630 }, { "epoch": 0.1668954288581138, "grad_norm": 250.3340606689453, "learning_rate": 1.9033194678772342e-06, "loss": 30.3906, "step": 17631 }, { "epoch": 0.16690489487982885, "grad_norm": 318.9510498046875, "learning_rate": 1.9033063159839005e-06, "loss": 44.0547, "step": 17632 }, { "epoch": 0.1669143609015439, "grad_norm": 284.7974548339844, "learning_rate": 1.9032931632415176e-06, "loss": 31.3477, "step": 17633 }, { "epoch": 0.16692382692325897, "grad_norm": 499.61993408203125, "learning_rate": 1.903280009650098e-06, "loss": 33.2812, "step": 17634 }, { "epoch": 0.16693329294497403, "grad_norm": 404.04034423828125, "learning_rate": 1.9032668552096536e-06, "loss": 36.6406, "step": 17635 }, { "epoch": 0.16694275896668906, "grad_norm": 241.6040802001953, "learning_rate": 1.9032536999201977e-06, "loss": 18.5234, "step": 17636 }, { "epoch": 0.16695222498840412, "grad_norm": 234.95359802246094, "learning_rate": 1.9032405437817416e-06, "loss": 18.2656, "step": 17637 }, { "epoch": 0.16696169101011918, "grad_norm": 213.71636962890625, "learning_rate": 1.9032273867942982e-06, "loss": 29.3828, "step": 17638 }, { "epoch": 0.16697115703183424, "grad_norm": 3.263805866241455, "learning_rate": 1.90321422895788e-06, "loss": 0.7021, "step": 17639 }, { "epoch": 0.1669806230535493, "grad_norm": 260.0309753417969, "learning_rate": 1.903201070272499e-06, "loss": 20.4453, "step": 17640 }, { "epoch": 0.16699008907526433, "grad_norm": 293.8359069824219, "learning_rate": 1.9031879107381679e-06, "loss": 24.6758, "step": 17641 }, { "epoch": 0.1669995550969794, "grad_norm": 555.5476684570312, "learning_rate": 1.903174750354899e-06, "loss": 53.0117, "step": 17642 }, { "epoch": 0.16700902111869445, "grad_norm": 558.2454833984375, "learning_rate": 1.9031615891227043e-06, "loss": 24.7109, "step": 17643 }, { "epoch": 0.1670184871404095, "grad_norm": 268.9899597167969, "learning_rate": 1.9031484270415968e-06, "loss": 15.9219, "step": 17644 }, { "epoch": 0.16702795316212454, "grad_norm": 878.5142822265625, "learning_rate": 1.9031352641115884e-06, "loss": 36.0547, "step": 17645 }, { "epoch": 0.1670374191838396, "grad_norm": 235.3226776123047, "learning_rate": 1.9031221003326914e-06, "loss": 15.9453, "step": 17646 }, { "epoch": 0.16704688520555466, "grad_norm": 269.2769775390625, "learning_rate": 1.9031089357049183e-06, "loss": 16.8125, "step": 17647 }, { "epoch": 0.16705635122726972, "grad_norm": 172.7972869873047, "learning_rate": 1.9030957702282819e-06, "loss": 14.6602, "step": 17648 }, { "epoch": 0.16706581724898478, "grad_norm": 378.7185974121094, "learning_rate": 1.9030826039027943e-06, "loss": 20.2734, "step": 17649 }, { "epoch": 0.16707528327069981, "grad_norm": 195.5813751220703, "learning_rate": 1.9030694367284675e-06, "loss": 17.793, "step": 17650 }, { "epoch": 0.16708474929241487, "grad_norm": 374.4558410644531, "learning_rate": 1.9030562687053142e-06, "loss": 28.3906, "step": 17651 }, { "epoch": 0.16709421531412993, "grad_norm": 470.4617004394531, "learning_rate": 1.903043099833347e-06, "loss": 46.4688, "step": 17652 }, { "epoch": 0.167103681335845, "grad_norm": 482.7084655761719, "learning_rate": 1.9030299301125779e-06, "loss": 48.0469, "step": 17653 }, { "epoch": 0.16711314735756003, "grad_norm": 329.18658447265625, "learning_rate": 1.9030167595430194e-06, "loss": 19.7266, "step": 17654 }, { "epoch": 0.1671226133792751, "grad_norm": 347.3111267089844, "learning_rate": 1.903003588124684e-06, "loss": 21.1562, "step": 17655 }, { "epoch": 0.16713207940099015, "grad_norm": 309.45745849609375, "learning_rate": 1.902990415857584e-06, "loss": 25.1172, "step": 17656 }, { "epoch": 0.1671415454227052, "grad_norm": 579.4442749023438, "learning_rate": 1.9029772427417314e-06, "loss": 48.875, "step": 17657 }, { "epoch": 0.16715101144442027, "grad_norm": 515.3912963867188, "learning_rate": 1.9029640687771389e-06, "loss": 48.5625, "step": 17658 }, { "epoch": 0.1671604774661353, "grad_norm": 391.3013000488281, "learning_rate": 1.9029508939638192e-06, "loss": 24.3281, "step": 17659 }, { "epoch": 0.16716994348785036, "grad_norm": 788.8831787109375, "learning_rate": 1.9029377183017843e-06, "loss": 35.0977, "step": 17660 }, { "epoch": 0.16717940950956542, "grad_norm": 239.90005493164062, "learning_rate": 1.9029245417910468e-06, "loss": 19.875, "step": 17661 }, { "epoch": 0.16718887553128048, "grad_norm": 239.92262268066406, "learning_rate": 1.902911364431619e-06, "loss": 21.2422, "step": 17662 }, { "epoch": 0.1671983415529955, "grad_norm": 903.8651733398438, "learning_rate": 1.9028981862235132e-06, "loss": 41.25, "step": 17663 }, { "epoch": 0.16720780757471057, "grad_norm": 674.9968872070312, "learning_rate": 1.9028850071667418e-06, "loss": 37.625, "step": 17664 }, { "epoch": 0.16721727359642563, "grad_norm": 173.25674438476562, "learning_rate": 1.9028718272613173e-06, "loss": 17.4609, "step": 17665 }, { "epoch": 0.1672267396181407, "grad_norm": 129.79005432128906, "learning_rate": 1.9028586465072518e-06, "loss": 10.0234, "step": 17666 }, { "epoch": 0.16723620563985575, "grad_norm": 3.0381925106048584, "learning_rate": 1.902845464904558e-06, "loss": 0.8896, "step": 17667 }, { "epoch": 0.16724567166157078, "grad_norm": 267.30517578125, "learning_rate": 1.9028322824532484e-06, "loss": 21.9375, "step": 17668 }, { "epoch": 0.16725513768328584, "grad_norm": 464.1353759765625, "learning_rate": 1.9028190991533351e-06, "loss": 49.0547, "step": 17669 }, { "epoch": 0.1672646037050009, "grad_norm": 341.5248107910156, "learning_rate": 1.9028059150048304e-06, "loss": 36.125, "step": 17670 }, { "epoch": 0.16727406972671596, "grad_norm": 455.4871826171875, "learning_rate": 1.902792730007747e-06, "loss": 27.5469, "step": 17671 }, { "epoch": 0.167283535748431, "grad_norm": 670.85595703125, "learning_rate": 1.9027795441620973e-06, "loss": 12.2578, "step": 17672 }, { "epoch": 0.16729300177014605, "grad_norm": 208.12249755859375, "learning_rate": 1.9027663574678934e-06, "loss": 20.1875, "step": 17673 }, { "epoch": 0.16730246779186111, "grad_norm": 608.0801391601562, "learning_rate": 1.9027531699251479e-06, "loss": 39.0859, "step": 17674 }, { "epoch": 0.16731193381357617, "grad_norm": 859.7506103515625, "learning_rate": 1.902739981533873e-06, "loss": 54.4297, "step": 17675 }, { "epoch": 0.16732139983529123, "grad_norm": 360.2976989746094, "learning_rate": 1.9027267922940816e-06, "loss": 22.6211, "step": 17676 }, { "epoch": 0.16733086585700627, "grad_norm": 1163.53662109375, "learning_rate": 1.9027136022057854e-06, "loss": 73.7344, "step": 17677 }, { "epoch": 0.16734033187872133, "grad_norm": 376.98590087890625, "learning_rate": 1.9027004112689972e-06, "loss": 22.0156, "step": 17678 }, { "epoch": 0.1673497979004364, "grad_norm": 810.4246826171875, "learning_rate": 1.9026872194837296e-06, "loss": 50.7266, "step": 17679 }, { "epoch": 0.16735926392215145, "grad_norm": 254.180908203125, "learning_rate": 1.9026740268499944e-06, "loss": 28.7891, "step": 17680 }, { "epoch": 0.16736872994386648, "grad_norm": 670.5555419921875, "learning_rate": 1.9026608333678045e-06, "loss": 35.6172, "step": 17681 }, { "epoch": 0.16737819596558154, "grad_norm": 417.5400390625, "learning_rate": 1.902647639037172e-06, "loss": 21.4688, "step": 17682 }, { "epoch": 0.1673876619872966, "grad_norm": 372.73944091796875, "learning_rate": 1.9026344438581097e-06, "loss": 34.0781, "step": 17683 }, { "epoch": 0.16739712800901166, "grad_norm": 368.216064453125, "learning_rate": 1.9026212478306294e-06, "loss": 18.6094, "step": 17684 }, { "epoch": 0.16740659403072672, "grad_norm": 483.0911560058594, "learning_rate": 1.9026080509547442e-06, "loss": 21.6484, "step": 17685 }, { "epoch": 0.16741606005244175, "grad_norm": 222.82125854492188, "learning_rate": 1.9025948532304659e-06, "loss": 17.7344, "step": 17686 }, { "epoch": 0.1674255260741568, "grad_norm": 249.2345733642578, "learning_rate": 1.902581654657807e-06, "loss": 24.8047, "step": 17687 }, { "epoch": 0.16743499209587187, "grad_norm": 537.444580078125, "learning_rate": 1.9025684552367805e-06, "loss": 49.5781, "step": 17688 }, { "epoch": 0.16744445811758693, "grad_norm": 509.4107971191406, "learning_rate": 1.9025552549673981e-06, "loss": 56.4688, "step": 17689 }, { "epoch": 0.16745392413930196, "grad_norm": 241.20567321777344, "learning_rate": 1.9025420538496725e-06, "loss": 24.0312, "step": 17690 }, { "epoch": 0.16746339016101702, "grad_norm": 458.91009521484375, "learning_rate": 1.9025288518836162e-06, "loss": 39.4531, "step": 17691 }, { "epoch": 0.16747285618273208, "grad_norm": 322.17254638671875, "learning_rate": 1.9025156490692415e-06, "loss": 20.7969, "step": 17692 }, { "epoch": 0.16748232220444714, "grad_norm": 236.8173370361328, "learning_rate": 1.9025024454065606e-06, "loss": 8.1504, "step": 17693 }, { "epoch": 0.1674917882261622, "grad_norm": 296.8450622558594, "learning_rate": 1.902489240895586e-06, "loss": 18.7031, "step": 17694 }, { "epoch": 0.16750125424787723, "grad_norm": 284.1463623046875, "learning_rate": 1.9024760355363307e-06, "loss": 22.9688, "step": 17695 }, { "epoch": 0.1675107202695923, "grad_norm": 3.383134365081787, "learning_rate": 1.9024628293288063e-06, "loss": 0.936, "step": 17696 }, { "epoch": 0.16752018629130735, "grad_norm": 1269.6239013671875, "learning_rate": 1.9024496222730258e-06, "loss": 50.0391, "step": 17697 }, { "epoch": 0.16752965231302241, "grad_norm": 640.1484985351562, "learning_rate": 1.9024364143690012e-06, "loss": 36.8438, "step": 17698 }, { "epoch": 0.16753911833473745, "grad_norm": 268.4124755859375, "learning_rate": 1.902423205616745e-06, "loss": 18.5078, "step": 17699 }, { "epoch": 0.1675485843564525, "grad_norm": 295.1808166503906, "learning_rate": 1.9024099960162698e-06, "loss": 16.2969, "step": 17700 }, { "epoch": 0.16755805037816757, "grad_norm": 343.1858215332031, "learning_rate": 1.902396785567588e-06, "loss": 29.25, "step": 17701 }, { "epoch": 0.16756751639988263, "grad_norm": 593.0007934570312, "learning_rate": 1.9023835742707116e-06, "loss": 25.3984, "step": 17702 }, { "epoch": 0.1675769824215977, "grad_norm": 451.3909606933594, "learning_rate": 1.9023703621256538e-06, "loss": 30.8203, "step": 17703 }, { "epoch": 0.16758644844331272, "grad_norm": 462.23577880859375, "learning_rate": 1.9023571491324263e-06, "loss": 39.2656, "step": 17704 }, { "epoch": 0.16759591446502778, "grad_norm": 785.9237060546875, "learning_rate": 1.9023439352910418e-06, "loss": 50.875, "step": 17705 }, { "epoch": 0.16760538048674284, "grad_norm": 562.3674926757812, "learning_rate": 1.9023307206015126e-06, "loss": 50.0625, "step": 17706 }, { "epoch": 0.1676148465084579, "grad_norm": 442.06982421875, "learning_rate": 1.9023175050638513e-06, "loss": 19.3047, "step": 17707 }, { "epoch": 0.16762431253017293, "grad_norm": 524.97216796875, "learning_rate": 1.9023042886780703e-06, "loss": 33.1172, "step": 17708 }, { "epoch": 0.167633778551888, "grad_norm": 332.382568359375, "learning_rate": 1.902291071444182e-06, "loss": 18.6094, "step": 17709 }, { "epoch": 0.16764324457360305, "grad_norm": 456.53076171875, "learning_rate": 1.902277853362199e-06, "loss": 23.0781, "step": 17710 }, { "epoch": 0.1676527105953181, "grad_norm": 180.86087036132812, "learning_rate": 1.9022646344321331e-06, "loss": 22.9453, "step": 17711 }, { "epoch": 0.16766217661703317, "grad_norm": 516.3060302734375, "learning_rate": 1.9022514146539972e-06, "loss": 23.6016, "step": 17712 }, { "epoch": 0.1676716426387482, "grad_norm": 409.3343505859375, "learning_rate": 1.9022381940278037e-06, "loss": 31.2188, "step": 17713 }, { "epoch": 0.16768110866046326, "grad_norm": 499.1239318847656, "learning_rate": 1.902224972553565e-06, "loss": 37.9219, "step": 17714 }, { "epoch": 0.16769057468217832, "grad_norm": 335.83111572265625, "learning_rate": 1.9022117502312938e-06, "loss": 18.7109, "step": 17715 }, { "epoch": 0.16770004070389338, "grad_norm": 4.254152297973633, "learning_rate": 1.9021985270610016e-06, "loss": 1.0688, "step": 17716 }, { "epoch": 0.16770950672560841, "grad_norm": 930.213623046875, "learning_rate": 1.9021853030427022e-06, "loss": 40.8594, "step": 17717 }, { "epoch": 0.16771897274732347, "grad_norm": 599.6341552734375, "learning_rate": 1.9021720781764068e-06, "loss": 28.0703, "step": 17718 }, { "epoch": 0.16772843876903853, "grad_norm": 313.07562255859375, "learning_rate": 1.9021588524621289e-06, "loss": 19.9336, "step": 17719 }, { "epoch": 0.1677379047907536, "grad_norm": 561.8310546875, "learning_rate": 1.9021456258998797e-06, "loss": 55.8594, "step": 17720 }, { "epoch": 0.16774737081246865, "grad_norm": 909.892578125, "learning_rate": 1.9021323984896727e-06, "loss": 49.7188, "step": 17721 }, { "epoch": 0.1677568368341837, "grad_norm": 598.6032104492188, "learning_rate": 1.9021191702315198e-06, "loss": 26.1875, "step": 17722 }, { "epoch": 0.16776630285589875, "grad_norm": 716.4925537109375, "learning_rate": 1.9021059411254337e-06, "loss": 49.6719, "step": 17723 }, { "epoch": 0.1677757688776138, "grad_norm": 1362.068603515625, "learning_rate": 1.9020927111714264e-06, "loss": 51.793, "step": 17724 }, { "epoch": 0.16778523489932887, "grad_norm": 693.08154296875, "learning_rate": 1.9020794803695108e-06, "loss": 60.3438, "step": 17725 }, { "epoch": 0.16779470092104393, "grad_norm": 785.204833984375, "learning_rate": 1.9020662487196995e-06, "loss": 45.2031, "step": 17726 }, { "epoch": 0.16780416694275896, "grad_norm": 338.90087890625, "learning_rate": 1.9020530162220042e-06, "loss": 21.1016, "step": 17727 }, { "epoch": 0.16781363296447402, "grad_norm": 891.704833984375, "learning_rate": 1.9020397828764378e-06, "loss": 20.1387, "step": 17728 }, { "epoch": 0.16782309898618908, "grad_norm": 619.4381713867188, "learning_rate": 1.902026548683013e-06, "loss": 14.1562, "step": 17729 }, { "epoch": 0.16783256500790414, "grad_norm": 1385.9609375, "learning_rate": 1.9020133136417415e-06, "loss": 25.6992, "step": 17730 }, { "epoch": 0.16784203102961917, "grad_norm": 265.01177978515625, "learning_rate": 1.9020000777526363e-06, "loss": 23.9609, "step": 17731 }, { "epoch": 0.16785149705133423, "grad_norm": 392.3757019042969, "learning_rate": 1.90198684101571e-06, "loss": 27.3203, "step": 17732 }, { "epoch": 0.1678609630730493, "grad_norm": 328.0782470703125, "learning_rate": 1.9019736034309744e-06, "loss": 24.3984, "step": 17733 }, { "epoch": 0.16787042909476435, "grad_norm": 1046.01123046875, "learning_rate": 1.9019603649984424e-06, "loss": 50.9062, "step": 17734 }, { "epoch": 0.1678798951164794, "grad_norm": 3.637129783630371, "learning_rate": 1.9019471257181263e-06, "loss": 1.1055, "step": 17735 }, { "epoch": 0.16788936113819444, "grad_norm": 1050.9591064453125, "learning_rate": 1.9019338855900384e-06, "loss": 81.7734, "step": 17736 }, { "epoch": 0.1678988271599095, "grad_norm": 315.4830017089844, "learning_rate": 1.9019206446141915e-06, "loss": 17.7188, "step": 17737 }, { "epoch": 0.16790829318162456, "grad_norm": 623.521484375, "learning_rate": 1.9019074027905982e-06, "loss": 24.8203, "step": 17738 }, { "epoch": 0.16791775920333962, "grad_norm": 271.83984375, "learning_rate": 1.9018941601192702e-06, "loss": 26.3828, "step": 17739 }, { "epoch": 0.16792722522505465, "grad_norm": 712.6432495117188, "learning_rate": 1.9018809166002202e-06, "loss": 20.1719, "step": 17740 }, { "epoch": 0.16793669124676971, "grad_norm": 596.4508666992188, "learning_rate": 1.9018676722334613e-06, "loss": 28.0469, "step": 17741 }, { "epoch": 0.16794615726848477, "grad_norm": 707.4386596679688, "learning_rate": 1.9018544270190052e-06, "loss": 21.7734, "step": 17742 }, { "epoch": 0.16795562329019983, "grad_norm": 244.18191528320312, "learning_rate": 1.9018411809568646e-06, "loss": 8.9023, "step": 17743 }, { "epoch": 0.1679650893119149, "grad_norm": 514.0537109375, "learning_rate": 1.901827934047052e-06, "loss": 22.1406, "step": 17744 }, { "epoch": 0.16797455533362993, "grad_norm": 528.3394775390625, "learning_rate": 1.9018146862895797e-06, "loss": 36.1641, "step": 17745 }, { "epoch": 0.167984021355345, "grad_norm": 849.8679809570312, "learning_rate": 1.9018014376844604e-06, "loss": 21.2266, "step": 17746 }, { "epoch": 0.16799348737706005, "grad_norm": 265.3690185546875, "learning_rate": 1.9017881882317064e-06, "loss": 20.6875, "step": 17747 }, { "epoch": 0.1680029533987751, "grad_norm": 305.3695983886719, "learning_rate": 1.90177493793133e-06, "loss": 20.2344, "step": 17748 }, { "epoch": 0.16801241942049014, "grad_norm": 1229.5718994140625, "learning_rate": 1.9017616867833442e-06, "loss": 24.2422, "step": 17749 }, { "epoch": 0.1680218854422052, "grad_norm": 189.28660583496094, "learning_rate": 1.9017484347877608e-06, "loss": 14.6875, "step": 17750 }, { "epoch": 0.16803135146392026, "grad_norm": 246.96121215820312, "learning_rate": 1.9017351819445927e-06, "loss": 25.3828, "step": 17751 }, { "epoch": 0.16804081748563532, "grad_norm": 406.8390197753906, "learning_rate": 1.9017219282538518e-06, "loss": 40.8906, "step": 17752 }, { "epoch": 0.16805028350735038, "grad_norm": 642.6065673828125, "learning_rate": 1.9017086737155514e-06, "loss": 19.1445, "step": 17753 }, { "epoch": 0.1680597495290654, "grad_norm": 292.57330322265625, "learning_rate": 1.9016954183297034e-06, "loss": 42.0469, "step": 17754 }, { "epoch": 0.16806921555078047, "grad_norm": 452.9516906738281, "learning_rate": 1.9016821620963204e-06, "loss": 41.0469, "step": 17755 }, { "epoch": 0.16807868157249553, "grad_norm": 289.8093566894531, "learning_rate": 1.9016689050154144e-06, "loss": 17.9297, "step": 17756 }, { "epoch": 0.1680881475942106, "grad_norm": 251.84051513671875, "learning_rate": 1.9016556470869988e-06, "loss": 19.3398, "step": 17757 }, { "epoch": 0.16809761361592562, "grad_norm": 294.21588134765625, "learning_rate": 1.9016423883110853e-06, "loss": 25.375, "step": 17758 }, { "epoch": 0.16810707963764068, "grad_norm": 544.9436645507812, "learning_rate": 1.9016291286876867e-06, "loss": 47.9922, "step": 17759 }, { "epoch": 0.16811654565935574, "grad_norm": 2.4725825786590576, "learning_rate": 1.9016158682168153e-06, "loss": 0.7637, "step": 17760 }, { "epoch": 0.1681260116810708, "grad_norm": 446.65264892578125, "learning_rate": 1.901602606898484e-06, "loss": 49.4688, "step": 17761 }, { "epoch": 0.16813547770278586, "grad_norm": 198.07745361328125, "learning_rate": 1.9015893447327043e-06, "loss": 12.4805, "step": 17762 }, { "epoch": 0.1681449437245009, "grad_norm": 261.5894775390625, "learning_rate": 1.9015760817194898e-06, "loss": 18.9531, "step": 17763 }, { "epoch": 0.16815440974621595, "grad_norm": 174.38919067382812, "learning_rate": 1.9015628178588519e-06, "loss": 15.9062, "step": 17764 }, { "epoch": 0.16816387576793101, "grad_norm": 396.42681884765625, "learning_rate": 1.9015495531508038e-06, "loss": 38.9688, "step": 17765 }, { "epoch": 0.16817334178964607, "grad_norm": 405.6079406738281, "learning_rate": 1.901536287595358e-06, "loss": 17.9688, "step": 17766 }, { "epoch": 0.1681828078113611, "grad_norm": 392.3843994140625, "learning_rate": 1.9015230211925267e-06, "loss": 31.0, "step": 17767 }, { "epoch": 0.16819227383307617, "grad_norm": 2.960378885269165, "learning_rate": 1.901509753942322e-06, "loss": 1.0112, "step": 17768 }, { "epoch": 0.16820173985479123, "grad_norm": 242.2855682373047, "learning_rate": 1.9014964858447574e-06, "loss": 33.7969, "step": 17769 }, { "epoch": 0.1682112058765063, "grad_norm": 351.17388916015625, "learning_rate": 1.9014832168998444e-06, "loss": 20.1953, "step": 17770 }, { "epoch": 0.16822067189822135, "grad_norm": 1036.538330078125, "learning_rate": 1.901469947107596e-06, "loss": 56.7031, "step": 17771 }, { "epoch": 0.16823013791993638, "grad_norm": 277.4615173339844, "learning_rate": 1.9014566764680241e-06, "loss": 15.8828, "step": 17772 }, { "epoch": 0.16823960394165144, "grad_norm": 477.1905822753906, "learning_rate": 1.9014434049811418e-06, "loss": 19.5703, "step": 17773 }, { "epoch": 0.1682490699633665, "grad_norm": 3.002081871032715, "learning_rate": 1.9014301326469614e-06, "loss": 1.0576, "step": 17774 }, { "epoch": 0.16825853598508156, "grad_norm": 208.9560546875, "learning_rate": 1.9014168594654954e-06, "loss": 19.6016, "step": 17775 }, { "epoch": 0.1682680020067966, "grad_norm": 330.0254211425781, "learning_rate": 1.9014035854367561e-06, "loss": 30.0, "step": 17776 }, { "epoch": 0.16827746802851165, "grad_norm": 875.5594482421875, "learning_rate": 1.9013903105607558e-06, "loss": 67.5156, "step": 17777 }, { "epoch": 0.1682869340502267, "grad_norm": 227.6334686279297, "learning_rate": 1.9013770348375075e-06, "loss": 17.5703, "step": 17778 }, { "epoch": 0.16829640007194177, "grad_norm": 728.2520751953125, "learning_rate": 1.9013637582670235e-06, "loss": 87.375, "step": 17779 }, { "epoch": 0.16830586609365683, "grad_norm": 299.20086669921875, "learning_rate": 1.9013504808493162e-06, "loss": 22.7188, "step": 17780 }, { "epoch": 0.16831533211537186, "grad_norm": 338.49383544921875, "learning_rate": 1.9013372025843979e-06, "loss": 24.1172, "step": 17781 }, { "epoch": 0.16832479813708692, "grad_norm": 213.378662109375, "learning_rate": 1.9013239234722814e-06, "loss": 19.1562, "step": 17782 }, { "epoch": 0.16833426415880198, "grad_norm": 3.0868895053863525, "learning_rate": 1.901310643512979e-06, "loss": 0.9185, "step": 17783 }, { "epoch": 0.16834373018051704, "grad_norm": 356.1834716796875, "learning_rate": 1.9012973627065033e-06, "loss": 39.1562, "step": 17784 }, { "epoch": 0.16835319620223207, "grad_norm": 638.5443725585938, "learning_rate": 1.9012840810528666e-06, "loss": 43.6562, "step": 17785 }, { "epoch": 0.16836266222394713, "grad_norm": 254.2024688720703, "learning_rate": 1.9012707985520815e-06, "loss": 14.1836, "step": 17786 }, { "epoch": 0.1683721282456622, "grad_norm": 957.7716064453125, "learning_rate": 1.9012575152041606e-06, "loss": 62.3945, "step": 17787 }, { "epoch": 0.16838159426737725, "grad_norm": 387.06524658203125, "learning_rate": 1.9012442310091161e-06, "loss": 29.2812, "step": 17788 }, { "epoch": 0.16839106028909231, "grad_norm": 153.4307403564453, "learning_rate": 1.9012309459669606e-06, "loss": 18.0, "step": 17789 }, { "epoch": 0.16840052631080735, "grad_norm": 358.12939453125, "learning_rate": 1.901217660077707e-06, "loss": 49.0625, "step": 17790 }, { "epoch": 0.1684099923325224, "grad_norm": 3.2531816959381104, "learning_rate": 1.9012043733413672e-06, "loss": 1.0107, "step": 17791 }, { "epoch": 0.16841945835423747, "grad_norm": 360.8478088378906, "learning_rate": 1.901191085757954e-06, "loss": 10.1094, "step": 17792 }, { "epoch": 0.16842892437595253, "grad_norm": 241.09178161621094, "learning_rate": 1.9011777973274796e-06, "loss": 14.6328, "step": 17793 }, { "epoch": 0.16843839039766756, "grad_norm": 289.9788513183594, "learning_rate": 1.9011645080499568e-06, "loss": 21.3828, "step": 17794 }, { "epoch": 0.16844785641938262, "grad_norm": 3.3087213039398193, "learning_rate": 1.9011512179253982e-06, "loss": 1.0049, "step": 17795 }, { "epoch": 0.16845732244109768, "grad_norm": 1057.461669921875, "learning_rate": 1.9011379269538159e-06, "loss": 34.8594, "step": 17796 }, { "epoch": 0.16846678846281274, "grad_norm": 1168.5474853515625, "learning_rate": 1.9011246351352225e-06, "loss": 44.7891, "step": 17797 }, { "epoch": 0.1684762544845278, "grad_norm": 656.6654663085938, "learning_rate": 1.9011113424696306e-06, "loss": 49.3906, "step": 17798 }, { "epoch": 0.16848572050624283, "grad_norm": 270.8592224121094, "learning_rate": 1.9010980489570528e-06, "loss": 18.625, "step": 17799 }, { "epoch": 0.1684951865279579, "grad_norm": 395.4127502441406, "learning_rate": 1.9010847545975013e-06, "loss": 41.1875, "step": 17800 }, { "epoch": 0.16850465254967295, "grad_norm": 210.2036895751953, "learning_rate": 1.9010714593909887e-06, "loss": 20.7656, "step": 17801 }, { "epoch": 0.168514118571388, "grad_norm": 482.3031311035156, "learning_rate": 1.9010581633375279e-06, "loss": 37.7344, "step": 17802 }, { "epoch": 0.16852358459310304, "grad_norm": 625.5570678710938, "learning_rate": 1.9010448664371306e-06, "loss": 31.9609, "step": 17803 }, { "epoch": 0.1685330506148181, "grad_norm": 339.39141845703125, "learning_rate": 1.90103156868981e-06, "loss": 20.9883, "step": 17804 }, { "epoch": 0.16854251663653316, "grad_norm": 283.72021484375, "learning_rate": 1.9010182700955784e-06, "loss": 15.1797, "step": 17805 }, { "epoch": 0.16855198265824822, "grad_norm": 345.8155822753906, "learning_rate": 1.901004970654448e-06, "loss": 20.7344, "step": 17806 }, { "epoch": 0.16856144867996328, "grad_norm": 495.6812744140625, "learning_rate": 1.9009916703664317e-06, "loss": 46.2969, "step": 17807 }, { "epoch": 0.16857091470167831, "grad_norm": 283.1066589355469, "learning_rate": 1.9009783692315418e-06, "loss": 7.6367, "step": 17808 }, { "epoch": 0.16858038072339337, "grad_norm": 315.3823547363281, "learning_rate": 1.9009650672497908e-06, "loss": 35.375, "step": 17809 }, { "epoch": 0.16858984674510843, "grad_norm": 3.126279592514038, "learning_rate": 1.9009517644211912e-06, "loss": 0.7871, "step": 17810 }, { "epoch": 0.1685993127668235, "grad_norm": 275.097412109375, "learning_rate": 1.9009384607457556e-06, "loss": 20.5391, "step": 17811 }, { "epoch": 0.16860877878853855, "grad_norm": 2.684793472290039, "learning_rate": 1.9009251562234967e-06, "loss": 0.8457, "step": 17812 }, { "epoch": 0.1686182448102536, "grad_norm": 636.2796020507812, "learning_rate": 1.9009118508544266e-06, "loss": 40.1406, "step": 17813 }, { "epoch": 0.16862771083196865, "grad_norm": 484.5663757324219, "learning_rate": 1.900898544638558e-06, "loss": 40.3281, "step": 17814 }, { "epoch": 0.1686371768536837, "grad_norm": 296.0337219238281, "learning_rate": 1.9008852375759032e-06, "loss": 20.5469, "step": 17815 }, { "epoch": 0.16864664287539877, "grad_norm": 426.0196838378906, "learning_rate": 1.900871929666475e-06, "loss": 21.8828, "step": 17816 }, { "epoch": 0.1686561088971138, "grad_norm": 439.8944091796875, "learning_rate": 1.9008586209102857e-06, "loss": 47.6562, "step": 17817 }, { "epoch": 0.16866557491882886, "grad_norm": 208.7421417236328, "learning_rate": 1.9008453113073482e-06, "loss": 17.375, "step": 17818 }, { "epoch": 0.16867504094054392, "grad_norm": 830.2057495117188, "learning_rate": 1.9008320008576743e-06, "loss": 27.4375, "step": 17819 }, { "epoch": 0.16868450696225898, "grad_norm": 410.4068908691406, "learning_rate": 1.9008186895612773e-06, "loss": 31.625, "step": 17820 }, { "epoch": 0.16869397298397404, "grad_norm": 554.7896118164062, "learning_rate": 1.9008053774181692e-06, "loss": 34.0781, "step": 17821 }, { "epoch": 0.16870343900568907, "grad_norm": 3.076225996017456, "learning_rate": 1.9007920644283627e-06, "loss": 0.9551, "step": 17822 }, { "epoch": 0.16871290502740413, "grad_norm": 273.26416015625, "learning_rate": 1.90077875059187e-06, "loss": 26.1523, "step": 17823 }, { "epoch": 0.1687223710491192, "grad_norm": 437.48968505859375, "learning_rate": 1.9007654359087039e-06, "loss": 7.8242, "step": 17824 }, { "epoch": 0.16873183707083425, "grad_norm": 295.3888244628906, "learning_rate": 1.9007521203788772e-06, "loss": 20.9609, "step": 17825 }, { "epoch": 0.16874130309254928, "grad_norm": 381.0657043457031, "learning_rate": 1.900738804002402e-06, "loss": 32.8828, "step": 17826 }, { "epoch": 0.16875076911426434, "grad_norm": 373.1103515625, "learning_rate": 1.9007254867792906e-06, "loss": 40.3594, "step": 17827 }, { "epoch": 0.1687602351359794, "grad_norm": 444.50048828125, "learning_rate": 1.9007121687095563e-06, "loss": 26.8008, "step": 17828 }, { "epoch": 0.16876970115769446, "grad_norm": 2.819035291671753, "learning_rate": 1.9006988497932108e-06, "loss": 0.7615, "step": 17829 }, { "epoch": 0.16877916717940952, "grad_norm": 499.438232421875, "learning_rate": 1.9006855300302673e-06, "loss": 47.5469, "step": 17830 }, { "epoch": 0.16878863320112455, "grad_norm": 919.3040771484375, "learning_rate": 1.9006722094207377e-06, "loss": 47.4961, "step": 17831 }, { "epoch": 0.16879809922283961, "grad_norm": 408.87396240234375, "learning_rate": 1.9006588879646351e-06, "loss": 31.0, "step": 17832 }, { "epoch": 0.16880756524455467, "grad_norm": 242.04811096191406, "learning_rate": 1.9006455656619714e-06, "loss": 17.3438, "step": 17833 }, { "epoch": 0.16881703126626973, "grad_norm": 217.59149169921875, "learning_rate": 1.9006322425127594e-06, "loss": 22.7188, "step": 17834 }, { "epoch": 0.16882649728798477, "grad_norm": 235.82156372070312, "learning_rate": 1.9006189185170123e-06, "loss": 22.9102, "step": 17835 }, { "epoch": 0.16883596330969983, "grad_norm": 381.9217834472656, "learning_rate": 1.9006055936747413e-06, "loss": 17.6641, "step": 17836 }, { "epoch": 0.1688454293314149, "grad_norm": 426.2496643066406, "learning_rate": 1.90059226798596e-06, "loss": 15.3828, "step": 17837 }, { "epoch": 0.16885489535312995, "grad_norm": 561.7144775390625, "learning_rate": 1.9005789414506808e-06, "loss": 30.7422, "step": 17838 }, { "epoch": 0.168864361374845, "grad_norm": 437.0665588378906, "learning_rate": 1.9005656140689155e-06, "loss": 44.0938, "step": 17839 }, { "epoch": 0.16887382739656004, "grad_norm": 345.266357421875, "learning_rate": 1.9005522858406773e-06, "loss": 24.8594, "step": 17840 }, { "epoch": 0.1688832934182751, "grad_norm": 489.565673828125, "learning_rate": 1.9005389567659784e-06, "loss": 35.375, "step": 17841 }, { "epoch": 0.16889275943999016, "grad_norm": 1063.00439453125, "learning_rate": 1.9005256268448316e-06, "loss": 36.0586, "step": 17842 }, { "epoch": 0.16890222546170522, "grad_norm": 274.7663269042969, "learning_rate": 1.9005122960772492e-06, "loss": 29.0781, "step": 17843 }, { "epoch": 0.16891169148342025, "grad_norm": 568.6911010742188, "learning_rate": 1.9004989644632441e-06, "loss": 31.2188, "step": 17844 }, { "epoch": 0.1689211575051353, "grad_norm": 3.3106443881988525, "learning_rate": 1.9004856320028285e-06, "loss": 0.8394, "step": 17845 }, { "epoch": 0.16893062352685037, "grad_norm": 253.1864013671875, "learning_rate": 1.9004722986960147e-06, "loss": 17.5625, "step": 17846 }, { "epoch": 0.16894008954856543, "grad_norm": 540.8236694335938, "learning_rate": 1.9004589645428158e-06, "loss": 54.5625, "step": 17847 }, { "epoch": 0.1689495555702805, "grad_norm": 241.75242614746094, "learning_rate": 1.9004456295432438e-06, "loss": 15.9141, "step": 17848 }, { "epoch": 0.16895902159199552, "grad_norm": 3.887639045715332, "learning_rate": 1.9004322936973116e-06, "loss": 0.7759, "step": 17849 }, { "epoch": 0.16896848761371058, "grad_norm": 553.3565673828125, "learning_rate": 1.9004189570050317e-06, "loss": 23.9844, "step": 17850 }, { "epoch": 0.16897795363542564, "grad_norm": 227.6580352783203, "learning_rate": 1.9004056194664164e-06, "loss": 19.0156, "step": 17851 }, { "epoch": 0.1689874196571407, "grad_norm": 246.80880737304688, "learning_rate": 1.9003922810814785e-06, "loss": 19.793, "step": 17852 }, { "epoch": 0.16899688567885573, "grad_norm": 521.127197265625, "learning_rate": 1.9003789418502305e-06, "loss": 46.9531, "step": 17853 }, { "epoch": 0.1690063517005708, "grad_norm": 271.4349060058594, "learning_rate": 1.9003656017726847e-06, "loss": 44.9375, "step": 17854 }, { "epoch": 0.16901581772228585, "grad_norm": 502.8125305175781, "learning_rate": 1.9003522608488537e-06, "loss": 42.5625, "step": 17855 }, { "epoch": 0.16902528374400091, "grad_norm": 312.7447814941406, "learning_rate": 1.9003389190787504e-06, "loss": 24.2188, "step": 17856 }, { "epoch": 0.16903474976571597, "grad_norm": 642.8194580078125, "learning_rate": 1.900325576462387e-06, "loss": 53.6016, "step": 17857 }, { "epoch": 0.169044215787431, "grad_norm": 552.0877685546875, "learning_rate": 1.9003122329997762e-06, "loss": 30.0703, "step": 17858 }, { "epoch": 0.16905368180914607, "grad_norm": 13145.43359375, "learning_rate": 1.9002988886909303e-06, "loss": 62.6328, "step": 17859 }, { "epoch": 0.16906314783086113, "grad_norm": 380.9663391113281, "learning_rate": 1.9002855435358623e-06, "loss": 20.8828, "step": 17860 }, { "epoch": 0.1690726138525762, "grad_norm": 349.7706604003906, "learning_rate": 1.9002721975345844e-06, "loss": 12.0898, "step": 17861 }, { "epoch": 0.16908207987429122, "grad_norm": 496.1686096191406, "learning_rate": 1.9002588506871092e-06, "loss": 27.8672, "step": 17862 }, { "epoch": 0.16909154589600628, "grad_norm": 249.62318420410156, "learning_rate": 1.900245502993449e-06, "loss": 22.1797, "step": 17863 }, { "epoch": 0.16910101191772134, "grad_norm": 375.92486572265625, "learning_rate": 1.9002321544536168e-06, "loss": 34.1562, "step": 17864 }, { "epoch": 0.1691104779394364, "grad_norm": 411.9693298339844, "learning_rate": 1.9002188050676247e-06, "loss": 44.4062, "step": 17865 }, { "epoch": 0.16911994396115146, "grad_norm": 389.6769714355469, "learning_rate": 1.9002054548354857e-06, "loss": 19.043, "step": 17866 }, { "epoch": 0.1691294099828665, "grad_norm": 1070.240966796875, "learning_rate": 1.9001921037572122e-06, "loss": 61.668, "step": 17867 }, { "epoch": 0.16913887600458155, "grad_norm": 411.69232177734375, "learning_rate": 1.9001787518328163e-06, "loss": 24.6172, "step": 17868 }, { "epoch": 0.1691483420262966, "grad_norm": 233.09951782226562, "learning_rate": 1.9001653990623113e-06, "loss": 17.5469, "step": 17869 }, { "epoch": 0.16915780804801167, "grad_norm": 379.72900390625, "learning_rate": 1.9001520454457094e-06, "loss": 25.4453, "step": 17870 }, { "epoch": 0.1691672740697267, "grad_norm": 1627.5369873046875, "learning_rate": 1.9001386909830228e-06, "loss": 14.4277, "step": 17871 }, { "epoch": 0.16917674009144176, "grad_norm": 578.2105102539062, "learning_rate": 1.9001253356742648e-06, "loss": 39.3281, "step": 17872 }, { "epoch": 0.16918620611315682, "grad_norm": 375.775146484375, "learning_rate": 1.9001119795194474e-06, "loss": 19.6641, "step": 17873 }, { "epoch": 0.16919567213487188, "grad_norm": 326.5600280761719, "learning_rate": 1.9000986225185831e-06, "loss": 28.6875, "step": 17874 }, { "epoch": 0.16920513815658694, "grad_norm": 175.64923095703125, "learning_rate": 1.9000852646716848e-06, "loss": 20.2188, "step": 17875 }, { "epoch": 0.16921460417830197, "grad_norm": 346.28515625, "learning_rate": 1.9000719059787649e-06, "loss": 16.5078, "step": 17876 }, { "epoch": 0.16922407020001703, "grad_norm": 307.8047180175781, "learning_rate": 1.900058546439836e-06, "loss": 10.5547, "step": 17877 }, { "epoch": 0.1692335362217321, "grad_norm": 537.9241333007812, "learning_rate": 1.9000451860549104e-06, "loss": 27.6562, "step": 17878 }, { "epoch": 0.16924300224344715, "grad_norm": 258.8063659667969, "learning_rate": 1.9000318248240013e-06, "loss": 16.3984, "step": 17879 }, { "epoch": 0.1692524682651622, "grad_norm": 282.595458984375, "learning_rate": 1.9000184627471208e-06, "loss": 34.9844, "step": 17880 }, { "epoch": 0.16926193428687725, "grad_norm": 935.4859619140625, "learning_rate": 1.9000050998242812e-06, "loss": 37.4375, "step": 17881 }, { "epoch": 0.1692714003085923, "grad_norm": 521.9757690429688, "learning_rate": 1.8999917360554954e-06, "loss": 40.7188, "step": 17882 }, { "epoch": 0.16928086633030737, "grad_norm": 453.9342041015625, "learning_rate": 1.8999783714407758e-06, "loss": 33.0312, "step": 17883 }, { "epoch": 0.16929033235202243, "grad_norm": 232.6048126220703, "learning_rate": 1.8999650059801353e-06, "loss": 20.6094, "step": 17884 }, { "epoch": 0.16929979837373746, "grad_norm": 598.7670288085938, "learning_rate": 1.8999516396735863e-06, "loss": 35.9219, "step": 17885 }, { "epoch": 0.16930926439545252, "grad_norm": 716.68017578125, "learning_rate": 1.8999382725211412e-06, "loss": 43.3281, "step": 17886 }, { "epoch": 0.16931873041716758, "grad_norm": 540.1483764648438, "learning_rate": 1.8999249045228126e-06, "loss": 14.9219, "step": 17887 }, { "epoch": 0.16932819643888264, "grad_norm": 343.8977966308594, "learning_rate": 1.8999115356786132e-06, "loss": 34.9922, "step": 17888 }, { "epoch": 0.16933766246059767, "grad_norm": 1175.350830078125, "learning_rate": 1.8998981659885557e-06, "loss": 65.2969, "step": 17889 }, { "epoch": 0.16934712848231273, "grad_norm": 391.184326171875, "learning_rate": 1.899884795452652e-06, "loss": 26.5312, "step": 17890 }, { "epoch": 0.1693565945040278, "grad_norm": 374.490966796875, "learning_rate": 1.8998714240709152e-06, "loss": 17.5703, "step": 17891 }, { "epoch": 0.16936606052574285, "grad_norm": 510.29022216796875, "learning_rate": 1.8998580518433583e-06, "loss": 55.1562, "step": 17892 }, { "epoch": 0.1693755265474579, "grad_norm": 307.5956115722656, "learning_rate": 1.8998446787699928e-06, "loss": 16.1328, "step": 17893 }, { "epoch": 0.16938499256917294, "grad_norm": 364.675537109375, "learning_rate": 1.8998313048508324e-06, "loss": 21.0391, "step": 17894 }, { "epoch": 0.169394458590888, "grad_norm": 933.7001953125, "learning_rate": 1.8998179300858888e-06, "loss": 66.1914, "step": 17895 }, { "epoch": 0.16940392461260306, "grad_norm": 526.345947265625, "learning_rate": 1.899804554475175e-06, "loss": 35.4844, "step": 17896 }, { "epoch": 0.16941339063431812, "grad_norm": 2.7978134155273438, "learning_rate": 1.8997911780187033e-06, "loss": 0.8433, "step": 17897 }, { "epoch": 0.16942285665603318, "grad_norm": 275.4418029785156, "learning_rate": 1.8997778007164865e-06, "loss": 21.5859, "step": 17898 }, { "epoch": 0.16943232267774821, "grad_norm": 3.2872204780578613, "learning_rate": 1.8997644225685369e-06, "loss": 0.9783, "step": 17899 }, { "epoch": 0.16944178869946327, "grad_norm": 467.91070556640625, "learning_rate": 1.8997510435748675e-06, "loss": 31.5156, "step": 17900 }, { "epoch": 0.16945125472117833, "grad_norm": 314.1378479003906, "learning_rate": 1.8997376637354905e-06, "loss": 28.0508, "step": 17901 }, { "epoch": 0.1694607207428934, "grad_norm": 413.9714660644531, "learning_rate": 1.8997242830504189e-06, "loss": 26.7109, "step": 17902 }, { "epoch": 0.16947018676460843, "grad_norm": 454.08355712890625, "learning_rate": 1.899710901519665e-06, "loss": 31.0703, "step": 17903 }, { "epoch": 0.1694796527863235, "grad_norm": 447.45196533203125, "learning_rate": 1.8996975191432409e-06, "loss": 13.8203, "step": 17904 }, { "epoch": 0.16948911880803855, "grad_norm": 306.5827941894531, "learning_rate": 1.8996841359211601e-06, "loss": 17.8828, "step": 17905 }, { "epoch": 0.1694985848297536, "grad_norm": 482.00811767578125, "learning_rate": 1.8996707518534345e-06, "loss": 17.125, "step": 17906 }, { "epoch": 0.16950805085146867, "grad_norm": 325.26019287109375, "learning_rate": 1.8996573669400773e-06, "loss": 33.1875, "step": 17907 }, { "epoch": 0.1695175168731837, "grad_norm": 2.614637613296509, "learning_rate": 1.8996439811811005e-06, "loss": 0.9194, "step": 17908 }, { "epoch": 0.16952698289489876, "grad_norm": 185.49945068359375, "learning_rate": 1.8996305945765165e-06, "loss": 16.6836, "step": 17909 }, { "epoch": 0.16953644891661382, "grad_norm": 238.22021484375, "learning_rate": 1.8996172071263388e-06, "loss": 26.9219, "step": 17910 }, { "epoch": 0.16954591493832888, "grad_norm": 343.1516418457031, "learning_rate": 1.8996038188305792e-06, "loss": 31.625, "step": 17911 }, { "epoch": 0.1695553809600439, "grad_norm": 278.3705749511719, "learning_rate": 1.8995904296892507e-06, "loss": 21.25, "step": 17912 }, { "epoch": 0.16956484698175897, "grad_norm": 563.8444213867188, "learning_rate": 1.8995770397023657e-06, "loss": 46.9062, "step": 17913 }, { "epoch": 0.16957431300347403, "grad_norm": 659.7127685546875, "learning_rate": 1.8995636488699366e-06, "loss": 23.5781, "step": 17914 }, { "epoch": 0.1695837790251891, "grad_norm": 301.1825866699219, "learning_rate": 1.899550257191976e-06, "loss": 23.2422, "step": 17915 }, { "epoch": 0.16959324504690415, "grad_norm": 3.4236085414886475, "learning_rate": 1.8995368646684971e-06, "loss": 0.9077, "step": 17916 }, { "epoch": 0.16960271106861918, "grad_norm": 766.3114013671875, "learning_rate": 1.899523471299512e-06, "loss": 53.9844, "step": 17917 }, { "epoch": 0.16961217709033424, "grad_norm": 209.3527374267578, "learning_rate": 1.8995100770850332e-06, "loss": 7.3125, "step": 17918 }, { "epoch": 0.1696216431120493, "grad_norm": 372.0699768066406, "learning_rate": 1.8994966820250737e-06, "loss": 36.75, "step": 17919 }, { "epoch": 0.16963110913376436, "grad_norm": 466.7545471191406, "learning_rate": 1.8994832861196456e-06, "loss": 31.875, "step": 17920 }, { "epoch": 0.1696405751554794, "grad_norm": 3.3364529609680176, "learning_rate": 1.899469889368762e-06, "loss": 0.9126, "step": 17921 }, { "epoch": 0.16965004117719445, "grad_norm": 794.6751098632812, "learning_rate": 1.899456491772435e-06, "loss": 31.4062, "step": 17922 }, { "epoch": 0.16965950719890951, "grad_norm": 734.7487182617188, "learning_rate": 1.8994430933306773e-06, "loss": 46.0469, "step": 17923 }, { "epoch": 0.16966897322062457, "grad_norm": 316.2149658203125, "learning_rate": 1.8994296940435018e-06, "loss": 25.7812, "step": 17924 }, { "epoch": 0.16967843924233963, "grad_norm": 227.4922332763672, "learning_rate": 1.899416293910921e-06, "loss": 23.1094, "step": 17925 }, { "epoch": 0.16968790526405467, "grad_norm": 539.3881225585938, "learning_rate": 1.899402892932947e-06, "loss": 30.5391, "step": 17926 }, { "epoch": 0.16969737128576973, "grad_norm": 443.78509521484375, "learning_rate": 1.8993894911095932e-06, "loss": 19.0234, "step": 17927 }, { "epoch": 0.1697068373074848, "grad_norm": 244.57034301757812, "learning_rate": 1.8993760884408716e-06, "loss": 15.8359, "step": 17928 }, { "epoch": 0.16971630332919985, "grad_norm": 632.91650390625, "learning_rate": 1.8993626849267948e-06, "loss": 40.6875, "step": 17929 }, { "epoch": 0.16972576935091488, "grad_norm": 937.2901611328125, "learning_rate": 1.899349280567376e-06, "loss": 44.4141, "step": 17930 }, { "epoch": 0.16973523537262994, "grad_norm": 336.186767578125, "learning_rate": 1.899335875362627e-06, "loss": 24.9141, "step": 17931 }, { "epoch": 0.169744701394345, "grad_norm": 777.3245239257812, "learning_rate": 1.899322469312561e-06, "loss": 42.4141, "step": 17932 }, { "epoch": 0.16975416741606006, "grad_norm": 543.038330078125, "learning_rate": 1.8993090624171902e-06, "loss": 47.75, "step": 17933 }, { "epoch": 0.16976363343777512, "grad_norm": 447.0935363769531, "learning_rate": 1.8992956546765276e-06, "loss": 18.8984, "step": 17934 }, { "epoch": 0.16977309945949015, "grad_norm": 357.9447937011719, "learning_rate": 1.8992822460905855e-06, "loss": 26.8828, "step": 17935 }, { "epoch": 0.1697825654812052, "grad_norm": 1158.2283935546875, "learning_rate": 1.8992688366593766e-06, "loss": 54.3086, "step": 17936 }, { "epoch": 0.16979203150292027, "grad_norm": 952.6947021484375, "learning_rate": 1.8992554263829134e-06, "loss": 11.0703, "step": 17937 }, { "epoch": 0.16980149752463533, "grad_norm": 490.2541809082031, "learning_rate": 1.8992420152612087e-06, "loss": 21.9766, "step": 17938 }, { "epoch": 0.16981096354635036, "grad_norm": 653.3577270507812, "learning_rate": 1.899228603294275e-06, "loss": 48.7188, "step": 17939 }, { "epoch": 0.16982042956806542, "grad_norm": 392.95208740234375, "learning_rate": 1.899215190482125e-06, "loss": 47.9219, "step": 17940 }, { "epoch": 0.16982989558978048, "grad_norm": 454.2657470703125, "learning_rate": 1.899201776824771e-06, "loss": 52.6719, "step": 17941 }, { "epoch": 0.16983936161149554, "grad_norm": 530.7131958007812, "learning_rate": 1.899188362322226e-06, "loss": 24.9766, "step": 17942 }, { "epoch": 0.1698488276332106, "grad_norm": 632.1306762695312, "learning_rate": 1.8991749469745022e-06, "loss": 37.6875, "step": 17943 }, { "epoch": 0.16985829365492564, "grad_norm": 583.5343017578125, "learning_rate": 1.8991615307816127e-06, "loss": 20.1289, "step": 17944 }, { "epoch": 0.1698677596766407, "grad_norm": 637.77197265625, "learning_rate": 1.8991481137435699e-06, "loss": 51.7578, "step": 17945 }, { "epoch": 0.16987722569835575, "grad_norm": 465.25079345703125, "learning_rate": 1.8991346958603861e-06, "loss": 30.7344, "step": 17946 }, { "epoch": 0.16988669172007081, "grad_norm": 846.3147583007812, "learning_rate": 1.8991212771320744e-06, "loss": 16.4453, "step": 17947 }, { "epoch": 0.16989615774178585, "grad_norm": 489.2130126953125, "learning_rate": 1.8991078575586472e-06, "loss": 18.6562, "step": 17948 }, { "epoch": 0.1699056237635009, "grad_norm": 246.97079467773438, "learning_rate": 1.899094437140117e-06, "loss": 17.7578, "step": 17949 }, { "epoch": 0.16991508978521597, "grad_norm": 348.6344909667969, "learning_rate": 1.8990810158764966e-06, "loss": 23.75, "step": 17950 }, { "epoch": 0.16992455580693103, "grad_norm": 561.7269287109375, "learning_rate": 1.8990675937677985e-06, "loss": 43.5508, "step": 17951 }, { "epoch": 0.1699340218286461, "grad_norm": 375.33868408203125, "learning_rate": 1.8990541708140352e-06, "loss": 37.875, "step": 17952 }, { "epoch": 0.16994348785036112, "grad_norm": 262.9178161621094, "learning_rate": 1.8990407470152198e-06, "loss": 11.0586, "step": 17953 }, { "epoch": 0.16995295387207618, "grad_norm": 1521.83740234375, "learning_rate": 1.8990273223713642e-06, "loss": 11.6484, "step": 17954 }, { "epoch": 0.16996241989379124, "grad_norm": 354.79327392578125, "learning_rate": 1.8990138968824817e-06, "loss": 24.1172, "step": 17955 }, { "epoch": 0.1699718859155063, "grad_norm": 252.3321075439453, "learning_rate": 1.8990004705485844e-06, "loss": 18.0312, "step": 17956 }, { "epoch": 0.16998135193722133, "grad_norm": 354.511474609375, "learning_rate": 1.898987043369685e-06, "loss": 22.1094, "step": 17957 }, { "epoch": 0.1699908179589364, "grad_norm": 213.6039581298828, "learning_rate": 1.8989736153457966e-06, "loss": 20.1641, "step": 17958 }, { "epoch": 0.17000028398065145, "grad_norm": 226.76197814941406, "learning_rate": 1.8989601864769316e-06, "loss": 17.3359, "step": 17959 }, { "epoch": 0.1700097500023665, "grad_norm": 589.5108642578125, "learning_rate": 1.8989467567631023e-06, "loss": 56.7383, "step": 17960 }, { "epoch": 0.17001921602408157, "grad_norm": 721.3299560546875, "learning_rate": 1.8989333262043214e-06, "loss": 35.3945, "step": 17961 }, { "epoch": 0.1700286820457966, "grad_norm": 388.6555480957031, "learning_rate": 1.8989198948006017e-06, "loss": 25.125, "step": 17962 }, { "epoch": 0.17003814806751166, "grad_norm": 279.7769470214844, "learning_rate": 1.898906462551956e-06, "loss": 14.7383, "step": 17963 }, { "epoch": 0.17004761408922672, "grad_norm": 275.8846130371094, "learning_rate": 1.8988930294583966e-06, "loss": 14.3828, "step": 17964 }, { "epoch": 0.17005708011094178, "grad_norm": 549.38427734375, "learning_rate": 1.898879595519936e-06, "loss": 24.2812, "step": 17965 }, { "epoch": 0.17006654613265682, "grad_norm": 408.0734558105469, "learning_rate": 1.8988661607365875e-06, "loss": 19.2812, "step": 17966 }, { "epoch": 0.17007601215437188, "grad_norm": 1309.485595703125, "learning_rate": 1.898852725108363e-06, "loss": 37.7891, "step": 17967 }, { "epoch": 0.17008547817608693, "grad_norm": 815.8139038085938, "learning_rate": 1.8988392886352753e-06, "loss": 45.9375, "step": 17968 }, { "epoch": 0.170094944197802, "grad_norm": 596.4310913085938, "learning_rate": 1.8988258513173375e-06, "loss": 36.4688, "step": 17969 }, { "epoch": 0.17010441021951705, "grad_norm": 361.7583923339844, "learning_rate": 1.8988124131545614e-06, "loss": 9.7773, "step": 17970 }, { "epoch": 0.1701138762412321, "grad_norm": 661.9918823242188, "learning_rate": 1.8987989741469604e-06, "loss": 30.5938, "step": 17971 }, { "epoch": 0.17012334226294715, "grad_norm": 447.5648498535156, "learning_rate": 1.898785534294547e-06, "loss": 43.1719, "step": 17972 }, { "epoch": 0.1701328082846622, "grad_norm": 471.4880676269531, "learning_rate": 1.8987720935973332e-06, "loss": 22.6328, "step": 17973 }, { "epoch": 0.17014227430637727, "grad_norm": 1382.9100341796875, "learning_rate": 1.8987586520553325e-06, "loss": 29.0078, "step": 17974 }, { "epoch": 0.1701517403280923, "grad_norm": 240.3826141357422, "learning_rate": 1.898745209668557e-06, "loss": 18.2188, "step": 17975 }, { "epoch": 0.17016120634980736, "grad_norm": 2.8444056510925293, "learning_rate": 1.8987317664370192e-06, "loss": 0.9561, "step": 17976 }, { "epoch": 0.17017067237152242, "grad_norm": 361.05291748046875, "learning_rate": 1.8987183223607324e-06, "loss": 29.8984, "step": 17977 }, { "epoch": 0.17018013839323748, "grad_norm": 270.07672119140625, "learning_rate": 1.8987048774397087e-06, "loss": 19.3516, "step": 17978 }, { "epoch": 0.17018960441495254, "grad_norm": 336.9197692871094, "learning_rate": 1.898691431673961e-06, "loss": 40.3672, "step": 17979 }, { "epoch": 0.17019907043666757, "grad_norm": 2.9408602714538574, "learning_rate": 1.8986779850635016e-06, "loss": 0.9312, "step": 17980 }, { "epoch": 0.17020853645838263, "grad_norm": 243.16546630859375, "learning_rate": 1.8986645376083436e-06, "loss": 18.2461, "step": 17981 }, { "epoch": 0.1702180024800977, "grad_norm": 321.6264343261719, "learning_rate": 1.8986510893084992e-06, "loss": 48.9219, "step": 17982 }, { "epoch": 0.17022746850181275, "grad_norm": 286.67401123046875, "learning_rate": 1.8986376401639814e-06, "loss": 27.25, "step": 17983 }, { "epoch": 0.17023693452352778, "grad_norm": 289.6798400878906, "learning_rate": 1.8986241901748026e-06, "loss": 30.0938, "step": 17984 }, { "epoch": 0.17024640054524284, "grad_norm": 248.4419403076172, "learning_rate": 1.8986107393409757e-06, "loss": 17.4141, "step": 17985 }, { "epoch": 0.1702558665669579, "grad_norm": 3.0520870685577393, "learning_rate": 1.8985972876625127e-06, "loss": 0.9746, "step": 17986 }, { "epoch": 0.17026533258867296, "grad_norm": 256.4686584472656, "learning_rate": 1.898583835139427e-06, "loss": 23.4062, "step": 17987 }, { "epoch": 0.17027479861038802, "grad_norm": 285.01141357421875, "learning_rate": 1.8985703817717308e-06, "loss": 29.3047, "step": 17988 }, { "epoch": 0.17028426463210306, "grad_norm": 376.4917907714844, "learning_rate": 1.8985569275594374e-06, "loss": 36.4219, "step": 17989 }, { "epoch": 0.17029373065381812, "grad_norm": 650.9038696289062, "learning_rate": 1.8985434725025585e-06, "loss": 42.7656, "step": 17990 }, { "epoch": 0.17030319667553317, "grad_norm": 522.6336059570312, "learning_rate": 1.898530016601107e-06, "loss": 18.1719, "step": 17991 }, { "epoch": 0.17031266269724823, "grad_norm": 784.3931274414062, "learning_rate": 1.898516559855096e-06, "loss": 35.4453, "step": 17992 }, { "epoch": 0.1703221287189633, "grad_norm": 605.96142578125, "learning_rate": 1.8985031022645379e-06, "loss": 42.0469, "step": 17993 }, { "epoch": 0.17033159474067833, "grad_norm": 266.9859313964844, "learning_rate": 1.8984896438294453e-06, "loss": 22.1328, "step": 17994 }, { "epoch": 0.1703410607623934, "grad_norm": 589.5961303710938, "learning_rate": 1.898476184549831e-06, "loss": 33.8594, "step": 17995 }, { "epoch": 0.17035052678410845, "grad_norm": 364.2583923339844, "learning_rate": 1.8984627244257073e-06, "loss": 40.8906, "step": 17996 }, { "epoch": 0.1703599928058235, "grad_norm": 452.3497009277344, "learning_rate": 1.8984492634570875e-06, "loss": 29.7109, "step": 17997 }, { "epoch": 0.17036945882753854, "grad_norm": 3.4138476848602295, "learning_rate": 1.8984358016439833e-06, "loss": 0.9194, "step": 17998 }, { "epoch": 0.1703789248492536, "grad_norm": 1333.2449951171875, "learning_rate": 1.8984223389864082e-06, "loss": 28.0859, "step": 17999 }, { "epoch": 0.17038839087096866, "grad_norm": 329.2818603515625, "learning_rate": 1.8984088754843745e-06, "loss": 16.4648, "step": 18000 } ], "logging_steps": 1.0, "max_steps": 105641, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": true, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.355322423621976e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }